Fri, Dec 5, 2025

Mempool visibility - 2025-12-05

Analysis of transaction visibility in the public mempool before block inclusion on Ethereum mainnet.

Methodology: A transaction is counted as "seen in mempool" only if it was observed by our sentries before the slot start time of the block that included it. This corrects for transactions that appear in the mempool after block propagation.
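As a minimal sketch of this rule (classify, first_seen, and slot_start are illustrative names, not part of the pipeline):

from datetime import datetime
from typing import Optional

def classify(first_seen: Optional[datetime], slot_start: datetime) -> str:
    """Bucket a transaction by when sentries first saw it relative to its inclusion slot."""
    if first_seen is None:
        return "never_seen"        # private: never observed by any sentry
    if first_seen < slot_start:
        return "seen_before_slot"  # public and available when the block was built
    return "seen_after_slot"       # only surfaced after block propagation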

Show code
display_sql("mempool_availability", target_date)
View query
WITH first_seen AS (
    SELECT
        hash,
        min(event_date_time) AS first_event_time
    FROM mempool_transaction
    WHERE meta_network_name = 'mainnet'
      AND event_date_time >= '2025-12-05'::date - INTERVAL 1 DAY
      AND event_date_time < '2025-12-05'::date + INTERVAL 2 DAY
    GROUP BY hash
)
SELECT
    c.slot,
    c.slot_start_date_time,
    c.type AS tx_type,
    count() AS total_txs,
    -- Seen BEFORE slot start (public, available for inclusion)
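    -- The IS NOT NULL plus > '2020-01-01' pair guards unmatched LEFT JOIN rows, which
    -- ClickHouse fills with either NULL or the zero date 1970-01-01 depending on join_use_nulls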
    countIf(
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS seen_before_slot,
    -- Seen AFTER slot start (appeared after block propagation)
    countIf(
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS seen_after_slot,
    -- Age percentiles for transactions seen BEFORE (how long in mempool)
    quantilesIf(0.50, 0.75, 0.80, 0.85, 0.90, 0.95, 0.99)(
        dateDiff('millisecond', m.first_event_time, c.slot_start_date_time), 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time
    ) AS age_percentiles_ms,
    -- Delay percentiles for transactions seen AFTER (propagation delay)
    quantilesIf(0.50, 0.75, 0.80, 0.85, 0.90, 0.95, 0.99)(
        dateDiff('millisecond', c.slot_start_date_time, m.first_event_time), 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time
    ) AS delay_percentiles_ms,
    -- Age histogram (doubling buckets, <500 ms up to >=1 h)
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 500 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_0,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 500 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 1000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_1,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 1000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 2000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_2,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 2000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 4000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_3,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 4000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 8000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_4,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 8000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 16000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_5,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 16000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 32000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_6,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 32000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 64000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_7,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 64000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 128000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_8,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 128000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 256000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_9,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 256000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 512000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_10,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 512000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 1024000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_11,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 1024000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 2048000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_12,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 2048000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_13,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_14,
    -- Delay histogram (doubling buckets, <500 ms up to >=1 h)
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 500 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_0,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 500 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 1000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_1,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 1000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 2000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_2,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 2000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 4000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_3,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 4000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 8000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_4,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 8000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 16000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_5,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 16000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 32000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_6,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 32000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 64000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_7,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 64000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 128000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_8,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 128000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 256000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_9,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 256000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 512000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_10,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 512000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 1024000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_11,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 1024000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 2048000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_12,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 2048000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_13,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_14
FROM canonical_beacon_block_execution_transaction c
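-- GLOBAL: materialize the first_seen subquery on the initiator and broadcast it to all shards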
GLOBAL LEFT JOIN first_seen m ON c.hash = m.hash
WHERE c.meta_network_name = 'mainnet'
  AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
GROUP BY c.slot, c.slot_start_date_time, c.type
ORDER BY c.slot, c.type
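
The fifteen histogram buckets double from 500 ms up to 2048 s, with a final split at one hour for the tail. A throwaway helper (age_bucket is an illustrative name, not part of the pipeline) reproduces the bucket index:

import math

def age_bucket(ms: float) -> int:
    """Map an age/delay in milliseconds to the query's bucket index 0-14."""
    if ms < 500:
        return 0
    if ms >= 3_600_000:    # >= 1 hour
        return 14
    if ms >= 2_048_000:    # 2048 s .. 1 h
        return 13
    # buckets 1-12 double: [0.5 s, 1 s), [1 s, 2 s), ..., [1024 s, 2048 s)
    return int(math.log2(ms / 500)) + 1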
Show code
df = load_parquet("mempool_availability", target_date)
df["tx_type_label"] = df["tx_type"].map(TX_TYPE_LABELS)
df["coverage_pct"] = df["seen_before_slot"] / df["total_txs"] * 100

# Calculate never seen (truly private)
df["never_seen"] = df["total_txs"] - df["seen_before_slot"] - df["seen_after_slot"]

# Extract p50 age from percentiles array (index 0)
df["p50_age_ms"] = df["age_percentiles_ms"].apply(lambda x: x[0] if x is not None and len(x) > 0 else np.nan)
df["p50_age_s"] = df["p50_age_ms"] / 1000

# Add hour column for time-series aggregation
df["hour"] = df["slot_start_date_time"].dt.floor("h")

total = df["total_txs"].sum()
before = df["seen_before_slot"].sum()
after = df["seen_after_slot"].sum()
never = total - before - after

print(f"Loaded {len(df):,} slot/type rows")
print(f"Slots: {df['slot'].nunique():,}")
print(f"Total transactions: {total:,}")
print(f"  Seen before slot: {before:,} ({100*before/total:.1f}%)")
print(f"  Seen after slot:  {after:,} ({100*after/total:.1f}%)")
print(f"  Never seen:       {never:,} ({100*never/total:.1f}%)")
Loaded 26,913 slot/type rows
Slots: 7,071
Total transactions: 1,708,608
  Seen before slot: 305,794 (17.9%)
  Seen after slot:  64,253 (3.8%)
  Never seen:       1,338,561 (78.3%)

Coverage by transaction type

Percentage of transactions seen in the public mempool before the slot they were included in. Low coverage indicates private or MEV transactions that bypass the public mempool or are submitted just-in-time.

Show code
# Aggregate by type
df_summary = df.groupby(["tx_type", "tx_type_label"]).agg({
    "total_txs": "sum",
    "seen_before_slot": "sum",
    "seen_after_slot": "sum",
}).reset_index()
df_summary["never_seen"] = df_summary["total_txs"] - df_summary["seen_before_slot"] - df_summary["seen_after_slot"]
df_summary["before_pct"] = df_summary["seen_before_slot"] / df_summary["total_txs"] * 100
df_summary["after_pct"] = df_summary["seen_after_slot"] / df_summary["total_txs"] * 100
df_summary["never_pct"] = df_summary["never_seen"] / df_summary["total_txs"] * 100

# Display summary table
summary_display = df_summary[["tx_type_label", "total_txs", "before_pct", "after_pct", "never_pct"]].copy()
summary_display.columns = ["Type", "Total", "Before slot %", "After slot %", "Never seen %"]
for col in summary_display.columns[2:]:
    summary_display[col] = summary_display[col].round(1)
summary_display
Type Total Before slot % After slot % Never seen %
0 Legacy 298755 23.7 3.9 72.4
1 Access list 2673 9.0 1.6 89.5
2 EIP-1559 1381897 17.0 3.8 79.2
3 Blob 14143 0.0 0.0 100.0
4 EIP-7702 11140 0.0 0.0 100.0
Show code
# Coverage stacked bar chart showing before/after/never breakdown
fig = go.Figure()

fig.add_trace(go.Bar(
    x=df_summary["tx_type_label"],
    y=df_summary["before_pct"],
    name="Before slot (public)",
    marker_color="#27ae60",
    text=df_summary["before_pct"].round(1),
    textposition="inside",
))
fig.add_trace(go.Bar(
    x=df_summary["tx_type_label"],
    y=df_summary["after_pct"],
    name="After slot (propagated)",
    marker_color="#3498db",
    text=df_summary["after_pct"].round(1),
    textposition="inside",
))
fig.add_trace(go.Bar(
    x=df_summary["tx_type_label"],
    y=df_summary["never_pct"],
    name="Never seen (private)",
    marker_color="#95a5a6",
    text=df_summary["never_pct"].round(1),
    textposition="inside",
))

fig.update_traces(texttemplate="%{text:.1f}%")
fig.update_layout(
    barmode="stack",
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Transaction type"),
    yaxis=dict(title="Percentage", range=[0, 105]),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})

Mempool visibility percentage over time for each transaction type. Blob (type 3) and EIP-7702 (type 4) transactions show zero visibility in this dataset; for blobs this is likely an observation gap rather than true privacy, since EIP-4844 transactions are announced hash-only on the gossip layer rather than broadcast in full, so sentries that only listen to full-transaction broadcasts never record them.

Show code
# Aggregate to hourly for time-series
df_hourly = df.groupby(["hour", "tx_type", "tx_type_label"]).agg({
    "total_txs": "sum",
    "seen_before_slot": "sum",
    "seen_after_slot": "sum",
}).reset_index()
df_hourly["coverage_pct"] = df_hourly["seen_before_slot"] / df_hourly["total_txs"] * 100

fig = px.line(
    df_hourly,
    x="hour",
    y="coverage_pct",
    color="tx_type_label",
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    labels={"hour": "Time", "coverage_pct": "Seen before slot (%)", "tx_type_label": "Type"},
    markers=True,
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})

Transaction volume over time

Hourly transaction counts split by public (seen in mempool) vs. private (not seen). The private portion includes MEV bundles and other order flow submitted directly to builders, as well as transactions our sentries simply failed to observe (note the blob and EIP-7702 rows above).

Show code
# Aggregate across types by hour - 3-way breakdown
df_volume = df.groupby("hour").agg({
    "total_txs": "sum",
    "seen_before_slot": "sum",
    "seen_after_slot": "sum",
}).reset_index()
df_volume["never_seen"] = df_volume["total_txs"] - df_volume["seen_before_slot"] - df_volume["seen_after_slot"]

fig = go.Figure()
fig.add_trace(go.Bar(
    x=df_volume["hour"],
    y=df_volume["seen_before_slot"],
    name="Before slot (public)",
    marker_color="#27ae60",
))
fig.add_trace(go.Bar(
    x=df_volume["hour"],
    y=df_volume["seen_after_slot"],
    name="After slot (propagated)",
    marker_color="#3498db",
))
fig.add_trace(go.Bar(
    x=df_volume["hour"],
    y=df_volume["never_seen"],
    name="Never seen (private)",
    marker_color="#95a5a6",
))
fig.update_layout(
    barmode="stack",
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Transaction count"),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})

Coverage heatmap

Heatmap showing mempool visibility over time for each transaction type. Darker colors indicate higher coverage (more transactions seen in the public mempool).

Show code
# Pivot for heatmap using hourly aggregated data
df_pivot = df_hourly.pivot(index="tx_type_label", columns="hour", values="coverage_pct").fillna(0)

fig = go.Figure(
    data=go.Heatmap(
        z=df_pivot.values,
        x=df_pivot.columns,
        y=df_pivot.index,
        colorscale="Greens",
        colorbar=dict(title=dict(text="Coverage %", side="right")),
    )
)
fig.update_layout(
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Transaction type"),
    height=300,
)
fig.show(config={"responsive": True})

Mempool age distribution

How long transactions waited in the mempool before being included in a block. The age is measured from first observation by our sentries to the slot start time. Only transactions seen before their inclusion slot are counted.

Show code
# Extract all percentiles for each type
def extract_percentiles(group):
    # Collect per-slot percentile arrays for rows with at least one pre-slot observation
    pct_arrays = []
    for _, row in group.iterrows():
        if row['seen_before_slot'] > 0 and row['age_percentiles_ms'] is not None:
            pcts = row['age_percentiles_ms']
            if not any(np.isnan(pcts)):
                pct_arrays.append(pcts)
    
    if not pct_arrays:
        return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})
    
    # Unweighted mean of per-slot percentiles (an approximation; see the weighted variant below)
    avg_pcts = np.nanmean(pct_arrays, axis=0)
    return pd.Series({
        'p50': avg_pcts[0] / 1000,
        'p75': avg_pcts[1] / 1000,
        'p80': avg_pcts[2] / 1000,
        'p85': avg_pcts[3] / 1000,
        'p90': avg_pcts[4] / 1000,
        'p95': avg_pcts[5] / 1000,
        'p99': avg_pcts[6] / 1000,
    })

df_age = df.groupby(['tx_type', 'tx_type_label']).apply(extract_percentiles, include_groups=False).reset_index()

# Display age table
age_display = df_age[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()
age_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']
for col in age_display.columns[1:]:
    age_display[col] = age_display[col].round(1)
age_display
Type p50 (s) p75 (s) p90 (s) p95 (s) p99 (s)
0 Legacy 228.1 350.9 442.0 487.0 534.7
1 Access list 15.4 21.9 29.3 39.2 47.5
2 EIP-1559 76.3 147.4 269.1 372.9 614.7
3 Blob NaN NaN NaN NaN NaN
4 EIP-7702 NaN NaN NaN NaN NaN
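
The table above averages per-slot percentiles with equal weight, so a slot with one pre-slot transaction counts as much as one with hundreds. A count-weighted variant (a sketch against the same columns; extract_percentiles_weighted is hypothetical and did not produce the table above) would look like:

import numpy as np
import pandas as pd

def extract_percentiles_weighted(group):
    """Mean of per-slot percentile arrays, weighted by pre-slot tx count."""
    cols = ['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']
    rows = group[(group['seen_before_slot'] > 0) & group['age_percentiles_ms'].notna()]
    if rows.empty:
        return pd.Series({c: np.nan for c in cols})
    arrs = np.stack(rows['age_percentiles_ms'].to_list())
    w = rows['seen_before_slot'].to_numpy(dtype=float)
    keep = ~np.isnan(arrs).any(axis=1)  # drop slots with incomplete percentile arrays
    if not keep.any():
        return pd.Series({c: np.nan for c in cols})
    avg = np.average(arrs[keep], axis=0, weights=w[keep]) / 1000  # ms -> s
    return pd.Series(dict(zip(cols, avg)))

# usage: df.groupby(['tx_type', 'tx_type_label']).apply(extract_percentiles_weighted, include_groups=False)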
Show code
# Visualize age percentiles as line chart
df_age_long = df_age.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],
    var_name='percentile',
    value_name='age_s'
)
# Convert percentile labels to numeric for x-axis
df_age_long['pct_num'] = df_age_long['percentile'].str.replace('p', '').astype(int)

fig = px.line(
    df_age_long,
    x='pct_num',
    y='age_s',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    markers=True,
    log_y=True,
    labels={'pct_num': 'Percentile', 'age_s': 'Age (seconds)', 'tx_type_label': 'Type'},
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})
Show code
# Aggregate histogram buckets across all slots per tx type
hist_cols = [f'age_hist_{i}' for i in range(15)]
df_hist = df.groupby(['tx_type', 'tx_type_label'])[hist_cols].sum().reset_index()

# Melt to long format for plotting
df_hist_long = df_hist.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=hist_cols,
    var_name='bucket',
    value_name='count'
)
df_hist_long['bucket_idx'] = df_hist_long['bucket'].str.extract(r'(\d+)').astype(int)
df_hist_long['bucket_label'] = df_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))

# Sort by bucket index for proper ordering
df_hist_long = df_hist_long.sort_values(['tx_type', 'bucket_idx'])

fig = px.bar(
    df_hist_long,
    x='bucket_label',
    y='count',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    facet_col='tx_type_label',
    facet_col_wrap=2,
    labels={'bucket_label': 'Age bucket', 'count': 'Count', 'tx_type_label': 'Type'},
    category_orders={'bucket_label': HIST_LABELS},
)
fig.update_yaxes(matches=None, showticklabels=True)
fig.update_layout(
    margin=dict(l=60, r=30, t=60, b=100),
    showlegend=False,
    height=600,
)
fig.update_xaxes(tickangle=45)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.show(config={"responsive": True})

Propagation delay (seen after slot)

For transactions first seen in the mempool after block inclusion, this measures how long after the slot start they appeared.

Show code
# Extract delay percentiles for transactions seen AFTER slot start
def extract_delay_percentiles(group):
    pct_arrays = []
    for _, row in group.iterrows():
        if row['seen_after_slot'] > 0 and row['delay_percentiles_ms'] is not None:
            pcts = row['delay_percentiles_ms']
            if not any(np.isnan(pcts)):
                pct_arrays.append(pcts)
    
    if not pct_arrays:
        return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})
    
    avg_pcts = np.nanmean(pct_arrays, axis=0)
    return pd.Series({
        'p50': avg_pcts[0] / 1000,
        'p75': avg_pcts[1] / 1000,
        'p80': avg_pcts[2] / 1000,
        'p85': avg_pcts[3] / 1000,
        'p90': avg_pcts[4] / 1000,
        'p95': avg_pcts[5] / 1000,
        'p99': avg_pcts[6] / 1000,
    })

df_delay = df.groupby(['tx_type', 'tx_type_label']).apply(extract_delay_percentiles, include_groups=False).reset_index()

# Display delay table
delay_display = df_delay[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()
delay_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']
for col in delay_display.columns[1:]:
    delay_display[col] = delay_display[col].round(2)
delay_display
Type p50 (s) p75 (s) p90 (s) p95 (s) p99 (s)
0 Legacy 5811.47 7115.60 8506.48 9133.48 9712.15
1 Access list 57.88 58.05 58.16 58.19 58.22
2 EIP-1559 5829.89 9850.04 15750.68 19666.82 24210.98
3 Blob NaN NaN NaN NaN NaN
4 EIP-7702 NaN NaN NaN NaN NaN
Show code
# Visualize delay percentiles as line chart
df_delay_long = df_delay.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],
    var_name='percentile',
    value_name='delay_s'
)
# Convert percentile labels to numeric for x-axis
df_delay_long['pct_num'] = df_delay_long['percentile'].str.replace('p', '').astype(int)

fig = px.line(
    df_delay_long,
    x='pct_num',
    y='delay_s',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    markers=True,
    log_y=True,
    labels={'pct_num': 'Percentile', 'delay_s': 'Delay (seconds)', 'tx_type_label': 'Type'},
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})
Show code
# Aggregate delay histogram buckets across all slots per tx type
delay_hist_cols = [f'delay_hist_{i}' for i in range(15)]
df_delay_hist = df.groupby(['tx_type', 'tx_type_label'])[delay_hist_cols].sum().reset_index()

# Melt to long format for plotting
df_delay_hist_long = df_delay_hist.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=delay_hist_cols,
    var_name='bucket',
    value_name='count'
)
df_delay_hist_long['bucket_idx'] = df_delay_hist_long['bucket'].str.extract(r'(\d+)').astype(int)
df_delay_hist_long['bucket_label'] = df_delay_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))

# Sort by bucket index for proper ordering
df_delay_hist_long = df_delay_hist_long.sort_values(['tx_type', 'bucket_idx'])

fig = px.bar(
    df_delay_hist_long,
    x='bucket_label',
    y='count',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    facet_col='tx_type_label',
    facet_col_wrap=2,
    labels={'bucket_label': 'Delay bucket', 'count': 'Count', 'tx_type_label': 'Type'},
    category_orders={'bucket_label': HIST_LABELS},
)
fig.update_yaxes(matches=None, showticklabels=True)
fig.update_layout(
    margin=dict(l=60, r=30, t=60, b=100),
    showlegend=False,
    height=600,
)
fig.update_xaxes(tickangle=45)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.show(config={"responsive": True})

Sentry coverage

How much of the canonical transaction set each mempool observer (sentry) captured. Higher coverage indicates better mempool visibility from that observation point.

Show code
display_sql("sentry_coverage", target_date)
View query
WITH canonical_hashes AS (
    SELECT DISTINCT hash
    FROM canonical_beacon_block_execution_transaction
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
),
total_canonical AS (
    SELECT count() AS total FROM canonical_hashes
)
SELECT
    meta_client_name AS sentry,
    count(DISTINCT hash) AS txs_seen,
    round(count(DISTINCT hash) * 100.0 / (SELECT total FROM total_canonical), 2) AS coverage_pct
FROM mempool_transaction
WHERE meta_network_name = 'mainnet'
  AND event_date_time >= '2025-12-05'::date - INTERVAL 1 HOUR
  AND event_date_time < '2025-12-05'::date + INTERVAL 1 DAY
  AND hash GLOBAL IN (SELECT hash FROM canonical_hashes)
GROUP BY meta_client_name
ORDER BY txs_seen DESC
Show code
df_sentry = load_parquet("sentry_coverage", target_date)

# Shorten sentry names for display
df_sentry["sentry_short"] = df_sentry["sentry"].str.replace("ethpandaops/mainnet/", "")

fig = px.bar(
    df_sentry.head(15),
    x="coverage_pct",
    y="sentry_short",
    orientation="h",
    labels={"coverage_pct": "Coverage (%)", "sentry_short": "Sentry"},
    text="coverage_pct",
)
fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside")
fig.update_layout(
    margin=dict(l=250, r=60, t=30, b=60),
    xaxis=dict(range=[0, 105]),
    yaxis=dict(autorange="reversed"),
    height=500,
)
fig.show(config={"responsive": True})