Fri, Dec 5, 2025

Mempool visibility - 2025-12-05

Analysis of transaction visibility in the public mempool before block inclusion on Ethereum mainnet.

Methodology: A transaction is counted as "seen in mempool" only if it was observed by our sentries before the slot start time of the block that included it. This corrects for transactions that appear in the mempool after block propagation.
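As a minimal sketch of this rule (classify, first_seen, and slot_start are illustrative names, not part of the pipeline):

from datetime import datetime
from typing import Optional

def classify(first_seen: Optional[datetime], slot_start: datetime) -> str:
    """Bucket a transaction by when sentries first saw it relative to its inclusion slot."""
    if first_seen is None:
        return "never_seen"        # private: never observed by any sentry
    if first_seen < slot_start:
        return "seen_before_slot"  # public and available when the block was built
    return "seen_after_slot"       # only surfaced after block propagation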

Show code
display_sql("mempool_availability", target_date)
View query
WITH first_seen AS (
    SELECT
        hash,
        min(event_date_time) AS first_event_time
    FROM mempool_transaction
    WHERE meta_network_name = 'mainnet'
      AND event_date_time >= '2025-12-05'::date - INTERVAL 1 DAY
      AND event_date_time < '2025-12-05'::date + INTERVAL 2 DAY
    GROUP BY hash
)
SELECT
    c.slot,
    c.slot_start_date_time,
    c.type AS tx_type,
    count() AS total_txs,
    -- Seen BEFORE slot start (public, available for inclusion)
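    -- The IS NOT NULL plus > '2020-01-01' pair guards unmatched LEFT JOIN rows, which
    -- ClickHouse fills with either NULL or the zero date 1970-01-01 depending on join_use_nulls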
    countIf(
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS seen_before_slot,
    -- Seen AFTER slot start (appeared after block propagation)
    countIf(
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS seen_after_slot,
    -- Age percentiles for transactions seen BEFORE (how long in mempool)
    quantilesIf(0.50, 0.75, 0.80, 0.85, 0.90, 0.95, 0.99)(
        dateDiff('millisecond', m.first_event_time, c.slot_start_date_time), 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time
    ) AS age_percentiles_ms,
    -- Delay percentiles for transactions seen AFTER (propagation delay)
    quantilesIf(0.50, 0.75, 0.80, 0.85, 0.90, 0.95, 0.99)(
        dateDiff('millisecond', c.slot_start_date_time, m.first_event_time), 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time
    ) AS delay_percentiles_ms,
    -- Age histogram (doubling buckets, <500 ms up to >=1 h)
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 500 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_0,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 500 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 1000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_1,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 1000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 2000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_2,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 2000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 4000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_3,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 4000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 8000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_4,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 8000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 16000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_5,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 16000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 32000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_6,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 32000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 64000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_7,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 64000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 128000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_8,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 128000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 256000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_9,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 256000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 512000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_10,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 512000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 1024000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_11,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 1024000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 2048000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_12,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 2048000 AND dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) < 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_13,
    countIf(dateDiff('millisecond', m.first_event_time, c.slot_start_date_time) >= 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time < c.slot_start_date_time) AS age_hist_14,
    -- Delay histogram (doubling buckets, <500 ms up to >=1 h)
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 500 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_0,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 500 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 1000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_1,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 1000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 2000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_2,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 2000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 4000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_3,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 4000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 8000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_4,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 8000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 16000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_5,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 16000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 32000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_6,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 32000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 64000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_7,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 64000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 128000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_8,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 128000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 256000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_9,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 256000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 512000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_10,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 512000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 1024000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_11,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 1024000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 2048000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_12,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 2048000 AND dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) < 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_13,
    countIf(dateDiff('millisecond', c.slot_start_date_time, m.first_event_time) >= 3600000 AND 
        m.first_event_time IS NOT NULL
        AND m.first_event_time > '2020-01-01'
        AND m.first_event_time >= c.slot_start_date_time) AS delay_hist_14
FROM canonical_beacon_block_execution_transaction c
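-- GLOBAL: materialize the first_seen subquery on the initiator and broadcast it to all shards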
GLOBAL LEFT JOIN first_seen m ON c.hash = m.hash
WHERE c.meta_network_name = 'mainnet'
  AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
GROUP BY c.slot, c.slot_start_date_time, c.type
ORDER BY c.slot, c.type
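
The fifteen histogram buckets double from 500 ms up to 2048 s, with a final split at one hour for the tail. A throwaway helper (age_bucket is an illustrative name, not part of the pipeline) reproduces the bucket index:

import math

def age_bucket(ms: float) -> int:
    """Map an age/delay in milliseconds to the query's bucket index 0-14."""
    if ms < 500:
        return 0
    if ms >= 3_600_000:    # >= 1 hour
        return 14
    if ms >= 2_048_000:    # 2048 s .. 1 h
        return 13
    # buckets 1-12 double: [0.5 s, 1 s), [1 s, 2 s), ..., [1024 s, 2048 s)
    return int(math.log2(ms / 500)) + 1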
Show code
df = load_parquet("mempool_availability", target_date)
df["tx_type_label"] = df["tx_type"].map(TX_TYPE_LABELS)
df["coverage_pct"] = df["seen_before_slot"] / df["total_txs"] * 100

# Calculate never seen (truly private)
df["never_seen"] = df["total_txs"] - df["seen_before_slot"] - df["seen_after_slot"]

# Extract p50 age from percentiles array (index 0)
df["p50_age_ms"] = df["age_percentiles_ms"].apply(lambda x: x[0] if x is not None and len(x) > 0 else np.nan)
df["p50_age_s"] = df["p50_age_ms"] / 1000

# Add hour column for time-series aggregation
df["hour"] = df["slot_start_date_time"].dt.floor("h")

total = df["total_txs"].sum()
before = df["seen_before_slot"].sum()
after = df["seen_after_slot"].sum()
never = total - before - after

print(f"Loaded {len(df):,} slot/type rows")
print(f"Slots: {df['slot'].nunique():,}")
print(f"Total transactions: {total:,}")
print(f"  Seen before slot: {before:,} ({100*before/total:.1f}%)")
print(f"  Seen after slot:  {after:,} ({100*after/total:.1f}%)")
print(f"  Never seen:       {never:,} ({100*never/total:.1f}%)")
Loaded 26,913 slot/type rows
Slots: 7,071
Total transactions: 1,708,608
  Seen before slot: 305,794 (17.9%)
  Seen after slot:  64,253 (3.8%)
  Never seen:       1,338,561 (78.3%)

Coverage by transaction type

Percentage of transactions seen in the public mempool before the slot they were included in. Low coverage indicates private or MEV transactions that bypass the public mempool or are submitted just-in-time.

Show code
# Aggregate by type
df_summary = df.groupby(["tx_type", "tx_type_label"]).agg({
    "total_txs": "sum",
    "seen_before_slot": "sum",
    "seen_after_slot": "sum",
}).reset_index()
df_summary["never_seen"] = df_summary["total_txs"] - df_summary["seen_before_slot"] - df_summary["seen_after_slot"]
df_summary["before_pct"] = df_summary["seen_before_slot"] / df_summary["total_txs"] * 100
df_summary["after_pct"] = df_summary["seen_after_slot"] / df_summary["total_txs"] * 100
df_summary["never_pct"] = df_summary["never_seen"] / df_summary["total_txs"] * 100

# Display summary table
summary_display = df_summary[["tx_type_label", "total_txs", "before_pct", "after_pct", "never_pct"]].copy()
summary_display.columns = ["Type", "Total", "Before slot %", "After slot %", "Never seen %"]
for col in summary_display.columns[2:]:
    summary_display[col] = summary_display[col].round(1)
summary_display
Type Total Before slot % After slot % Never seen %
0 Legacy 298755 23.7 3.9 72.4
1 Access list 2673 9.0 1.6 89.5
2 EIP-1559 1381897 17.0 3.8 79.2
3 Blob 14143 0.0 0.0 100.0
4 EIP-7702 11140 0.0 0.0 100.0
Show code
# Coverage stacked bar chart showing before/after/never breakdown
fig = go.Figure()

fig.add_trace(go.Bar(
    x=df_summary["tx_type_label"],
    y=df_summary["before_pct"],
    name="Before slot (public)",
    marker_color="#27ae60",
    text=df_summary["before_pct"].round(1),
    textposition="inside",
))
fig.add_trace(go.Bar(
    x=df_summary["tx_type_label"],
    y=df_summary["after_pct"],
    name="After slot (propagated)",
    marker_color="#3498db",
    text=df_summary["after_pct"].round(1),
    textposition="inside",
))
fig.add_trace(go.Bar(
    x=df_summary["tx_type_label"],
    y=df_summary["never_pct"],
    name="Never seen (private)",
    marker_color="#95a5a6",
    text=df_summary["never_pct"].round(1),
    textposition="inside",
))

fig.update_traces(texttemplate="%{text:.1f}%")
fig.update_layout(
    barmode="stack",
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Transaction type"),
    yaxis=dict(title="Percentage", range=[0, 105]),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})

Mempool visibility percentage over time for each transaction type. Blob (type 3) and EIP-7702 (type 4) transactions show zero visibility in this dataset; for blobs this is likely an observation gap rather than true privacy, since EIP-4844 transactions are announced hash-only on the gossip layer rather than broadcast in full, so sentries that only listen to full-transaction broadcasts never record them.

Show code
# Aggregate to hourly for time-series
df_hourly = df.groupby(["hour", "tx_type", "tx_type_label"]).agg({
    "total_txs": "sum",
    "seen_before_slot": "sum",
    "seen_after_slot": "sum",
}).reset_index()
df_hourly["coverage_pct"] = df_hourly["seen_before_slot"] / df_hourly["total_txs"] * 100

fig = px.line(
    df_hourly,
    x="hour",
    y="coverage_pct",
    color="tx_type_label",
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    labels={"hour": "Time", "coverage_pct": "Seen before slot (%)", "tx_type_label": "Type"},
    markers=True,
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})

Transaction volume over time

Hourly transaction counts split by public (seen in mempool) vs. private (not seen). The private portion includes MEV bundles and other order flow submitted directly to builders, as well as transactions our sentries simply failed to observe (note the blob and EIP-7702 rows above).

Show code
# Aggregate across types by hour - 3-way breakdown
df_volume = df.groupby("hour").agg({
    "total_txs": "sum",
    "seen_before_slot": "sum",
    "seen_after_slot": "sum",
}).reset_index()
df_volume["never_seen"] = df_volume["total_txs"] - df_volume["seen_before_slot"] - df_volume["seen_after_slot"]

fig = go.Figure()
fig.add_trace(go.Bar(
    x=df_volume["hour"],
    y=df_volume["seen_before_slot"],
    name="Before slot (public)",
    marker_color="#27ae60",
))
fig.add_trace(go.Bar(
    x=df_volume["hour"],
    y=df_volume["seen_after_slot"],
    name="After slot (propagated)",
    marker_color="#3498db",
))
fig.add_trace(go.Bar(
    x=df_volume["hour"],
    y=df_volume["never_seen"],
    name="Never seen (private)",
    marker_color="#95a5a6",
))
fig.update_layout(
    barmode="stack",
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Transaction count"),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})

Coverage heatmap

Heatmap showing mempool visibility over time for each transaction type. Darker colors indicate higher coverage (more transactions seen in the public mempool).

Show code
# Pivot for heatmap using hourly aggregated data
df_pivot = df_hourly.pivot(index="tx_type_label", columns="hour", values="coverage_pct").fillna(0)

fig = go.Figure(
    data=go.Heatmap(
        z=df_pivot.values,
        x=df_pivot.columns,
        y=df_pivot.index,
        colorscale="Greens",
        colorbar=dict(title=dict(text="Coverage %", side="right")),
    )
)
fig.update_layout(
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Time"),
    yaxis=dict(title="Transaction type"),
    height=300,
)
fig.show(config={"responsive": True})

Mempool age distribution

How long transactions waited in the mempool before being included in a block. The age is measured from first observation by our sentries to the slot start time. Only transactions seen before their inclusion slot are counted.

Show code
# Extract all percentiles for each type
def extract_percentiles(group):
    # Collect per-slot percentile arrays for rows with at least one pre-slot observation
    pct_arrays = []
    for _, row in group.iterrows():
        if row['seen_before_slot'] > 0 and row['age_percentiles_ms'] is not None:
            pcts = row['age_percentiles_ms']
            if not any(np.isnan(pcts)):
                pct_arrays.append(pcts)
    
    if not pct_arrays:
        return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})
    
    # Unweighted mean of per-slot percentiles (an approximation; see the weighted variant below)
    avg_pcts = np.nanmean(pct_arrays, axis=0)
    return pd.Series({
        'p50': avg_pcts[0] / 1000,
        'p75': avg_pcts[1] / 1000,
        'p80': avg_pcts[2] / 1000,
        'p85': avg_pcts[3] / 1000,
        'p90': avg_pcts[4] / 1000,
        'p95': avg_pcts[5] / 1000,
        'p99': avg_pcts[6] / 1000,
    })

df_age = df.groupby(['tx_type', 'tx_type_label']).apply(extract_percentiles, include_groups=False).reset_index()

# Display age table
age_display = df_age[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()
age_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']
for col in age_display.columns[1:]:
    age_display[col] = age_display[col].round(1)
age_display
Type p50 (s) p75 (s) p90 (s) p95 (s) p99 (s)
0 Legacy 228.1 350.9 442.0 487.0 534.7
1 Access list 15.4 21.9 29.3 39.2 47.5
2 EIP-1559 76.3 147.4 269.1 372.9 614.7
3 Blob NaN NaN NaN NaN NaN
4 EIP-7702 NaN NaN NaN NaN NaN
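
The table above averages per-slot percentiles with equal weight, so a slot with one pre-slot transaction counts as much as one with hundreds. A count-weighted variant (a sketch against the same columns; extract_percentiles_weighted is hypothetical and did not produce the table above) would look like:

import numpy as np
import pandas as pd

def extract_percentiles_weighted(group):
    """Mean of per-slot percentile arrays, weighted by pre-slot tx count."""
    cols = ['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']
    rows = group[(group['seen_before_slot'] > 0) & group['age_percentiles_ms'].notna()]
    if rows.empty:
        return pd.Series({c: np.nan for c in cols})
    arrs = np.stack(rows['age_percentiles_ms'].to_list())
    w = rows['seen_before_slot'].to_numpy(dtype=float)
    keep = ~np.isnan(arrs).any(axis=1)  # drop slots with incomplete percentile arrays
    if not keep.any():
        return pd.Series({c: np.nan for c in cols})
    avg = np.average(arrs[keep], axis=0, weights=w[keep]) / 1000  # ms -> s
    return pd.Series(dict(zip(cols, avg)))

# usage: df.groupby(['tx_type', 'tx_type_label']).apply(extract_percentiles_weighted, include_groups=False)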
Show code
# Visualize age percentiles as line chart
df_age_long = df_age.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],
    var_name='percentile',
    value_name='age_s'
)
# Convert percentile labels to numeric for x-axis
df_age_long['pct_num'] = df_age_long['percentile'].str.replace('p', '').astype(int)

fig = px.line(
    df_age_long,
    x='pct_num',
    y='age_s',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    markers=True,
    log_y=True,
    labels={'pct_num': 'Percentile', 'age_s': 'Age (seconds)', 'tx_type_label': 'Type'},
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})
Show code
# Aggregate histogram buckets across all slots per tx type
hist_cols = [f'age_hist_{i}' for i in range(15)]
df_hist = df.groupby(['tx_type', 'tx_type_label'])[hist_cols].sum().reset_index()

# Melt to long format for plotting
df_hist_long = df_hist.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=hist_cols,
    var_name='bucket',
    value_name='count'
)
df_hist_long['bucket_idx'] = df_hist_long['bucket'].str.extract(r'(\d+)').astype(int)
df_hist_long['bucket_label'] = df_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))

# Sort by bucket index for proper ordering
df_hist_long = df_hist_long.sort_values(['tx_type', 'bucket_idx'])

fig = px.bar(
    df_hist_long,
    x='bucket_label',
    y='count',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    facet_col='tx_type_label',
    facet_col_wrap=2,
    labels={'bucket_label': 'Age bucket', 'count': 'Count', 'tx_type_label': 'Type'},
    category_orders={'bucket_label': HIST_LABELS},
)
fig.update_yaxes(matches=None, showticklabels=True)
fig.update_layout(
    margin=dict(l=60, r=30, t=60, b=100),
    showlegend=False,
    height=600,
)
fig.update_xaxes(tickangle=45)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.show(config={"responsive": True})

Propagation delay (seen after slot)

For transactions first seen in the mempool after block inclusion, this measures how long after the slot start they appeared.

Show code
# Extract delay percentiles for transactions seen AFTER slot start
def extract_delay_percentiles(group):
    pct_arrays = []
    for _, row in group.iterrows():
        if row['seen_after_slot'] > 0 and row['delay_percentiles_ms'] is not None:
            pcts = row['delay_percentiles_ms']
            if not any(np.isnan(pcts)):
                pct_arrays.append(pcts)
    
    if not pct_arrays:
        return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})
    
    avg_pcts = np.nanmean(pct_arrays, axis=0)
    return pd.Series({
        'p50': avg_pcts[0] / 1000,
        'p75': avg_pcts[1] / 1000,
        'p80': avg_pcts[2] / 1000,
        'p85': avg_pcts[3] / 1000,
        'p90': avg_pcts[4] / 1000,
        'p95': avg_pcts[5] / 1000,
        'p99': avg_pcts[6] / 1000,
    })

df_delay = df.groupby(['tx_type', 'tx_type_label']).apply(extract_delay_percentiles, include_groups=False).reset_index()

# Display delay table
delay_display = df_delay[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()
delay_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']
for col in delay_display.columns[1:]:
    delay_display[col] = delay_display[col].round(2)
delay_display
Type p50 (s) p75 (s) p90 (s) p95 (s) p99 (s)
0 Legacy 5811.47 7115.60 8506.48 9133.48 9712.15
1 Access list 57.88 58.05 58.16 58.19 58.22
2 EIP-1559 5829.89 9850.04 15750.68 19666.82 24210.98
3 Blob NaN NaN NaN NaN NaN
4 EIP-7702 NaN NaN NaN NaN NaN
Show code
# Visualize delay percentiles as line chart
df_delay_long = df_delay.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],
    var_name='percentile',
    value_name='delay_s'
)
# Convert percentile labels to numeric for x-axis
df_delay_long['pct_num'] = df_delay_long['percentile'].str.replace('p', '').astype(int)

fig = px.line(
    df_delay_long,
    x='pct_num',
    y='delay_s',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    markers=True,
    log_y=True,
    labels={'pct_num': 'Percentile', 'delay_s': 'Delay (seconds)', 'tx_type_label': 'Type'},
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    height=400,
)
fig.show(config={"responsive": True})
Show code
# Aggregate delay histogram buckets across all slots per tx type
delay_hist_cols = [f'delay_hist_{i}' for i in range(15)]
df_delay_hist = df.groupby(['tx_type', 'tx_type_label'])[delay_hist_cols].sum().reset_index()

# Melt to long format for plotting
df_delay_hist_long = df_delay_hist.melt(
    id_vars=['tx_type', 'tx_type_label'],
    value_vars=delay_hist_cols,
    var_name='bucket',
    value_name='count'
)
df_delay_hist_long['bucket_idx'] = df_delay_hist_long['bucket'].str.extract(r'(\d+)').astype(int)
df_delay_hist_long['bucket_label'] = df_delay_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))

# Sort by bucket index for proper ordering
df_delay_hist_long = df_delay_hist_long.sort_values(['tx_type', 'bucket_idx'])

fig = px.bar(
    df_delay_hist_long,
    x='bucket_label',
    y='count',
    color='tx_type_label',
    color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
    facet_col='tx_type_label',
    facet_col_wrap=2,
    labels={'bucket_label': 'Delay bucket', 'count': 'Count', 'tx_type_label': 'Type'},
    category_orders={'bucket_label': HIST_LABELS},
)
fig.update_yaxes(matches=None, showticklabels=True)
fig.update_layout(
    margin=dict(l=60, r=30, t=60, b=100),
    showlegend=False,
    height=600,
)
fig.update_xaxes(tickangle=45)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.show(config={"responsive": True})

Sentry coverage

How much of the canonical transaction set each mempool observer (sentry) captured. Higher coverage indicates better mempool visibility from that observation point.

Show code
display_sql("sentry_coverage", target_date)
View query
WITH canonical_hashes AS (
    SELECT DISTINCT hash
    FROM canonical_beacon_block_execution_transaction
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
),
total_canonical AS (
    SELECT count() AS total FROM canonical_hashes
)
SELECT
    meta_client_name AS sentry,
    count(DISTINCT hash) AS txs_seen,
    round(count(DISTINCT hash) * 100.0 / (SELECT total FROM total_canonical), 2) AS coverage_pct
FROM mempool_transaction
WHERE meta_network_name = 'mainnet'
  AND event_date_time >= '2025-12-05'::date - INTERVAL 1 HOUR
  AND event_date_time < '2025-12-05'::date + INTERVAL 1 DAY
  AND hash GLOBAL IN (SELECT hash FROM canonical_hashes)
GROUP BY meta_client_name
ORDER BY txs_seen DESC
Show code
df_sentry = load_parquet("sentry_coverage", target_date)

# Shorten sentry names for display
df_sentry["sentry_short"] = df_sentry["sentry"].str.replace("ethpandaops/mainnet/", "")

fig = px.bar(
    df_sentry.head(15),
    x="coverage_pct",
    y="sentry_short",
    orientation="h",
    labels={"coverage_pct": "Coverage (%)", "sentry_short": "Sentry"},
    text="coverage_pct",
)
fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside")
fig.update_layout(
    margin=dict(l=250, r=60, t=30, b=60),
    xaxis=dict(range=[0, 105]),
    yaxis=dict(autorange="reversed"),
    height=500,
)
fig.show(config={"responsive": True})