Mempool visibility - 2026-01-06
Analysis of transaction visibility in the public mempool before block inclusion on Ethereum mainnet.
Methodology: A transaction is counted as "seen in mempool" only if it was observed by our sentries before the slot start time of the block that included it. This corrects for transactions that appear in the mempool after block propagation.
Show code
display_sql("mempool_availability", target_date)
View query
Show code
df = load_parquet("mempool_availability", target_date)
df["tx_type_label"] = df["tx_type"].map(TX_TYPE_LABELS)
df["coverage_pct"] = df["seen_before_slot"] / df["total_txs"] * 100
# Calculate never seen (truly private)
df["never_seen"] = df["total_txs"] - df["seen_before_slot"] - df["seen_after_slot"]
# Extract p50 age from percentiles array (index 0)
df["p50_age_ms"] = df["age_percentiles_ms"].apply(lambda x: x[0] if x is not None and len(x) > 0 else np.nan)
df["p50_age_s"] = df["p50_age_ms"] / 1000
# Add hour column for time-series aggregation
df["hour"] = df["slot_start_date_time"].dt.floor("h")
total = df["total_txs"].sum()
before = df["seen_before_slot"].sum()
after = df["seen_after_slot"].sum()
never = total - before - after
print(f"Loaded {len(df):,} slot/type rows")
print(f"Slots: {df['slot'].nunique():,}")
print(f"Total transactions: {total:,}")
print(f" Seen before slot: {before:,} ({100*before/total:.1f}%)")
print(f" Seen after slot: {after:,} ({100*after/total:.1f}%)")
print(f" Never seen: {never:,} ({100*never/total:.1f}%)")
Coverage by transaction typeΒΆ
Percentage of transactions seen in the public mempool before the slot they were included in. Low coverage indicates private or MEV transactions that bypass the public mempool or are submitted just-in-time.
Show code
# Aggregate by type
df_summary = df.groupby(["tx_type", "tx_type_label"]).agg({
"total_txs": "sum",
"seen_before_slot": "sum",
"seen_after_slot": "sum",
}).reset_index()
df_summary["never_seen"] = df_summary["total_txs"] - df_summary["seen_before_slot"] - df_summary["seen_after_slot"]
df_summary["before_pct"] = df_summary["seen_before_slot"] / df_summary["total_txs"] * 100
df_summary["after_pct"] = df_summary["seen_after_slot"] / df_summary["total_txs"] * 100
df_summary["never_pct"] = df_summary["never_seen"] / df_summary["total_txs"] * 100
# Display summary table
summary_display = df_summary[["tx_type_label", "total_txs", "before_pct", "after_pct", "never_pct"]].copy()
summary_display.columns = ["Type", "Total", "Before slot %", "After slot %", "Never seen %"]
for col in summary_display.columns[2:]:
summary_display[col] = summary_display[col].round(1)
summary_display
Show code
# Coverage stacked bar chart showing before/after/never breakdown
fig = go.Figure()
fig.add_trace(go.Bar(
x=df_summary["tx_type_label"],
y=df_summary["before_pct"],
name="Before slot (public)",
marker_color="#27ae60",
text=df_summary["before_pct"].round(1),
textposition="inside",
))
fig.add_trace(go.Bar(
x=df_summary["tx_type_label"],
y=df_summary["after_pct"],
name="After slot (propagated)",
marker_color="#3498db",
text=df_summary["after_pct"].round(1),
textposition="inside",
))
fig.add_trace(go.Bar(
x=df_summary["tx_type_label"],
y=df_summary["never_pct"],
name="Never seen (private)",
marker_color="#95a5a6",
text=df_summary["never_pct"].round(1),
textposition="inside",
))
fig.update_traces(texttemplate="%{text:.1f}%")
fig.update_layout(
barmode="stack",
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Transaction type"),
yaxis=dict(title="Percentage", range=[0, 105]),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
height=400,
)
fig.show(config={"responsive": True})
Hourly coverage trendsΒΆ
Mempool visibility percentage over time for each transaction type. Blob transactions (type 3) typically have the highest visibility since they propagate through the public network.
Show code
# Aggregate to hourly for time-series
df_hourly = df.groupby(["hour", "tx_type", "tx_type_label"]).agg({
"total_txs": "sum",
"seen_before_slot": "sum",
"seen_after_slot": "sum",
}).reset_index()
df_hourly["coverage_pct"] = df_hourly["seen_before_slot"] / df_hourly["total_txs"] * 100
fig = px.line(
df_hourly,
x="hour",
y="coverage_pct",
color="tx_type_label",
color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
labels={"hour": "Time", "coverage_pct": "Seen before slot (%)", "tx_type_label": "Type"},
markers=True,
)
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
height=400,
)
fig.show(config={"responsive": True})
Transaction volume over timeΒΆ
Hourly transaction counts split by public (seen in mempool) vs private (not seen). The private portion represents MEV bundles and other transactions submitted directly to builders.
Show code
# Aggregate across types by hour - 3-way breakdown
df_volume = df.groupby("hour").agg({
"total_txs": "sum",
"seen_before_slot": "sum",
"seen_after_slot": "sum",
}).reset_index()
df_volume["never_seen"] = df_volume["total_txs"] - df_volume["seen_before_slot"] - df_volume["seen_after_slot"]
fig = go.Figure()
fig.add_trace(go.Bar(
x=df_volume["hour"],
y=df_volume["seen_before_slot"],
name="Before slot (public)",
marker_color="#27ae60",
))
fig.add_trace(go.Bar(
x=df_volume["hour"],
y=df_volume["seen_after_slot"],
name="After slot (propagated)",
marker_color="#3498db",
))
fig.add_trace(go.Bar(
x=df_volume["hour"],
y=df_volume["never_seen"],
name="Never seen (private)",
marker_color="#95a5a6",
))
fig.update_layout(
barmode="stack",
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Time"),
yaxis=dict(title="Transaction count"),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
height=400,
)
fig.show(config={"responsive": True})
Coverage heatmapΒΆ
Heatmap showing mempool visibility over time for each transaction type. Darker colors indicate higher coverage (more transactions seen in the public mempool).
Show code
# Pivot for heatmap using hourly aggregated data
df_pivot = df_hourly.pivot(index="tx_type_label", columns="hour", values="coverage_pct").fillna(0)
fig = go.Figure(
data=go.Heatmap(
z=df_pivot.values,
x=df_pivot.columns,
y=df_pivot.index,
colorscale="Greens",
colorbar=dict(title=dict(text="Coverage %", side="right")),
)
)
fig.update_layout(
margin=dict(l=100, r=30, t=30, b=60),
xaxis=dict(title="Time"),
yaxis=dict(title="Transaction type"),
height=300,
)
fig.show(config={"responsive": True})
Mempool age distributionΒΆ
How long transactions waited in the mempool before being included in a block. The age is measured from first observation in our sentries to the slot start time. Only transactions seen before their inclusion slot are counted.
Show code
# Extract all percentiles for each type
def extract_percentiles(group):
# Collect all non-null percentile arrays, weighted by seen_before_slot count
pct_arrays = []
for _, row in group.iterrows():
if row['seen_before_slot'] > 0 and row['age_percentiles_ms'] is not None:
pcts = row['age_percentiles_ms']
if not any(np.isnan(pcts)):
pct_arrays.append(pcts)
if not pct_arrays:
return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})
# Average percentiles across slots (simple mean for now)
avg_pcts = np.nanmean(pct_arrays, axis=0)
return pd.Series({
'p50': avg_pcts[0] / 1000,
'p75': avg_pcts[1] / 1000,
'p80': avg_pcts[2] / 1000,
'p85': avg_pcts[3] / 1000,
'p90': avg_pcts[4] / 1000,
'p95': avg_pcts[5] / 1000,
'p99': avg_pcts[6] / 1000,
})
df_age = df.groupby(['tx_type', 'tx_type_label']).apply(extract_percentiles, include_groups=False).reset_index()
# Display age table
age_display = df_age[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()
age_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']
for col in age_display.columns[1:]:
age_display[col] = age_display[col].round(1)
age_display
Show code
# Visualize age percentiles as line chart
df_age_long = df_age.melt(
id_vars=['tx_type', 'tx_type_label'],
value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],
var_name='percentile',
value_name='age_s'
)
# Convert percentile labels to numeric for x-axis
df_age_long['pct_num'] = df_age_long['percentile'].str.replace('p', '').astype(int)
fig = px.line(
df_age_long,
x='pct_num',
y='age_s',
color='tx_type_label',
color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
markers=True,
log_y=True,
labels={'pct_num': 'Percentile', 'age_s': 'Age (seconds)', 'tx_type_label': 'Type'},
)
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
height=400,
)
fig.show(config={"responsive": True})
Show code
# Aggregate histogram buckets across all slots per tx type
hist_cols = [f'age_hist_{i}' for i in range(15)]
df_hist = df.groupby(['tx_type', 'tx_type_label'])[hist_cols].sum().reset_index()
# Melt to long format for plotting
df_hist_long = df_hist.melt(
id_vars=['tx_type', 'tx_type_label'],
value_vars=hist_cols,
var_name='bucket',
value_name='count'
)
df_hist_long['bucket_idx'] = df_hist_long['bucket'].str.extract(r'(\d+)').astype(int)
df_hist_long['bucket_label'] = df_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))
# Sort by bucket index for proper ordering
df_hist_long = df_hist_long.sort_values(['tx_type', 'bucket_idx'])
fig = px.bar(
df_hist_long,
x='bucket_label',
y='count',
color='tx_type_label',
color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
facet_col='tx_type_label',
facet_col_wrap=2,
labels={'bucket_label': 'Age bucket', 'count': 'Count', 'tx_type_label': 'Type'},
category_orders={'bucket_label': HIST_LABELS},
)
fig.update_yaxes(matches=None, showticklabels=True)
fig.update_layout(
margin=dict(l=60, r=30, t=60, b=100),
showlegend=False,
height=600,
)
fig.update_xaxes(tickangle=45)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.show(config={"responsive": True})
Propagation delay (seen after slot)ΒΆ
For transactions first seen in the mempool after block inclusion, this measures how long after the slot start they appeared.
Show code
# Extract delay percentiles for transactions seen AFTER slot start
def extract_delay_percentiles(group):
pct_arrays = []
for _, row in group.iterrows():
if row['seen_after_slot'] > 0 and row['delay_percentiles_ms'] is not None:
pcts = row['delay_percentiles_ms']
if not any(np.isnan(pcts)):
pct_arrays.append(pcts)
if not pct_arrays:
return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})
avg_pcts = np.nanmean(pct_arrays, axis=0)
return pd.Series({
'p50': avg_pcts[0] / 1000,
'p75': avg_pcts[1] / 1000,
'p80': avg_pcts[2] / 1000,
'p85': avg_pcts[3] / 1000,
'p90': avg_pcts[4] / 1000,
'p95': avg_pcts[5] / 1000,
'p99': avg_pcts[6] / 1000,
})
df_delay = df.groupby(['tx_type', 'tx_type_label']).apply(extract_delay_percentiles, include_groups=False).reset_index()
# Display delay table
delay_display = df_delay[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()
delay_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']
for col in delay_display.columns[1:]:
delay_display[col] = delay_display[col].round(2)
delay_display
Show code
# Visualize delay percentiles as line chart
df_delay_long = df_delay.melt(
id_vars=['tx_type', 'tx_type_label'],
value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],
var_name='percentile',
value_name='delay_s'
)
# Convert percentile labels to numeric for x-axis
df_delay_long['pct_num'] = df_delay_long['percentile'].str.replace('p', '').astype(int)
fig = px.line(
df_delay_long,
x='pct_num',
y='delay_s',
color='tx_type_label',
color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
markers=True,
log_y=True,
labels={'pct_num': 'Percentile', 'delay_s': 'Delay (seconds)', 'tx_type_label': 'Type'},
)
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
height=400,
)
fig.show(config={"responsive": True})
Show code
# Aggregate delay histogram buckets across all slots per tx type
delay_hist_cols = [f'delay_hist_{i}' for i in range(15)]
df_delay_hist = df.groupby(['tx_type', 'tx_type_label'])[delay_hist_cols].sum().reset_index()
# Melt to long format for plotting
df_delay_hist_long = df_delay_hist.melt(
id_vars=['tx_type', 'tx_type_label'],
value_vars=delay_hist_cols,
var_name='bucket',
value_name='count'
)
df_delay_hist_long['bucket_idx'] = df_delay_hist_long['bucket'].str.extract(r'(\d+)').astype(int)
df_delay_hist_long['bucket_label'] = df_delay_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))
# Sort by bucket index for proper ordering
df_delay_hist_long = df_delay_hist_long.sort_values(['tx_type', 'bucket_idx'])
fig = px.bar(
df_delay_hist_long,
x='bucket_label',
y='count',
color='tx_type_label',
color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},
facet_col='tx_type_label',
facet_col_wrap=2,
labels={'bucket_label': 'Delay bucket', 'count': 'Count', 'tx_type_label': 'Type'},
category_orders={'bucket_label': HIST_LABELS},
)
fig.update_yaxes(matches=None, showticklabels=True)
fig.update_layout(
margin=dict(l=60, r=30, t=60, b=100),
showlegend=False,
height=600,
)
fig.update_xaxes(tickangle=45)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.show(config={"responsive": True})
Sentry coverageΒΆ
How much of the canonical transaction set each mempool observer (sentry) captured. Higher coverage indicates better mempool visibility from that observation point.
Show code
display_sql("sentry_coverage", target_date)
View query
Show code
df_sentry = load_parquet("sentry_coverage", target_date)
# Shorten sentry names for display
df_sentry["sentry_short"] = df_sentry["sentry"].str.replace("ethpandaops/mainnet/", "")
fig = px.bar(
df_sentry.head(15),
x="coverage_pct",
y="sentry_short",
orientation="h",
labels={"coverage_pct": "Coverage (%)", "sentry_short": "Sentry"},
text="coverage_pct",
)
fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside")
fig.update_layout(
margin=dict(l=250, r=60, t=30, b=60),
xaxis=dict(range=[0, 105]),
yaxis=dict(autorange="reversed"),
height=500,
)
fig.show(config={"responsive": True})