Block propagation
Analysis of block propagation timing relative to block size on the wire, with corrected timing that isolates network propagation from block building overhead.
Terminology:
- First seen (raw): Time from slot start until the first sentry observes the block. Includes block building time + network latency.
- Winning bid: Time when the MEV relay received the winning bid for the block. Marks when the block was "ready" to broadcast.
- First seen (corrected): For MEV blocks with bid timing, first_seen - winning_bid; isolates network propagation time (worked example below). For Local blocks and MEV blocks without bid timing, it equals raw first seen.
- Propagation spread: Time between when the first sentry saw the block and when the last sentry saw it.
- Wire size: Block size after Snappy compression, as transmitted over libp2p gossipsub.
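A worked example of the correction, with hypothetical numbers: a block first observed 2,100 ms into the slot, whose winning bid reached the relay at 1,600 ms, has a corrected first seen of 500 ms, the network propagation component alone.
# Hypothetical numbers illustrating the corrected metric
first_seen_ms = 2100    # first sentry observation, ms from slot start
winning_bid_ms = 1600   # winning bid received by the relay, ms from slot start
corrected_ms = first_seen_ms - winning_bid_ms
print(corrected_ms)  # 500 ms of pure network time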
Show code
# This notebook joins two data sources:
# 1. block_propagation_by_size: block sizes and propagation timing
# 2. block_production_timeline: MEV winning bid timing
display_sql("block_propagation_by_size", target_date)
Show code
# Shared imports (assumed loaded once near the top of the notebook,
# alongside the load_parquet/display_sql helpers)
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats

# Load both datasets and join on slot
df_size = load_parquet("block_propagation_by_size", target_date)
df_timeline = load_parquet("block_production_timeline", target_date)
# Join: size data + winning bid timing from timeline
df = df_size.merge(
df_timeline[["slot", "winning_bid_ms"]],
on="slot",
how="left"
)
# Add derived columns
df["spread_ms"] = df["last_seen_ms"] - df["first_seen_ms"]
df["compression_ratio"] = df["uncompressed_bytes"] / df["compressed_bytes"]
df["compressed_kib"] = df["compressed_bytes"] / 1024
df["uncompressed_kib"] = df["uncompressed_bytes"] / 1024
# Corrected first seen: subtract winning bid time for MEV blocks with bid timing
df["corrected_first_seen_ms"] = np.where(
(df["builder_type"] == "MEV") & df["winning_bid_ms"].notna(),
df["first_seen_ms"] - df["winning_bid_ms"],
df["first_seen_ms"]
)
# Size buckets for binning (in KiB)
df["size_bucket"] = pd.cut(
df["compressed_kib"],
bins=[0, 50, 100, 150, float("inf")],
labels=["< 50 KiB", "50-100 KiB", "100-150 KiB", ">= 150 KiB"]
)
SIZE_ORDER = ["< 50 KiB", "50-100 KiB", "100-150 KiB", ">= 150 KiB"]
# Builder category with 3 levels
def categorize_builder(row):
if row["builder_type"] == "Local":
return "Local"
elif pd.notna(row["winning_bid_ms"]):
return "MEV (with bid timing)"
else:
return "MEV (no bid timing)"
df["builder_category"] = df.apply(categorize_builder, axis=1)
# Category ordering and colors (excluding "MEV (no bid timing)" from plots)
CATEGORY_ORDER = ["Local", "MEV (with bid timing)"]
CATEGORY_COLORS = {
"Local": "#3498db",
"MEV (with bid timing)": "#9b59b6",
}
# Summary
print(f"Total blocks: {len(df):,}")
for cat in ["Local", "MEV (with bid timing)"]:
count = (df["builder_category"] == cat).sum()
pct = count / len(df) * 100
print(f" {cat}: {count:,} ({pct:.1f}%)")
# Info: MEV blocks without bid timing (excluded from category-based plots)
mev_no_bid = (df["builder_category"] == "MEV (no bid timing)").sum()
if mev_no_bid > 0:
print(f"\nNote: {mev_no_bid:,} MEV blocks ({mev_no_bid/len(df)*100:.1f}%) lack bid timing data and are excluded from builder category comparisons.")
Size distribution by builder type¶
Histogram comparing the block size distribution between MEV and local blocks. MEV blocks tend to be larger due to MEV extraction strategies.
Show code
fig = px.histogram(
df,
x="compressed_kib",
color="builder_type",
color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
nbins=50,
barmode="overlay",
opacity=0.7,
)
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Block size on wire (KiB)"),
yaxis=dict(title="Block count"),
legend_title="Builder type",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=400,
)
fig.show(config={"responsive": True})
# Summary
for bt in ["Local", "MEV"]:
subset = df[df["builder_type"] == bt]
print(f"{bt}: median size {subset['compressed_kib'].median():.1f} KiB, "
f"mean {subset['compressed_kib'].mean():.1f} KiB")
Compression ratio¶
Scatter plot showing the relationship between uncompressed SSZ block size and compressed (Snappy) wire size. The dashed green line shows a linear regression fit over all blocks; the dotted gray line marks 1:1 (no compression).
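The wire size can be reproduced offline by Snappy-compressing the serialized block. A minimal sketch, assuming the python-snappy package is available (the payload here is a hypothetical stand-in, not real block data):
# Sketch: compression ratio for one payload via python-snappy
import snappy
block_ssz_bytes = bytes(150_000)  # hypothetical stand-in for an SSZ-serialized block
wire_bytes = snappy.compress(block_ssz_bytes)
print(f"{len(block_ssz_bytes) / len(wire_bytes):.2f}x")
Note that an all-zero payload compresses far better than real block data, so the printed ratio is illustrative only.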
Show code
fig = go.Figure()
# Scatter points colored by builder type
for bt, color in [("Local", "#3498db"), ("MEV", "#9b59b6")]:
subset = df[df["builder_type"] == bt]
fig.add_trace(go.Scatter(
x=subset["uncompressed_kib"],
y=subset["compressed_kib"],
mode="markers",
name=bt,
marker=dict(color=color, opacity=0.4, size=5),
hovertemplate="<b>Slot %{text}</b><br>Uncompressed: %{x:.1f} KiB<br>Compressed: %{y:.1f} KiB<extra></extra>",
text=subset["slot"],
))
# Regression line (all data)
slope, intercept, r_value, p_value, std_err = stats.linregress(
df["uncompressed_kib"], df["compressed_kib"]
)
x_range = np.array([df["uncompressed_kib"].min(), df["uncompressed_kib"].max()])
y_pred = slope * x_range + intercept
fig.add_trace(go.Scatter(
x=x_range,
y=y_pred,
mode="lines",
name=f"Regression (R\u00b2={r_value**2:.3f})",
line=dict(color="#2ecc71", width=2, dash="dash"),
))
# 1:1 reference line (no compression)
fig.add_trace(go.Scatter(
x=x_range,
y=x_range,
mode="lines",
name="1:1 (no compression)",
line=dict(color="gray", width=1, dash="dot"),
))
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Uncompressed block size (KiB)"),
yaxis=dict(title="Compressed block size (KiB, on wire)"),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=500,
)
fig.show(config={"responsive": True})
# Print compression stats
print(f"Compression ratio: mean {df['compression_ratio'].mean():.2f}x, "
f"median {df['compression_ratio'].median():.2f}x")
print(f"Regression: compressed = {slope:.3f} \u00d7 uncompressed + {intercept:.1f}")
print(f"R\u00b2 = {r_value**2:.4f}")
MEV timing breakdown¶
For MEV blocks with bid timing data, we can decompose the raw first seen time into block building time (winning bid) and network propagation time.
Winning bid timing distribution¶
Distribution of winning bid timing (ms from slot start) for MEV blocks. This shows when blocks are "ready" to broadcast.
Show code
df_mev_bid = df[df["builder_category"] == "MEV (with bid timing)"]
fig = px.histogram(
df_mev_bid,
x="winning_bid_ms",
nbins=50,
color_discrete_sequence=["#9b59b6"],
)
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Winning bid (ms from slot start)"),
yaxis=dict(title="Block count"),
height=400,
)
fig.show(config={"responsive": True})
print(f"Winning bid timing (n={len(df_mev_bid):,}):")
print(f" Median: {df_mev_bid['winning_bid_ms'].median():.0f}ms")
print(f" P5-P95: {df_mev_bid['winning_bid_ms'].quantile(0.05):.0f}ms - {df_mev_bid['winning_bid_ms'].quantile(0.95):.0f}ms")
Block building vs network time¶
Stacked bars breaking raw first seen down into block building time (winning bid) and network propagation time (corrected first seen) for MEV blocks, aggregated by size bucket. Because the two components are stacked as per-bucket medians, the totals approximate, but need not exactly equal, the median raw first seen.
Show code
df_mev_bid = df[df["builder_category"] == "MEV (with bid timing)"].copy()
# Aggregate by size bucket
breakdown = df_mev_bid.groupby("size_bucket", observed=True).agg(
building_time=("winning_bid_ms", "median"),
network_time=("corrected_first_seen_ms", "median"),
count=("slot", "count"),
).reset_index()
fig = go.Figure()
fig.add_trace(go.Bar(
y=breakdown["size_bucket"],
x=breakdown["building_time"],
name="Block building (winning bid)",
orientation="h",
marker_color="#e74c3c",
))
fig.add_trace(go.Bar(
y=breakdown["size_bucket"],
x=breakdown["network_time"],
name="Network propagation",
orientation="h",
marker_color="#2ecc71",
))
fig.update_layout(
margin=dict(l=100, r=30, t=30, b=60),
xaxis=dict(title="Time (ms, median)"),
yaxis=dict(title="Block size on wire (KiB)", categoryorder="array", categoryarray=SIZE_ORDER[::-1]),
barmode="stack",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=400,
)
fig.show(config={"responsive": True})
# Print breakdown
print("Median timing breakdown (MEV blocks with bid timing):")
for _, row in breakdown.iterrows():
total = row["building_time"] + row["network_time"]
pct_building = row["building_time"] / total * 100
print(f" {row['size_bucket']}: {row['building_time']:.0f}ms building ({pct_building:.0f}%) + "
f"{row['network_time']:.0f}ms network = {total:.0f}ms total (n={row['count']:,})")
Raw vs corrected comparison¶
Comparison of raw first seen (from slot start) vs corrected first seen (from winning bid) for MEV blocks. The corrected metric isolates network propagation time.
Show code
df_mev_bid = df[df["builder_category"] == "MEV (with bid timing)"].copy()
fig = go.Figure()
# Raw first seen
fig.add_trace(go.Box(
y=df_mev_bid["first_seen_ms"],
name="Raw (from slot start)",
marker_color="#9b59b6",
boxmean=True,
))
# Corrected first seen
fig.add_trace(go.Box(
y=df_mev_bid["corrected_first_seen_ms"],
name="Corrected (from winning bid)",
marker_color="#2ecc71",
boxmean=True,
))
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
yaxis=dict(title="First seen (ms)"),
showlegend=True,
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=450,
)
fig.show(config={"responsive": True})
print(f"MEV blocks with bid timing (n={len(df_mev_bid):,}):")
print(f" Raw first seen: median {df_mev_bid['first_seen_ms'].median():.0f}ms, P95 {df_mev_bid['first_seen_ms'].quantile(0.95):.0f}ms")
print(f" Corrected: median {df_mev_bid['corrected_first_seen_ms'].median():.0f}ms, P95 {df_mev_bid['corrected_first_seen_ms'].quantile(0.95):.0f}ms")
print(f" Winning bid: median {df_mev_bid['winning_bid_ms'].median():.0f}ms")
Corrected first seen vs block size¶
Scatter plot using corrected first seen. For MEV blocks with bid timing, this shows pure network propagation time. For Local blocks and MEV blocks without bid timing, this equals raw first seen.
Show code
# Filter to categories we want to plot and sort so Local points render last (on top)
df_plot = df[df["builder_category"].isin(CATEGORY_ORDER)].copy()
render_order = {"MEV (with bid timing)": 0, "Local": 1}
df_sorted = df_plot.sort_values("builder_category", key=lambda x: x.map(render_order))
fig = px.scatter(
df_sorted,
x="corrected_first_seen_ms",
y="compressed_kib",
color="builder_category",
category_orders={"builder_category": CATEGORY_ORDER},
color_discrete_map=CATEGORY_COLORS,
opacity=0.5,
hover_data={"slot": True, "proposer_entity": True, "first_seen_ms": ":.0f", "corrected_first_seen_ms": ":.0f"},
)
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Corrected first seen (ms)"),
yaxis=dict(title="Block size on wire (KiB)"),
legend_title="Builder category",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=500,
)
fig.show(config={"responsive": True})
Corrected first seen by size bucket¶
Box plot comparing corrected first seen between builder categories across size buckets.
Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.
Show code
# Filter to categories we want to plot
df_plot = df[df["builder_category"].isin(CATEGORY_ORDER)]
fig = px.box(
df_plot,
y="size_bucket",
x="corrected_first_seen_ms",
color="builder_category",
orientation="h",
category_orders={"size_bucket": SIZE_ORDER[::-1], "builder_category": CATEGORY_ORDER},
color_discrete_map=CATEGORY_COLORS,
)
fig.update_layout(
margin=dict(l=100, r=30, t=30, b=60),
xaxis=dict(title="Corrected first seen (ms)"),
yaxis=dict(title="Block size on wire (KiB)"),
legend_title="Builder category",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=500,
)
fig.show(config={"responsive": True})
# Summary stats
for cat in CATEGORY_ORDER:
subset = df[df["builder_category"] == cat]
if len(subset) > 0:
print(f"{cat}: median {subset['corrected_first_seen_ms'].median():.0f}ms, "
f"P95 {subset['corrected_first_seen_ms'].quantile(0.95):.0f}ms, n={len(subset):,}")
Corrected first seen density by builder type¶
Density heatmaps showing the distribution of corrected first seen timing vs block size for Local and MEV blocks.
Show code
# Calculate axis ranges (trim outliers)
x_max = df["corrected_first_seen_ms"].quantile(0.99)
y_max = df["compressed_kib"].quantile(0.99)
# Filter to categories with enough data for meaningful heatmaps
df_heatmap = df[df["builder_category"].isin(["Local", "MEV (with bid timing)"])]
fig = px.density_heatmap(
df_heatmap,
x="corrected_first_seen_ms",
y="compressed_kib",
facet_col="builder_category",
facet_col_spacing=0.08,
category_orders={"builder_category": ["Local", "MEV (with bid timing)"]},
nbinsx=40,
nbinsy=40,
range_x=[0, x_max],
range_y=[0, y_max],
color_continuous_scale="Plasma",
)
fig.update_layout(
margin=dict(l=60, r=30, t=40, b=60),
height=450,
coloraxis_colorbar=dict(title="Count"),
)
fig.for_each_annotation(lambda a: a.update(
text=a.text.replace("builder_category=", ""),
font_size=12,
))
fig.for_each_xaxis(lambda x: x.update(title="Corrected first seen (ms)"))
fig.for_each_yaxis(lambda y: y.update(title="Wire size (KiB)"))
fig.show(config={"responsive": True})
Regional propagation analysis¶
Comparison of block first-seen timing across geographic regions from two data sources:
- Sentries: EthPandaOps libp2p gossipsub monitoring (~50-100 globally distributed nodes)
- Contributoor: Community beacon API event collection (~875 nodes, primarily data centers)
Both sources capture when blocks are first observed by nodes in each region. Contributoor nodes tend to show faster times due to being primarily in well-connected data centers.
Show code
# Load regional propagation data from both sources
try:
df_region_sentries = load_parquet("block_propagation_by_region", target_date)
has_sentries = True
except FileNotFoundError:
has_sentries = False
print("Note: Sentries regional data not available")
try:
df_region_contributoor = load_parquet("block_propagation_by_region_contributoor", target_date)
has_contributoor = True
except FileNotFoundError:
has_contributoor = False
print("Note: Contributoor regional data not available")
REGION_LABELS = {"EU": "Europe", "NA": "North America", "AS": "Asia", "OC": "Oceania"}
REGION_ORDER = ["EU", "NA", "AS", "OC"]
def add_region_derived_columns(df_in):
"""Add size bucket and corrected timing columns matching the main notebook."""
df_out = df_in.copy()
# Size buckets
df_out["compressed_kib"] = df_out["compressed_bytes"] / 1024
df_out["size_bucket"] = pd.cut(
df_out["compressed_kib"],
bins=[0, 50, 100, 150, float("inf")],
labels=SIZE_ORDER
)
df_out["region_label"] = df_out["region"].map(REGION_LABELS)
# Join with timeline data to get winning bid timing
df_out = df_out.merge(
df_timeline[["slot", "winning_bid_ms"]],
on="slot",
how="left"
)
# Corrected first seen: subtract winning bid time for MEV blocks with bid timing
df_out["corrected_first_seen_ms"] = np.where(
(df_out["builder_type"] == "MEV") & df_out["winning_bid_ms"].notna(),
df_out["first_seen_ms"] - df_out["winning_bid_ms"],
df_out["first_seen_ms"]
)
return df_out
if has_sentries:
df_region_sentries = add_region_derived_columns(df_region_sentries)
if has_contributoor:
df_region_contributoor = add_region_derived_columns(df_region_contributoor)
# Print summary
for name, df_r, has_data in [
("Sentries", df_region_sentries if has_sentries else None, has_sentries),
("Contributoor", df_region_contributoor if has_contributoor else None, has_contributoor),
]:
if has_data:
print(f"\n{name}:")
for region in REGION_ORDER:
r = df_r[df_r["region"] == region]
print(f" {REGION_LABELS[region]}: {len(r):,} slot-regions, "
f"median corrected first seen {r['corrected_first_seen_ms'].median():.0f}ms")
Regional timing distribution (corrected)¶
Box plots showing corrected first seen timing distribution by region, comparing Sentries (libp2p) and Contributoor (beacon API) data sources. Faceted by builder type (MEV vs Local).
Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.
Show code
if has_sentries or has_contributoor:
# Combine regional data with source labels
dfs_to_concat = []
if has_sentries:
df_s = df_region_sentries.copy()
df_s["source"] = "Sentries"
dfs_to_concat.append(df_s)
if has_contributoor:
df_c = df_region_contributoor.copy()
df_c["source"] = "Contributoor"
dfs_to_concat.append(df_c)
df_regional_combined = pd.concat(dfs_to_concat, ignore_index=True)
df_regional_combined["region_label"] = df_regional_combined["region"].map(REGION_LABELS)
fig = px.box(
df_regional_combined,
x="region_label",
y="corrected_first_seen_ms",
color="source",
facet_col="builder_type",
color_discrete_map={"Sentries": "#3498db", "Contributoor": "#2ecc71"},
category_orders={
"region_label": [REGION_LABELS[r] for r in REGION_ORDER],
"builder_type": ["MEV", "Local"],
},
)
fig.update_layout(
margin=dict(l=60, r=30, t=40, b=60),
xaxis_title="Region",
yaxis_title="Corrected first seen (ms)",
legend_title="Data source",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=450,
)
fig.for_each_annotation(lambda a: a.update(text=a.text.replace("builder_type=", "")))
fig.show(config={"responsive": True})
# Print summary stats
print("Corrected first seen by region (ms):")
for bt in ["MEV", "Local"]:
print(f"\n {bt}:")
for region in REGION_ORDER:
for source in ["Sentries", "Contributoor"]:
subset = df_regional_combined[
(df_regional_combined["region"] == region) &
(df_regional_combined["source"] == source) &
(df_regional_combined["builder_type"] == bt)
]["corrected_first_seen_ms"]
if len(subset) > 0:
print(f" {REGION_LABELS[region]} ({source}): "
f"P50={subset.median():.0f}ms, P95={subset.quantile(0.95):.0f}ms (n={len(subset):,})")
else:
print("No regional data available")
Regional CDF comparison by size (corrected timing)¶
Cumulative distribution functions (CDFs) showing corrected first seen timing by geographic region, faceted by block size bucket and builder type.
How to read these charts:
- X-axis: Corrected first seen time in milliseconds. For MEV blocks, this is first_seen - winning_bid (isolating network propagation). For Local blocks, this equals raw first seen.
- Y-axis: Percentile (0-100%). A point at (300ms, 50%) means 50% of blocks were seen within 300ms (see the construction sketch below).
- Steeper curves = faster, more consistent propagation; a curve that climbs quickly to 100% indicates tight timing.
- Right-shifted curves = slower propagation in that region.
- P50 line (horizontal dotted): Where curves cross this line shows median timing per region.
Visual encoding:
- Color = Geographic region (EU=blue, NA=green, AS=red, OC=orange)
- Line style = Data source (solid=Sentries libp2p gossipsub, dashed=Contributoor beacon API)
- Columns = Builder type (MEV vs Local)
- Rows = Block size bucket (smallest at top, largest at bottom)
What to look for:
- Do larger blocks show more spread between regions? (Size impact on propagation)
- Does Contributoor (dashed) consistently show faster times than Sentries (solid)? (Data center vs diverse node placement)
- Which regions lag behind as block size increases? (Geographic disadvantage for large blocks)
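Before the code, a minimal sketch of how each CDF curve is constructed, on toy timings (hypothetical values), mirroring the percentile-based approach used below:
# Toy corrected-first-seen samples for one region/size bucket (ms)
import numpy as np
times_ms = np.array([220, 240, 250, 260, 270, 280, 300, 320])
percentiles = np.arange(0, 101, 1)
values = np.percentile(times_ms, percentiles)
# The point (values[50], 50) is the median: half the blocks were seen by then
print(f"P50 = {values[50]:.0f} ms")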
Show code
if has_sentries or has_contributoor:
from plotly.subplots import make_subplots
REGION_COLORS = {
"EU": "#3498db", # Blue
"NA": "#2ecc71", # Green
"AS": "#e74c3c", # Red
"OC": "#f39c12", # Orange
}
SOURCE_DASH = {
"Sentries": "solid",
"Contributoor": "dash",
}
BUILDER_TYPES = ["MEV", "Local"]
# Create 4x2 subplots: rows = size buckets, cols = builder types
fig = make_subplots(
rows=len(SIZE_ORDER), cols=len(BUILDER_TYPES),
subplot_titles=[f"{bt}" for bt in BUILDER_TYPES] + [""] * (len(SIZE_ORDER) - 1) * 2,
row_titles=SIZE_ORDER,
horizontal_spacing=0.06,
vertical_spacing=0.06,
)
percentiles = np.arange(0, 101, 1)
def add_cdf_traces(df_r, source_name, builder_type, size_bucket, row, col, show_legend):
subset = df_r[
(df_r["builder_type"] == builder_type) &
(df_r["size_bucket"] == size_bucket)
]
for region in REGION_ORDER:
region_data = subset[subset["region"] == region]["corrected_first_seen_ms"]
if len(region_data) >= 10: # Need enough data for meaningful CDF
values = np.percentile(region_data, percentiles)
fig.add_trace(
go.Scatter(
x=values,
y=percentiles,
mode="lines",
name=f"{REGION_LABELS[region]} ({source_name})",
line=dict(
color=REGION_COLORS[region],
width=2,
dash=SOURCE_DASH[source_name],
),
showlegend=show_legend,
legendgroup=f"{region}_{source_name}",
hovertemplate=f"{REGION_LABELS[region]} ({source_name})<br>%{{x:.0f}}ms = P%{{y}}<extra></extra>",
),
row=row, col=col,
)
# Add traces for each combination
for row_idx, size_bucket in enumerate(SIZE_ORDER, 1):
for col_idx, builder_type in enumerate(BUILDER_TYPES, 1):
# Only show legend on first subplot
show_legend = (row_idx == 1 and col_idx == 1)
if has_sentries:
add_cdf_traces(df_region_sentries, "Sentries", builder_type, size_bucket, row_idx, col_idx, show_legend)
if has_contributoor:
add_cdf_traces(df_region_contributoor, "Contributoor", builder_type, size_bucket, row_idx, col_idx, show_legend)
# Add P50 reference line
fig.add_hline(y=50, line_dash="dot", line_color="gray", line_width=1, row=row_idx, col=col_idx)
fig.update_layout(
margin=dict(l=100, r=30, t=50, b=60),
height=1100,
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="center",
x=0.5,
font_size=10,
),
)
# Update axes
for col in [1, 2]:
fig.update_xaxes(title_text="Corrected first seen (ms)", row=len(SIZE_ORDER), col=col)
fig.update_yaxes(title_text="Percentile", col=1)
fig.show(config={"responsive": True})
# Print summary statistics
print("Line styles: solid = Sentries (libp2p), dashed = Contributoor (beacon API)")
print("\nP50 (median) timing by size bucket and builder type:\n")
for size_bucket in SIZE_ORDER:
print(f" {size_bucket}:")
for bt in BUILDER_TYPES:
for source, df_r, has_data in [
("Sentries", df_region_sentries if has_sentries else None, has_sentries),
("Contributoor", df_region_contributoor if has_contributoor else None, has_contributoor),
]:
if has_data:
subset = df_r[(df_r["builder_type"] == bt) & (df_r["size_bucket"] == size_bucket)]
if len(subset) >= 10:
medians = []
for region in REGION_ORDER:
r = subset[subset["region"] == region]["corrected_first_seen_ms"]
if len(r) > 0:
medians.append(f"{REGION_LABELS[region][:2]}:{r.median():.0f}")
if medians:
print(f" {bt} ({source}): {', '.join(medians)} ms")
else:
print("No regional data available")
First-seen "winner" by region (corrected timing)¶
For each slot, which region observed the block first using corrected timing? This shows the percentage of slots where each region was the first to see the block after accounting for block building time.
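The selection reduces to a per-slot argmin over corrected timing; a toy sketch equivalent to the sort-and-deduplicate approach in the code below (hypothetical rows):
# Each row is one region's observation of one slot
import pandas as pd
toy = pd.DataFrame({
    "slot": [1, 1, 2, 2],
    "region": ["EU", "NA", "EU", "NA"],
    "corrected_first_seen_ms": [300, 250, 180, 260],
})
# idxmin keeps the fastest observation per slot: NA wins slot 1, EU wins slot 2
winners = toy.loc[toy.groupby("slot")["corrected_first_seen_ms"].idxmin()]
print(winners["region"].value_counts())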
Show code
if has_sentries or has_contributoor:
def compute_winner_stats(df_r, source_name):
"""Compute which region saw each slot first using corrected timing."""
# Reset index, sort by slot and corrected first seen
df_sorted = df_r.reset_index(drop=True).sort_values(["slot", "corrected_first_seen_ms"])
# Keep first (fastest) per slot
winner_per_slot = df_sorted.drop_duplicates(subset="slot", keep="first")
# Count wins per region
region_wins = winner_per_slot["region"].value_counts()
total_slots = winner_per_slot["slot"].nunique()
rows = []
for region in REGION_ORDER:
wins = region_wins.get(region, 0)
rows.append({
"source": source_name,
"region": region,
"region_label": REGION_LABELS[region],
"win_count": wins,
"win_pct": wins / total_slots * 100 if total_slots > 0 else 0,
})
return rows
rows = []
if has_sentries:
rows.extend(compute_winner_stats(df_region_sentries, "Sentries"))
if has_contributoor:
rows.extend(compute_winner_stats(df_region_contributoor, "Contributoor"))
df_winners = pd.DataFrame(rows)
fig = px.bar(
df_winners,
x="region_label",
y="win_pct",
color="source",
barmode="group",
color_discrete_map={"Sentries": "#3498db", "Contributoor": "#2ecc71"},
category_orders={"region_label": [REGION_LABELS[r] for r in REGION_ORDER]},
text=df_winners["win_pct"].apply(lambda x: f"{x:.1f}%"),
)
fig.update_traces(textposition="outside")
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis_title="Region",
yaxis_title="% of slots first seen (corrected)",
legend_title="Data source",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=400,
)
fig.show(config={"responsive": True})
# Print summary
print("Region 'wins' (first to see block, corrected timing):")
for source in ["Sentries", "Contributoor"]:
subset = df_winners[df_winners["source"] == source]
if len(subset) > 0:
print(f"\n {source}:")
for _, row in subset.sort_values("win_pct", ascending=False).iterrows():
print(f" {row['region_label']}: {row['win_pct']:.1f}% ({row['win_count']:,} slots)")
else:
print("No regional data available")
Region × size interaction (corrected timing)¶
Median corrected first seen timing by region and block size bucket. Shows whether larger blocks disproportionately impact certain regions after accounting for block building time.
Show code
if has_sentries or has_contributoor:
def compute_region_size_matrix(df_r):
"""Compute median corrected first seen by region and size bucket."""
return df_r.groupby(["region", "size_bucket"], observed=True)["corrected_first_seen_ms"].median().unstack()
# Compute matrices for available sources
matrices = {}
if has_sentries:
matrices["Sentries"] = compute_region_size_matrix(df_region_sentries)
if has_contributoor:
matrices["Contributoor"] = compute_region_size_matrix(df_region_contributoor)
# Create side-by-side heatmaps
n_sources = len(matrices)
fig = make_subplots(
rows=1, cols=n_sources,
subplot_titles=list(matrices.keys()),
horizontal_spacing=0.1,
)
# Shared color scale
all_values = np.concatenate([m.values.flatten() for m in matrices.values()])
all_values = all_values[~np.isnan(all_values)]
vmin, vmax = np.percentile(all_values, [5, 95])
for i, (source, matrix) in enumerate(matrices.items(), 1):
# Reorder rows to match REGION_ORDER
matrix = matrix.reindex(REGION_ORDER)
fig.add_trace(
go.Heatmap(
z=matrix.values,
x=[str(c) for c in matrix.columns],
y=[REGION_LABELS[r] for r in matrix.index],
colorscale="Plasma",
zmin=vmin,
zmax=vmax,
# Format labels per cell; guard against NaN where a region/size combo has no data
text=[[f"{v:.0f}" if pd.notna(v) else "" for v in r] for r in matrix.values],
texttemplate="%{text}",
textfont={"size": 11},
showscale=(i == n_sources),
colorbar=dict(title="ms") if i == n_sources else None,
hovertemplate="Region: %{y}<br>Size: %{x}<br>Median: %{z:.0f}ms<extra></extra>",
),
row=1, col=i,
)
fig.update_layout(
margin=dict(l=100, r=30, t=60, b=60),
height=350,
)
fig.update_xaxes(title_text="Block size on wire", row=1)
fig.update_yaxes(title_text="Region", col=1)
fig.show(config={"responsive": True})
# Print the data
print("Median corrected first seen (ms) by region and size bucket:\n")
for source, matrix in matrices.items():
print(f"{source}:")
matrix = matrix.reindex(REGION_ORDER)
for region in REGION_ORDER:
row = matrix.loc[region]
values = ", ".join([f"{row[c]:.0f}" for c in SIZE_ORDER if c in row.index and pd.notna(row[c])])
print(f" {REGION_LABELS[region]}: {values}")
print()
else:
print("No regional data available")
Propagation spread¶
Propagation spread is the time between when the first sentry saw the block and when the last sentry saw it. Larger blocks should take longer to propagate across all sentries.
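For example (hypothetical numbers): a block first seen by any sentry at 400 ms and last seen at 950 ms has a spread of 550 ms.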
Spread by size (box plot)¶
Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.
Show code
fig = px.box(
df,
y="size_bucket",
x="spread_ms",
color="builder_type",
orientation="h",
category_orders={"size_bucket": SIZE_ORDER[::-1], "builder_type": ["MEV", "Local"]},
color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
)
fig.update_layout(
margin=dict(l=100, r=30, t=30, b=60),
xaxis=dict(title="Propagation spread (last seen - first seen, ms)"),
yaxis=dict(title="Block size on wire (KiB)"),
legend_title="Builder type",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=400,
)
fig.show(config={"responsive": True})
Spread vs size (scatter)¶
Scatter view showing individual blocks.
Show code
fig = px.scatter(
df,
x="spread_ms",
y="compressed_kib",
color="builder_type",
color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
opacity=0.5,
hover_data={"slot": True, "proposer_entity": True, "spread_ms": ":.0f", "corrected_first_seen_ms": ":.0f"},
)
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Propagation spread (ms)"),
yaxis=dict(title="Block size on wire (KiB)"),
legend_title="Builder type",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=500,
)
fig.show(config={"responsive": True})
Corrected first seen by proposer entity¶
Top 20 proposer entities by block count (minimum 20 blocks), ranked by median corrected first seen timing. Circle, square, and diamond markers show P75, P90, and P95 timing respectively; bar labels show block counts.
Show code
entity_stats = df.groupby("proposer_entity").agg(
block_count=("slot", "count"),
p50_ms=("corrected_first_seen_ms", "median"),
p75_ms=("corrected_first_seen_ms", lambda x: x.quantile(0.75)),
p90_ms=("corrected_first_seen_ms", lambda x: x.quantile(0.90)),
p95_ms=("corrected_first_seen_ms", lambda x: x.quantile(0.95)),
mev_pct=("builder_type", lambda x: (x == "MEV").mean() * 100),
).reset_index()
# Filter to entities with 20+ blocks, get top 20 by block count
entity_stats = entity_stats[entity_stats["block_count"] >= 20]
top_by_count = entity_stats.nlargest(20, "block_count").sort_values("p50_ms")
fig = go.Figure()
# P50 bars (median)
fig.add_trace(go.Bar(
y=top_by_count["proposer_entity"],
x=top_by_count["p50_ms"],
orientation="h",
name="P50 (median)",
marker_color="#3498db",
text=top_by_count["block_count"].apply(lambda x: f"{x:,}"),
textposition="outside",
hovertemplate="<b>%{y}</b><br>P50: %{x:.0f}ms<br>Blocks: %{text}<extra></extra>",
))
# P75 markers
fig.add_trace(go.Scatter(
y=top_by_count["proposer_entity"],
x=top_by_count["p75_ms"],
mode="markers",
name="P75",
marker=dict(color="#f39c12", size=8, symbol="circle"),
hovertemplate="<b>%{y}</b><br>P75: %{x:.0f}ms<extra></extra>",
))
# P90 markers
fig.add_trace(go.Scatter(
y=top_by_count["proposer_entity"],
x=top_by_count["p90_ms"],
mode="markers",
name="P90",
marker=dict(color="#e67e22", size=8, symbol="square"),
hovertemplate="<b>%{y}</b><br>P90: %{x:.0f}ms<extra></extra>",
))
# P95 markers
fig.add_trace(go.Scatter(
y=top_by_count["proposer_entity"],
x=top_by_count["p95_ms"],
mode="markers",
name="P95",
marker=dict(color="#e74c3c", size=8, symbol="diamond"),
hovertemplate="<b>%{y}</b><br>P95: %{x:.0f}ms<extra></extra>",
))
fig.update_layout(
margin=dict(l=150, r=60, t=30, b=60),
xaxis=dict(title="Corrected first seen (ms)"),
yaxis=dict(title=""),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=600,
barmode="overlay",
)
fig.show(config={"responsive": True})
Top 10 proposer entities (density)¶
Corrected first seen vs block size density for MEV blocks, faceted by top 10 proposer entities (by block count, descending).
Show code
df_mev = df[df["builder_type"] == "MEV"].copy()
# Replace empty/null proposer entities with "(unknown)"
df_mev["proposer_entity"] = df_mev["proposer_entity"].fillna("(unknown)").replace("", "(unknown)")
# Get top 10 entities by block count (value_counts returns descending order)
entity_counts = df_mev["proposer_entity"].value_counts().head(10)
top_entities = entity_counts.index.tolist()
df_top = df_mev[df_mev["proposer_entity"].isin(top_entities)].copy()
# Create legend labels with block counts, ordered descending by count
legend_labels = {entity: f"{entity} ({count:,})" for entity, count in entity_counts.items()}
df_top["entity_label"] = df_top["proposer_entity"].map(legend_labels)
label_order = [legend_labels[e] for e in top_entities] # Descending order by count
# Density heatmap by entity (faceted)
x_max = df_top["corrected_first_seen_ms"].quantile(0.99)
y_max = df_top["compressed_kib"].quantile(0.99)
fig = px.density_heatmap(
df_top,
x="corrected_first_seen_ms",
y="compressed_kib",
facet_col="entity_label",
facet_col_wrap=5,
facet_col_spacing=0.04,
facet_row_spacing=0.08,
category_orders={"entity_label": label_order},
nbinsx=20,
nbinsy=20,
range_x=[0, x_max],
range_y=[0, y_max],
color_continuous_scale="Plasma",
)
fig.update_layout(
margin=dict(l=60, r=30, t=40, b=60),
height=500,
coloraxis_colorbar=dict(title="Count"),
)
fig.for_each_annotation(lambda a: a.update(
text=a.text.replace("entity_label=", ""),
font_size=10,
))
fig.for_each_xaxis(lambda x: x.update(title=""))
fig.for_each_yaxis(lambda y: y.update(title=""))
fig.add_annotation(
text="Corrected first seen (ms)",
xref="paper", yref="paper",
x=0.5, y=-0.08,
showarrow=False,
font_size=12,
)
fig.add_annotation(
text="Wire size (KiB)",
xref="paper", yref="paper",
x=-0.04, y=0.5,
showarrow=False,
font_size=12,
textangle=-90,
)
fig.show(config={"responsive": True})
Anomaly detection¶
The following charts help identify blocks that propagated slower than expected given their size, using corrected timing.
Corrected first seen residuals¶
Residual = actual corrected first seen - expected based on block size. Positive residuals indicate blocks that were slower than expected for their size. The regression line is fit per builder category.
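Hypothetical example: if the per-category fit predicts 420 ms for a 120 KiB block and its corrected first seen was 700 ms, the residual is +280 ms, i.e. the block was 280 ms slower than its size alone would suggest.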
Show code
# Fit regression per builder category and compute residuals
df["expected_corrected_first_seen"] = np.nan
df["residual_ms"] = np.nan
for cat in CATEGORY_ORDER:
mask = df["builder_category"] == cat
subset = df[mask]
if len(subset) > 10:
slope, intercept, _, _, _ = stats.linregress(
subset["compressed_kib"], subset["corrected_first_seen_ms"]
)
df.loc[mask, "expected_corrected_first_seen"] = slope * df.loc[mask, "compressed_kib"] + intercept
df.loc[mask, "residual_ms"] = df.loc[mask, "corrected_first_seen_ms"] - df.loc[mask, "expected_corrected_first_seen"]
# Filter to categories we want to plot and sort for rendering order (Local on top)
df_plot = df[df["builder_category"].isin(CATEGORY_ORDER)].copy()
render_order = {"MEV (with bid timing)": 0, "Local": 1}
df_sorted = df_plot.sort_values("builder_category", key=lambda x: x.map(render_order))
fig = px.scatter(
df_sorted,
x="compressed_kib",
y="residual_ms",
color="builder_category",
category_orders={"builder_category": CATEGORY_ORDER},
color_discrete_map=CATEGORY_COLORS,
opacity=0.5,
hover_data={"slot": True, "proposer_entity": True, "corrected_first_seen_ms": ":.0f", "residual_ms": ":.0f"},
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", annotation_text="Expected")
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Block size on wire (KiB)"),
yaxis=dict(title="Corrected first seen residual (ms)"),
legend_title="Builder category",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=500,
)
fig.show(config={"responsive": True})
# Count outliers (considering only plotted categories)
df_filtered = df[df["builder_category"].isin(CATEGORY_ORDER)]
outlier_threshold = df_filtered["residual_ms"].quantile(0.95)
outliers = df_filtered[df_filtered["residual_ms"] > outlier_threshold]
print(f"Outlier threshold (P95): {outlier_threshold:.0f}ms")
print(f"Blocks above P95: {len(outliers):,} ({len(outliers)/len(df_filtered)*100:.1f}%)")
Slow blocks (z-score > 2)¶
Blocks with corrected first seen timing more than 2 standard deviations above the mean for their size bucket. These are unusually slow relative to similar-sized blocks.
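Numeric illustration with hypothetical values: for a bucket with mean 350 ms and standard deviation 80 ms, a block seen at 600 ms scores z = (600 - 350) / 80 ≈ 3.1σ, well past the 2σ cutoff.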
Show code
from IPython.display import HTML, display
# Calculate z-scores within each size bucket (using corrected timing)
df["zscore"] = df.groupby("size_bucket", observed=True)["corrected_first_seen_ms"].transform(
lambda x: (x - x.mean()) / x.std()
)
# Get blocks with z-score > 2
slow_blocks = df[df["zscore"] > 2].sort_values("zscore", ascending=False).head(20)
if len(slow_blocks) > 0:
rows = []
for _, row in slow_blocks.iterrows():
slot = int(row["slot"])
lab_url = f"https://lab.ethpandaops.io/ethereum/slots/{slot}"
rows.append(f"""
<tr>
<td><a href="{lab_url}" target="_blank">{slot:,}</a></td>
<td>{row['builder_category']}</td>
<td>{row['proposer_entity']}</td>
<td>{row['compressed_kib']:.1f}</td>
<td>{row['corrected_first_seen_ms']:.0f}</td>
<td>{row['zscore']:.1f}\u03c3</td>
</tr>
""")
html = f'''
<style>
.anomaly-table {{ border-collapse: collapse; font-family: monospace; font-size: 13px; width: 100%; }}
.anomaly-table th {{ background: #2c3e50; color: white; padding: 8px; text-align: left; }}
.anomaly-table td {{ padding: 6px 8px; border-bottom: 1px solid #eee; }}
.anomaly-table tr:hover {{ background: #f5f5f5; }}
.anomaly-table a {{ color: #3498db; text-decoration: none; }}
.anomaly-table a:hover {{ text-decoration: underline; }}
</style>
<table class="anomaly-table">
<thead>
<tr><th>Slot</th><th>Builder</th><th>Proposer</th><th>Size (KiB)</th><th>Corrected first seen (ms)</th><th>Z-score</th></tr>
</thead>
<tbody>
{"".join(rows)}
</tbody>
</table>
'''
display(HTML(html))
print(f"\nTotal blocks with z-score > 2: {len(df[df['zscore'] > 2]):,}")
else:
print("No blocks with z-score > 2 found.")
Propagation spread outliers¶
Blocks that were both slow to arrive (high corrected first seen) AND slow to spread across sentries (high propagation spread). The top-right quadrant shows the worst-performing blocks.
Show code
# Calculate percentile thresholds
first_seen_p90 = df["corrected_first_seen_ms"].quantile(0.90)
spread_p90 = df["spread_ms"].quantile(0.90)
# Mark outliers (both metrics above P90)
df["is_double_outlier"] = (df["corrected_first_seen_ms"] > first_seen_p90) & (df["spread_ms"] > spread_p90)
fig = px.scatter(
df,
x="corrected_first_seen_ms",
y="spread_ms",
color="size_bucket",
category_orders={"size_bucket": SIZE_ORDER},
opacity=0.5,
hover_data={"slot": True, "proposer_entity": True, "builder_category": True, "compressed_kib": ":.1f"},
)
# Add quadrant lines
fig.add_vline(x=first_seen_p90, line_dash="dot", line_color="red",
annotation_text=f"P90: {first_seen_p90:.0f}ms", annotation_position="top")
fig.add_hline(y=spread_p90, line_dash="dot", line_color="red",
annotation_text=f"P90: {spread_p90:.0f}ms")
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Corrected first seen (ms)"),
yaxis=dict(title="Propagation spread (ms)"),
legend_title="Size bucket",
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=500,
)
fig.show(config={"responsive": True})
# Count double outliers
double_outliers = df[df["is_double_outlier"]]
print(f"Blocks in top-right quadrant (both > P90): {len(double_outliers):,} ({len(double_outliers)/len(df)*100:.1f}%)")
for cat in CATEGORY_ORDER:
count = (double_outliers["builder_category"] == cat).sum()
print(f" {cat}: {count:,}")
Entity anomaly rate¶
Percentage of each proposer entity's blocks that have corrected first seen > P95. Entities with high anomaly rates may have connectivity or configuration issues.
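Small samples are noisy under a 5% base rate; as a binomial sanity check (a sketch, not part of the pipeline), an entity with exactly 20 blocks shows 3 or more slow blocks purely by chance roughly 7.5% of the time:
# Chance that a healthy 20-block entity shows >= 3 blocks past the P95 cut
from scipy import stats
print(f"{stats.binom.sf(2, 20, 0.05):.3f}")  # ~0.075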
Show code
# Calculate P95 threshold using corrected timing
p95_threshold = df["corrected_first_seen_ms"].quantile(0.95)
df["is_slow"] = df["corrected_first_seen_ms"] > p95_threshold
# Aggregate by entity
entity_anomaly = df.groupby("proposer_entity").agg(
block_count=("slot", "count"),
slow_count=("is_slow", "sum"),
median_corrected=("corrected_first_seen_ms", "median"),
).reset_index()
entity_anomaly["anomaly_rate"] = entity_anomaly["slow_count"] / entity_anomaly["block_count"] * 100
# Filter to entities with 20+ blocks and sort by anomaly rate
entity_anomaly = entity_anomaly[entity_anomaly["block_count"] >= 20]
top_anomaly = entity_anomaly.nlargest(15, "anomaly_rate")
fig = go.Figure()
fig.add_trace(go.Bar(
y=top_anomaly["proposer_entity"],
x=top_anomaly["anomaly_rate"],
orientation="h",
marker_color="#e74c3c",
text=top_anomaly.apply(lambda r: f"{r['slow_count']:.0f}/{r['block_count']:.0f}", axis=1),
textposition="outside",
hovertemplate="<b>%{y}</b><br>Anomaly rate: %{x:.1f}%<br>Slow blocks: %{text}<extra></extra>",
))
# Add expected rate line (5% by definition of P95)
fig.add_vline(x=5, line_dash="dash", line_color="gray", annotation_text="Expected (5%)")
fig.update_layout(
margin=dict(l=150, r=80, t=30, b=60),
xaxis=dict(title="% of blocks with corrected first seen > P95", range=[0, max(top_anomaly["anomaly_rate"]) * 1.2]),
yaxis=dict(title="", categoryorder="total ascending"),
height=500,
)
fig.show(config={"responsive": True})
print(f"P95 threshold: {p95_threshold:.0f}ms")
print(f"Entities shown: {len(top_anomaly)} (with 20+ blocks, sorted by anomaly rate)")