Tue, Jan 27, 2026

Block propagation - 2026-01-27

Analysis of block propagation timing relative to block size on the wire, with corrected timing that isolates network propagation from block building overhead.

Terminology:

  • First seen (raw): Time from slot start until the first sentry observes the block. Includes block building time + network latency.
  • Winning bid: Time when the MEV relay received the winning bid for the block. Marks when the block was "ready" to broadcast.
  • First seen (corrected): For MEV blocks with bid timing: first_seen - winning_bid. Isolates network propagation time.
  • Propagation spread: Time between when the first sentry saw the block and when the last sentry saw it.
  • Wire size: Block size after Snappy compression, as transmitted over libp2p gossipsub.
Show code
# This notebook joins two data sources:
# 1. block_propagation_by_size: block sizes and propagation timing
# 2. block_production_timeline: MEV winning bid timing
# Render the SQL behind the size/propagation dataset for the analysis date.
display_sql("block_propagation_by_size", target_date)
View query
-- Join gossipsub propagation timing with canonical block metadata, proposer
-- entity, and an MEV slot list, scoped to one UTC day on mainnet.
WITH
-- Get MEV slot list (slots with relay payload delivery)
mev_slots AS (
    SELECT DISTINCT slot
    FROM mev_relay_proposer_payload_delivered FINAL
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-01-27' AND slot_start_date_time < '2026-01-27'::date + INTERVAL 1 DAY
),

-- Block metadata (size, proposer)
block_meta AS (
    SELECT DISTINCT
        slot,
        block_root AS block,
        proposer_index,
        block_total_bytes,
        block_total_bytes_compressed
    FROM canonical_beacon_block FINAL
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-01-27' AND slot_start_date_time < '2026-01-27'::date + INTERVAL 1 DAY
),

-- Proposer entity mapping
proposer_entity AS (
    SELECT index, entity
    FROM ethseer_validator_entity FINAL
    WHERE meta_network_name = 'mainnet'
),

-- Propagation timing aggregated across all sentries
propagation AS (
    SELECT
        slot,
        block,
        min(propagation_slot_start_diff) AS first_seen_ms,
        max(propagation_slot_start_diff) AS last_seen_ms,
        quantile(0.5)(propagation_slot_start_diff) AS median_ms,
        count() AS sentry_count
    FROM libp2p_gossipsub_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-01-27' AND slot_start_date_time < '2026-01-27'::date + INTERVAL 1 DAY
      -- Keep only observations within the 12s slot (diff < 12000 ms)
      AND propagation_slot_start_diff < 12000
    GROUP BY slot, block
)

SELECT
    p.slot AS slot,
    bm.block_total_bytes AS uncompressed_bytes,
    bm.block_total_bytes_compressed AS compressed_bytes,
    bm.proposer_index,
    coalesce(pe.entity, 'Unknown') AS proposer_entity,
    -- Use IN for reliable MEV detection on distributed tables
    if(p.slot GLOBAL IN mev_slots, 'MEV', 'Local') AS builder_type,
    p.first_seen_ms AS first_seen_ms,
    p.last_seen_ms AS last_seen_ms,
    p.median_ms AS median_ms,
    p.sentry_count AS sentry_count
FROM propagation p
-- GLOBAL: materialize the right side once and ship it to every shard
GLOBAL LEFT JOIN block_meta bm ON p.slot = bm.slot AND p.block = bm.block
GLOBAL LEFT JOIN proposer_entity pe ON bm.proposer_index = pe.index
-- Drop observed blocks that have no matching canonical block metadata
WHERE bm.block_total_bytes IS NOT NULL
ORDER BY p.slot
Show code
# Load both datasets and join on slot
df_size = load_parquet("block_propagation_by_size", target_date)
df_timeline = load_parquet("block_production_timeline", target_date)

# Join: size data + winning bid timing from timeline.
# Deduplicate the timeline on slot first: a duplicated key on the right side
# of a left merge would silently fan out rows and inflate every block count
# downstream. With unique slots this is a no-op.
df = df_size.merge(
    df_timeline[["slot", "winning_bid_ms"]].drop_duplicates(subset="slot"),
    on="slot",
    how="left"
)

# Derived columns used throughout the notebook.
df["spread_ms"] = df["last_seen_ms"] - df["first_seen_ms"]
df["compression_ratio"] = df["uncompressed_bytes"] / df["compressed_bytes"]
df["compressed_kib"] = df["compressed_bytes"] / 1024
df["uncompressed_kib"] = df["uncompressed_bytes"] / 1024

# Corrected first seen: subtract winning bid time for MEV blocks with bid
# timing; Local blocks (and MEV blocks without bid data) keep the raw value.
df["corrected_first_seen_ms"] = np.where(
    (df["builder_type"] == "MEV") & df["winning_bid_ms"].notna(),
    df["first_seen_ms"] - df["winning_bid_ms"],
    df["first_seen_ms"]
)

# Size buckets for binning (in KiB); labels must match the bin edges' order.
df["size_bucket"] = pd.cut(
    df["compressed_kib"],
    bins=[0, 50, 100, 150, float("inf")],
    labels=["< 50 KiB", "50-100 KiB", "100-150 KiB", ">= 150 KiB"]
)
SIZE_ORDER = ["< 50 KiB", "50-100 KiB", "100-150 KiB", ">= 150 KiB"]
# Builder category with 3 levels
def categorize_builder(row):
    """Classify a row as "Local", "MEV (with bid timing)", or "MEV (no bid timing)"."""
    if row["builder_type"] == "Local":
        return "Local"
    has_bid_timing = pd.notna(row["winning_bid_ms"])
    return "MEV (with bid timing)" if has_bid_timing else "MEV (no bid timing)"

df["builder_category"] = df.apply(categorize_builder, axis=1)

# Category ordering and colors (excluding "MEV (no bid timing)" from plots)
CATEGORY_ORDER = ["Local", "MEV (with bid timing)"]
CATEGORY_COLORS = {
    "Local": "#3498db",
    "MEV (with bid timing)": "#9b59b6",
}

# Summary
print(f"Total blocks: {len(df):,}")
for cat in ["Local", "MEV (with bid timing)"]:
    count = (df["builder_category"] == cat).sum()
    pct = count / len(df) * 100
    print(f"  {cat}: {count:,} ({pct:.1f}%)")

# Info: MEV blocks without bid timing (excluded from category-based plots)
mev_no_bid = (df["builder_category"] == "MEV (no bid timing)").sum()
if mev_no_bid > 0:
    print(f"\nNote: {mev_no_bid:,} MEV blocks ({mev_no_bid/len(df)*100:.1f}%) lack bid timing data and are excluded from builder category comparisons.")
Total blocks: 7,170
  Local: 452 (6.3%)
  MEV (with bid timing): 4,015 (56.0%)

Note: 2,703 MEV blocks (37.7%) lack bid timing data and are excluded from builder category comparisons.

Size distribution by builder type

Histogram comparing the block size distribution between MEV and local blocks. MEV blocks tend to be larger due to MEV extraction strategies.

Show code
# Overlaid wire-size histograms, one color per builder type.
fig = px.histogram(
    df,
    x="compressed_kib",
    color="builder_type",
    color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
    nbins=50,
    barmode="overlay",
    opacity=0.7,
)
layout_kwargs = dict(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Block size on wire (KiB)"),
    yaxis=dict(title="Block count"),
    legend_title="Builder type",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=400,
)
fig.update_layout(**layout_kwargs)
fig.show(config={"responsive": True})

# Per-builder size summary (median / mean, KiB).
for builder in ("Local", "MEV"):
    sizes = df.loc[df["builder_type"] == builder, "compressed_kib"]
    print(f"{builder}: median size {sizes.median():.1f} KiB, "
          f"mean {sizes.mean():.1f} KiB")
Local: median size 34.3 KiB, mean 38.5 KiB
MEV: median size 80.0 KiB, mean 83.4 KiB

Compression ratio

Scatter plot showing the relationship between uncompressed SSZ block size and compressed (snappy) wire size. The dashed line shows the linear regression; the dotted line shows 1:1 (no compression).

Show code
fig = go.Figure()

# One scatter trace per builder type so each gets its own color/legend entry.
for builder, color in (("Local", "#3498db"), ("MEV", "#9b59b6")):
    pts = df[df["builder_type"] == builder]
    fig.add_trace(go.Scatter(
        x=pts["uncompressed_kib"],
        y=pts["compressed_kib"],
        mode="markers",
        name=builder,
        marker=dict(color=color, opacity=0.4, size=5),
        hovertemplate="<b>Slot %{text}</b><br>Uncompressed: %{x:.1f} KiB<br>Compressed: %{y:.1f} KiB<extra></extra>",
        text=pts["slot"],
    ))

# Linear regression over all blocks (both builder types together).
reg = stats.linregress(df["uncompressed_kib"], df["compressed_kib"])
slope, intercept, r_value = reg.slope, reg.intercept, reg.rvalue
x_range = np.array([df["uncompressed_kib"].min(), df["uncompressed_kib"].max()])

fig.add_trace(go.Scatter(
    x=x_range,
    y=slope * x_range + intercept,
    mode="lines",
    name=f"Regression (R\u00b2={r_value**2:.3f})",
    line=dict(color="#2ecc71", width=2, dash="dash"),
))

# 1:1 reference line (what "no compression" would look like).
fig.add_trace(go.Scatter(
    x=x_range,
    y=x_range,
    mode="lines",
    name="1:1 (no compression)",
    line=dict(color="gray", width=1, dash="dot"),
))

fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Uncompressed block size (KiB)"),
    yaxis=dict(title="Compressed block size (KiB, on wire)"),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

# Compression summary
print(f"Compression ratio: mean {df['compression_ratio'].mean():.2f}x, "
      f"median {df['compression_ratio'].median():.2f}x")
print(f"Regression: compressed = {slope:.3f} \u00d7 uncompressed + {intercept:.1f}")
print(f"R\u00b2 = {r_value**2:.4f}")
Compression ratio: mean 2.03x, median 1.99x
Regression: compressed = 0.431 × uncompressed + 10.2
R² = 0.8664

MEV timing breakdown

For MEV blocks with bid timing data, we can decompose the raw first seen time into block building time (winning bid) and network propagation time.

Winning bid timing distribution

Distribution of winning bid timing (ms from slot start) for MEV blocks. This shows when blocks are "ready" to broadcast.

Show code
# MEV blocks for which the relay recorded the winning bid time.
df_mev_bid = df[df["builder_category"] == "MEV (with bid timing)"]

fig = px.histogram(
    df_mev_bid,
    x="winning_bid_ms",
    nbins=50,
    color_discrete_sequence=["#9b59b6"],
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Winning bid (ms from slot start)"),
    yaxis=dict(title="Block count"),
    height=400,
)
fig.show(config={"responsive": True})

# Text summary of bid timing spread.
bid_ms = df_mev_bid["winning_bid_ms"]
print(f"Winning bid timing (n={len(df_mev_bid):,}):")
print(f"  Median: {bid_ms.median():.0f}ms")
print(f"  P5-P95: {bid_ms.quantile(0.05):.0f}ms - {bid_ms.quantile(0.95):.0f}ms")
Winning bid timing (n=4,015):
  Median: 1006ms
  P5-P95: 293ms - 2463ms

Block building vs network time

Stacked bar showing the breakdown of first seen into block building time (winning bid) and network propagation time (corrected first seen) for MEV blocks.

Show code
# MEV blocks with bid timing: decompose first-seen into build + network time.
df_mev_bid = df[df["builder_category"] == "MEV (with bid timing)"].copy()

# Median build/network time per wire-size bucket.
breakdown = df_mev_bid.groupby("size_bucket", observed=True).agg(
    building_time=("winning_bid_ms", "median"),
    network_time=("corrected_first_seen_ms", "median"),
    count=("slot", "count"),
).reset_index()

# Horizontal stacked bars: building segment first, then network segment.
fig = go.Figure()
for column, label, color in (
    ("building_time", "Block building (winning bid)", "#e74c3c"),
    ("network_time", "Network propagation", "#2ecc71"),
):
    fig.add_trace(go.Bar(
        y=breakdown["size_bucket"],
        x=breakdown[column],
        name=label,
        orientation="h",
        marker_color=color,
    ))

fig.update_layout(
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Time (ms, median)"),
    yaxis=dict(title="Block size on wire (KiB)", categoryorder="array", categoryarray=SIZE_ORDER[::-1]),
    barmode="stack",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=400,
)
fig.show(config={"responsive": True})

# Text version of the same breakdown.
print("Median timing breakdown (MEV blocks with bid timing):")
for _, row in breakdown.iterrows():
    total = row["building_time"] + row["network_time"]
    pct_building = row["building_time"] / total * 100
    print(f"  {row['size_bucket']}: {row['building_time']:.0f}ms building ({pct_building:.0f}%) + "
          f"{row['network_time']:.0f}ms network = {total:.0f}ms total (n={row['count']:,})")
Median timing breakdown (MEV blocks with bid timing):
  < 50 KiB: 960ms building (64%) + 543ms network = 1502ms total (n=438)
  50-100 KiB: 1015ms building (64%) + 560ms network = 1575ms total (n=2,578)
  100-150 KiB: 1009ms building (62%) + 616ms network = 1625ms total (n=881)
  >= 150 KiB: 975ms building (59%) + 690ms network = 1665ms total (n=118)

Raw vs corrected comparison

Comparison of raw first seen (from slot start) vs corrected first seen (from winning bid) for MEV blocks. The corrected metric isolates network propagation time.

Show code
# Compare raw vs corrected first-seen distributions for MEV blocks.
df_mev_bid = df[df["builder_category"] == "MEV (with bid timing)"].copy()

fig = go.Figure()
for column, label, color in (
    ("first_seen_ms", "Raw (from slot start)", "#9b59b6"),
    ("corrected_first_seen_ms", "Corrected (from winning bid)", "#2ecc71"),
):
    fig.add_trace(go.Box(
        y=df_mev_bid[column],
        name=label,
        marker_color=color,
        boxmean=True,
    ))

fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    yaxis=dict(title="First seen (ms)"),
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=450,
)
fig.show(config={"responsive": True})

# Text summary of both metrics.
raw = df_mev_bid["first_seen_ms"]
corrected = df_mev_bid["corrected_first_seen_ms"]
print(f"MEV blocks with bid timing (n={len(df_mev_bid):,}):")
print(f"  Raw first seen: median {raw.median():.0f}ms, P95 {raw.quantile(0.95):.0f}ms")
print(f"  Corrected: median {corrected.median():.0f}ms, P95 {corrected.quantile(0.95):.0f}ms")
print(f"  Winning bid: median {df_mev_bid['winning_bid_ms'].median():.0f}ms")
MEV blocks with bid timing (n=4,015):
  Raw first seen: median 1682ms, P95 3096ms
  Corrected: median 576ms, P95 1625ms
  Winning bid: median 1006ms

Corrected first seen vs block size

Scatter plot using corrected first seen. For MEV blocks with bid timing, this shows pure network propagation time. For Local blocks and MEV blocks without bid timing, this equals raw first seen.

Show code
# Restrict to the two comparable categories; render MEV first so the smaller
# Local population draws on top of it.
df_plot = df[df["builder_category"].isin(CATEGORY_ORDER)].copy()
render_order = {"MEV (with bid timing)": 0, "Local": 1}
df_sorted = df_plot.sort_values("builder_category", key=lambda col: col.map(render_order))

scatter_kwargs = dict(
    x="corrected_first_seen_ms",
    y="compressed_kib",
    color="builder_category",
    category_orders={"builder_category": CATEGORY_ORDER},
    color_discrete_map=CATEGORY_COLORS,
    opacity=0.5,
    hover_data={"slot": True, "proposer_entity": True, "first_seen_ms": ":.0f", "corrected_first_seen_ms": ":.0f"},
)
fig = px.scatter(df_sorted, **scatter_kwargs)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
    legend_title="Builder category",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

Corrected first seen by size bucket

Box plot comparing corrected first seen between builder categories across size buckets.

Box: 25th-75th percentile. Line: median. Whiskers: most extreme points within 1.5×IQR of the box; points beyond that are drawn as outliers.

Show code
# Box plot of corrected first seen per size bucket, split by builder category.
df_plot = df[df["builder_category"].isin(CATEGORY_ORDER)]

box_kwargs = dict(
    y="size_bucket",
    x="corrected_first_seen_ms",
    color="builder_category",
    orientation="h",
    category_orders={"size_bucket": SIZE_ORDER[::-1], "builder_category": CATEGORY_ORDER},
    color_discrete_map=CATEGORY_COLORS,
)
fig = px.box(df_plot, **box_kwargs)
fig.update_layout(
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
    legend_title="Builder category",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

# Summary stats per category (over all buckets).
for cat in CATEGORY_ORDER:
    cat_rows = df[df["builder_category"] == cat]
    if len(cat_rows) > 0:
        timing = cat_rows["corrected_first_seen_ms"]
        print(f"{cat}: median {timing.median():.0f}ms, "
              f"P95 {timing.quantile(0.95):.0f}ms, n={len(cat_rows):,}")
Local: median 1362ms, P95 2272ms, n=452
MEV (with bid timing): median 576ms, P95 1625ms, n=4,015

Corrected first seen density by builder type

Density heatmaps showing the distribution of corrected first seen timing vs block size for Local and MEV blocks.

Show code
# Density heatmaps of corrected first seen vs wire size, one facet per
# builder category. Filter to categories with enough data for meaningful
# heatmaps first.
df_heatmap = df[df["builder_category"].isin(["Local", "MEV (with bid timing)"])]

# Axis ranges: trim the top 1% of the *plotted* rows. Computing the quantiles
# on the full frame (as before) let the excluded "MEV (no bid timing)" rows
# skew the ranges of a plot they do not appear in.
x_max = df_heatmap["corrected_first_seen_ms"].quantile(0.99)
y_max = df_heatmap["compressed_kib"].quantile(0.99)

fig = px.density_heatmap(
    df_heatmap,
    x="corrected_first_seen_ms",
    y="compressed_kib",
    facet_col="builder_category",
    facet_col_spacing=0.08,
    category_orders={"builder_category": ["Local", "MEV (with bid timing)"]},
    nbinsx=40,
    nbinsy=40,
    range_x=[0, x_max],
    range_y=[0, y_max],
    color_continuous_scale="Plasma",
)
fig.update_layout(
    margin=dict(l=60, r=30, t=40, b=60),
    height=450,
    coloraxis_colorbar=dict(title="Count"),
)
# Facet annotations arrive as "builder_category=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(
    text=a.text.replace("builder_category=", ""),
    font_size=12,
))
fig.for_each_xaxis(lambda x: x.update(title="Corrected first seen (ms)"))
fig.for_each_yaxis(lambda y: y.update(title="Wire size (KiB)"))
fig.show(config={"responsive": True})

Regional propagation analysis

Comparison of block first-seen timing across geographic regions from two data sources:

  • Sentries: EthPandaOps libp2p gossipsub monitoring (~50-100 globally distributed nodes)
  • Contributoor: Community beacon API event collection (~875 nodes, primarily data centers)

Both sources capture when blocks are first observed by nodes in each region. Contributoor nodes tend to show faster times due to being primarily in well-connected data centers.

Show code
# Load regional propagation data from both sources. Each source is optional:
# a missing parquet sets its availability flag to False instead of failing
# the whole section.
try:
    df_region_sentries = load_parquet("block_propagation_by_region", target_date)
    has_sentries = True
except FileNotFoundError:
    has_sentries = False
    print("Note: Sentries regional data not available")

try:
    df_region_contributoor = load_parquet("block_propagation_by_region_contributoor", target_date)
    has_contributoor = True
except FileNotFoundError:
    has_contributoor = False
    print("Note: Contributoor regional data not available")

REGION_LABELS = {"EU": "Europe", "NA": "North America", "AS": "Asia", "OC": "Oceania"}
REGION_ORDER = ["EU", "NA", "AS", "OC"]

def add_region_derived_columns(df_in):
    """Add size bucket and corrected timing columns matching the main notebook.

    Mirrors the derivations applied to `df` above: KiB conversion, size
    bucketing into the SIZE_ORDER labels, human-readable region labels, and
    the winning-bid correction of first-seen timing for MEV blocks.
    Returns a new frame; the input is not mutated.
    """
    df_out = df_in.copy()

    # Size buckets (same bin edges as the main frame)
    df_out["compressed_kib"] = df_out["compressed_bytes"] / 1024
    df_out["size_bucket"] = pd.cut(
        df_out["compressed_kib"],
        bins=[0, 50, 100, 150, float("inf")],
        labels=SIZE_ORDER
    )
    df_out["region_label"] = df_out["region"].map(REGION_LABELS)

    # Join with timeline data to get winning bid timing
    df_out = df_out.merge(
        df_timeline[["slot", "winning_bid_ms"]],
        on="slot",
        how="left"
    )

    # Corrected first seen: subtract winning bid time for MEV blocks with bid timing
    df_out["corrected_first_seen_ms"] = np.where(
        (df_out["builder_type"] == "MEV") & df_out["winning_bid_ms"].notna(),
        df_out["first_seen_ms"] - df_out["winning_bid_ms"],
        df_out["first_seen_ms"]
    )

    return df_out

if has_sentries:
    df_region_sentries = add_region_derived_columns(df_region_sentries)
if has_contributoor:
    df_region_contributoor = add_region_derived_columns(df_region_contributoor)

# Print summary. The conditional expressions avoid referencing a frame that
# was never assigned when its source is missing; has_data skips it entirely.
for name, df_r, has_data in [
    ("Sentries", df_region_sentries if has_sentries else None, has_sentries),
    ("Contributoor", df_region_contributoor if has_contributoor else None, has_contributoor),
]:
    if has_data:
        print(f"\n{name}:")
        for region in REGION_ORDER:
            r = df_r[df_r["region"] == region]
            print(f"  {REGION_LABELS[region]}: {len(r):,} slot-regions, "
                  f"median corrected first seen {r['corrected_first_seen_ms'].median():.0f}ms")
Sentries:
  Europe: 7,170 slot-regions, median corrected first seen 1188ms
  North America: 7,170 slot-regions, median corrected first seen 1280ms
  Asia: 7,170 slot-regions, median corrected first seen 1341ms
  Oceania: 7,170 slot-regions, median corrected first seen 1330ms

Contributoor:
  Europe: 29,574 slot-regions, median corrected first seen 1171ms
  North America: 29,574 slot-regions, median corrected first seen 1162ms
  Asia: 29,574 slot-regions, median corrected first seen 1273ms
  Oceania: 29,574 slot-regions, median corrected first seen 1314ms

Regional timing distribution (corrected)

Box plots showing corrected first seen timing distribution by region, comparing Sentries (libp2p) and Contributoor (beacon API) data sources. Faceted by builder type (MEV vs Local).

Box: 25th-75th percentile. Line: median. Whiskers: most extreme points within 1.5×IQR of the box; points beyond that are drawn as outliers.

Show code
if has_sentries or has_contributoor:
    # Combine regional data with source labels so one box plot can compare
    # the two collection methods side by side.
    dfs_to_concat = []
    if has_sentries:
        df_s = df_region_sentries.copy()
        df_s["source"] = "Sentries"
        dfs_to_concat.append(df_s)
    if has_contributoor:
        df_c = df_region_contributoor.copy()
        df_c["source"] = "Contributoor"
        dfs_to_concat.append(df_c)

    df_regional_combined = pd.concat(dfs_to_concat, ignore_index=True)
    df_regional_combined["region_label"] = df_regional_combined["region"].map(REGION_LABELS)

    fig = px.box(
        df_regional_combined,
        x="region_label",
        y="corrected_first_seen_ms",
        color="source",
        facet_col="builder_type",
        color_discrete_map={"Sentries": "#3498db", "Contributoor": "#2ecc71"},
        category_orders={
            "region_label": [REGION_LABELS[r] for r in REGION_ORDER],
            "builder_type": ["MEV", "Local"],
        },
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=40, b=60),
        xaxis_title="Region",
        yaxis_title="Corrected first seen (ms)",
        legend_title="Data source",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=450,
    )
    # Facet titles come through as "builder_type=<value>"; keep only the value.
    fig.for_each_annotation(lambda a: a.update(text=a.text.replace("builder_type=", "")))
    fig.show(config={"responsive": True})

    # Print summary stats per builder type / region / source combination.
    print("Corrected first seen by region (ms):")
    for bt in ["MEV", "Local"]:
        print(f"\n  {bt}:")
        for region in REGION_ORDER:
            for source in ["Sentries", "Contributoor"]:
                subset = df_regional_combined[
                    (df_regional_combined["region"] == region) & 
                    (df_regional_combined["source"] == source) &
                    (df_regional_combined["builder_type"] == bt)
                ]["corrected_first_seen_ms"]
                if len(subset) > 0:
                    print(f"    {REGION_LABELS[region]} ({source}): "
                          f"P50={subset.median():.0f}ms, P95={subset.quantile(0.95):.0f}ms (n={len(subset):,})")
else:
    print("No regional data available")
Corrected first seen by region (ms):

  MEV:
    Europe (Sentries): P50=1131ms, P95=2738ms (n=6,718)
    Europe (Contributoor): P50=1119ms, P95=2725ms (n=27,698)
    North America (Sentries): P50=1222ms, P95=2871ms (n=6,718)
    North America (Contributoor): P50=1097ms, P95=2766ms (n=27,698)
    Asia (Sentries): P50=1300ms, P95=2926ms (n=6,718)
    Asia (Contributoor): P50=1235ms, P95=2848ms (n=27,698)
    Oceania (Sentries): P50=1286ms, P95=2921ms (n=6,718)
    Oceania (Contributoor): P50=1270ms, P95=2902ms (n=27,698)

  Local:
    Europe (Sentries): P50=1388ms, P95=2272ms (n=452)
    Europe (Contributoor): P50=1375ms, P95=2266ms (n=1,876)
    North America (Sentries): P50=1432ms, P95=2377ms (n=452)
    North America (Contributoor): P50=1354ms, P95=2326ms (n=1,876)
    Asia (Sentries): P50=1484ms, P95=2413ms (n=452)
    Asia (Contributoor): P50=1413ms, P95=2382ms (n=1,876)
    Oceania (Sentries): P50=1456ms, P95=2434ms (n=452)
    Oceania (Contributoor): P50=1425ms, P95=2445ms (n=1,876)

Regional CDF comparison by size (corrected timing)

Cumulative distribution functions (CDFs) showing corrected first seen timing by geographic region, faceted by block size bucket and builder type.

How to read these charts:

  • X-axis: Corrected first seen time in milliseconds. For MEV blocks, this is first_seen - winning_bid (isolating network propagation). For Local blocks, this equals raw first seen.
  • Y-axis: Percentile (0-100%). A point at (300ms, 50%) means 50% of blocks were seen within 300ms.
  • Steeper curves = faster, more consistent propagation. The curve climbing quickly to 100% indicates tight timing.
  • Right-shifted curves = slower propagation in that region.
  • P50 line (horizontal dotted): Where curves cross this line shows median timing per region.

Visual encoding:

  • Color = Geographic region (EU=blue, NA=green, AS=red, OC=orange)
  • Line style = Data source (solid=Sentries libp2p gossipsub, dashed=Contributoor beacon API)
  • Columns = Builder type (MEV vs Local)
  • Rows = Block size bucket (smallest at top, largest at bottom)

What to look for:

  • Do larger blocks show more spread between regions? (Size impact on propagation)
  • Does Contributoor (dashed) consistently show faster times than Sentries (solid)? (Data center vs diverse node placement)
  • Which regions lag behind as block size increases? (Geographic disadvantage for large blocks)
Show code
if has_sentries or has_contributoor:
    from plotly.subplots import make_subplots

    REGION_COLORS = {
        "EU": "#3498db",  # Blue
        "NA": "#2ecc71",  # Green
        "AS": "#e74c3c",  # Red
        "OC": "#f39c12",  # Orange
    }

    # Line style distinguishes the two data sources on the same subplot.
    SOURCE_DASH = {
        "Sentries": "solid",
        "Contributoor": "dash",
    }

    BUILDER_TYPES = ["MEV", "Local"]

    # Create 4x2 subplots: rows = size buckets, cols = builder types.
    # Only the first row gets builder-type titles; the rest are blank.
    fig = make_subplots(
        rows=len(SIZE_ORDER), cols=len(BUILDER_TYPES),
        subplot_titles=[f"{bt}" for bt in BUILDER_TYPES] + [""] * (len(SIZE_ORDER) - 1) * 2,
        row_titles=SIZE_ORDER,
        horizontal_spacing=0.06,
        vertical_spacing=0.06,
    )

    # Integer percentiles 0..100 form the y-axis of every CDF curve.
    percentiles = np.arange(0, 101, 1)

    def add_cdf_traces(df_r, source_name, builder_type, size_bucket, row, col, show_legend):
        """Add one CDF line per region to the given subplot cell.

        Closure over `fig` and `percentiles`; regions with fewer than 10
        samples in this (builder, bucket) cell are skipped.
        """
        subset = df_r[
            (df_r["builder_type"] == builder_type) & 
            (df_r["size_bucket"] == size_bucket)
        ]
        for region in REGION_ORDER:
            region_data = subset[subset["region"] == region]["corrected_first_seen_ms"]
            if len(region_data) >= 10:  # Need enough data for meaningful CDF
                values = np.percentile(region_data, percentiles)
                fig.add_trace(
                    go.Scatter(
                        x=values,
                        y=percentiles,
                        mode="lines",
                        name=f"{REGION_LABELS[region]} ({source_name})",
                        line=dict(
                            color=REGION_COLORS[region], 
                            width=2,
                            dash=SOURCE_DASH[source_name],
                        ),
                        showlegend=show_legend,
                        # legendgroup links the per-subplot copies of a curve
                        # so one legend entry toggles them all.
                        legendgroup=f"{region}_{source_name}",
                        hovertemplate=f"{REGION_LABELS[region]} ({source_name})<br>%{{x:.0f}}ms = P%{{y}}<extra></extra>",
                    ),
                    row=row, col=col,
                )

    # Add traces for each (size bucket, builder type) combination.
    for row_idx, size_bucket in enumerate(SIZE_ORDER, 1):
        for col_idx, builder_type in enumerate(BUILDER_TYPES, 1):
            # Only show legend on first subplot
            show_legend = (row_idx == 1 and col_idx == 1)

            if has_sentries:
                add_cdf_traces(df_region_sentries, "Sentries", builder_type, size_bucket, row_idx, col_idx, show_legend)
            if has_contributoor:
                add_cdf_traces(df_region_contributoor, "Contributoor", builder_type, size_bucket, row_idx, col_idx, show_legend)

            # Add P50 reference line
            fig.add_hline(y=50, line_dash="dot", line_color="gray", line_width=1, row=row_idx, col=col_idx)

    fig.update_layout(
        margin=dict(l=100, r=30, t=50, b=60),
        height=1100,
        legend=dict(
            orientation="h", 
            yanchor="bottom", 
            y=1.02, 
            xanchor="center", 
            x=0.5,
            font_size=10,
        ),
    )

    # Axis titles only on the bottom row (x) and left column (y).
    for col in [1, 2]:
        fig.update_xaxes(title_text="Corrected first seen (ms)", row=len(SIZE_ORDER), col=col)
    fig.update_yaxes(title_text="Percentile", col=1)

    fig.show(config={"responsive": True})

    # Print summary statistics mirroring the chart's medians.
    print("Line styles: solid = Sentries (libp2p), dashed = Contributoor (beacon API)")
    print("\nP50 (median) timing by size bucket and builder type:\n")
    for size_bucket in SIZE_ORDER:
        print(f"  {size_bucket}:")
        for bt in BUILDER_TYPES:
            for source, df_r, has_data in [
                ("Sentries", df_region_sentries if has_sentries else None, has_sentries),
                ("Contributoor", df_region_contributoor if has_contributoor else None, has_contributoor),
            ]:
                if has_data:
                    subset = df_r[(df_r["builder_type"] == bt) & (df_r["size_bucket"] == size_bucket)]
                    if len(subset) >= 10:
                        medians = []
                        for region in REGION_ORDER:
                            r = subset[subset["region"] == region]["corrected_first_seen_ms"]
                            if len(r) > 0:
                                medians.append(f"{REGION_LABELS[region][:2]}:{r.median():.0f}")
                        if medians:
                            print(f"    {bt} ({source}): {', '.join(medians)} ms")
else:
    print("No regional data available")
Line styles: solid = Sentries (libp2p), dashed = Contributoor (beacon API)

P50 (median) timing by size bucket and builder type:

  < 50 KiB:
    MEV (Sentries): Eu:1153, No:1216, As:1290, Oc:1283 ms
    MEV (Contributoor): Eu:1138, No:1098, As:1248, Oc:1274 ms
    Local (Sentries): Eu:1349, No:1389, As:1424, Oc:1406 ms
    Local (Contributoor): Eu:1338, No:1310, As:1363, Oc:1385 ms
  50-100 KiB:
    MEV (Sentries): Eu:1131, No:1226, As:1304, Oc:1279 ms
    MEV (Contributoor): Eu:1119, No:1095, As:1230, Oc:1263 ms
    Local (Sentries): Eu:1464, No:1564, As:1603, Oc:1596 ms
    Local (Contributoor): Eu:1458, No:1462, As:1537, Oc:1544 ms
  100-150 KiB:
    MEV (Sentries): Eu:1060, No:1181, As:1245, Oc:1218 ms
    MEV (Contributoor): Eu:1052, No:1059, As:1196, Oc:1211 ms
    Local (Sentries): Eu:1766, No:1942, As:1973, Oc:1998 ms
    Local (Contributoor): Eu:1764, No:1825, As:1926, Oc:1979 ms
  >= 150 KiB:
    MEV (Sentries): Eu:1300, No:1482, As:1520, Oc:1544 ms
    MEV (Contributoor): Eu:1282, No:1325, As:1451, Oc:1473 ms
    Local (Contributoor): Eu:453, No:656, As:675, Oc:774 ms

First-seen "winner" by region (corrected timing)

For each slot, which region observed the block first using corrected timing? This shows the percentage of slots where each region was the first to see the block after accounting for block building time.

Show code
if has_sentries or has_contributoor:
    def compute_winner_stats(df_r, source_name):
        """Compute which region saw each slot first using corrected timing.

        Returns a list of per-region dicts (source, region, win_count,
        win_pct). NaN corrected timings sort last, so a region with missing
        timing for a slot cannot win it. Exact ties are broken by the sort's
        internal order, which is not guaranteed stable here.
        """
        # Reset index, sort by slot and corrected first seen
        df_sorted = df_r.reset_index(drop=True).sort_values(["slot", "corrected_first_seen_ms"])
        # Keep first (fastest) per slot
        winner_per_slot = df_sorted.drop_duplicates(subset="slot", keep="first")

        # Count wins per region
        region_wins = winner_per_slot["region"].value_counts()
        total_slots = winner_per_slot["slot"].nunique()

        rows = []
        for region in REGION_ORDER:
            wins = region_wins.get(region, 0)
            rows.append({
                "source": source_name,
                "region": region,
                "region_label": REGION_LABELS[region],
                "win_count": wins,
                # Guard against an empty frame producing a zero division.
                "win_pct": wins / total_slots * 100 if total_slots > 0 else 0,
            })
        return rows

    rows = []
    if has_sentries:
        rows.extend(compute_winner_stats(df_region_sentries, "Sentries"))
    if has_contributoor:
        rows.extend(compute_winner_stats(df_region_contributoor, "Contributoor"))

    df_winners = pd.DataFrame(rows)

    # Grouped bars: win percentage per region, one bar group per data source.
    fig = px.bar(
        df_winners,
        x="region_label",
        y="win_pct",
        color="source",
        barmode="group",
        color_discrete_map={"Sentries": "#3498db", "Contributoor": "#2ecc71"},
        category_orders={"region_label": [REGION_LABELS[r] for r in REGION_ORDER]},
        text=df_winners["win_pct"].apply(lambda x: f"{x:.1f}%"),
    )
    fig.update_traces(textposition="outside")
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis_title="Region",
        yaxis_title="% of slots first seen (corrected)",
        legend_title="Data source",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=400,
    )
    fig.show(config={"responsive": True})

    # Print summary, regions ordered by descending win percentage.
    print("Region 'wins' (first to see block, corrected timing):")
    for source in ["Sentries", "Contributoor"]:
        subset = df_winners[df_winners["source"] == source]
        if len(subset) > 0:
            print(f"\n  {source}:")
            for _, row in subset.sort_values("win_pct", ascending=False).iterrows():
                print(f"    {row['region_label']}: {row['win_pct']:.1f}% ({row['win_count']:,} slots)")
else:
    print("No regional data available")
Region 'wins' (first to see block, corrected timing):

  Sentries:
    Europe: 91.7% (6,578 slots)
    North America: 4.2% (301 slots)
    Asia: 2.2% (156 slots)
    Oceania: 1.9% (135 slots)

  Contributoor:
    Europe: 60.8% (4,357 slots)
    North America: 32.7% (2,345 slots)
    Asia: 5.5% (396 slots)
    Oceania: 1.0% (72 slots)

Region × size interaction (corrected timing)

Median corrected first seen timing by region and block size bucket. Shows whether larger blocks disproportionately impact certain regions after accounting for block building time.

Show code
if has_sentries or has_contributoor:
    def compute_region_size_matrix(df_r):
        """Median corrected first-seen (ms): regions as rows, size buckets as columns."""
        grouped = df_r.groupby(["region", "size_bucket"], observed=True)
        return grouped["corrected_first_seen_ms"].median().unstack()

    # One matrix per available data source, keyed by its display name.
    matrices = {}
    if has_sentries:
        matrices["Sentries"] = compute_region_size_matrix(df_region_sentries)
    if has_contributoor:
        matrices["Contributoor"] = compute_region_size_matrix(df_region_contributoor)

    # Side-by-side heatmap subplots, one per source.
    n_sources = len(matrices)
    fig = make_subplots(
        rows=1, cols=n_sources,
        subplot_titles=list(matrices.keys()),
        horizontal_spacing=0.1,
    )

    # Share one color scale across all heatmaps, clipped to the 5th-95th
    # percentile so a few extreme cells don't wash out the rest.
    all_values = np.concatenate([m.values.flatten() for m in matrices.values()])
    all_values = all_values[~np.isnan(all_values)]
    vmin, vmax = np.percentile(all_values, [5, 95])

    for i, (source, matrix) in enumerate(matrices.items(), 1):
        matrix = matrix.reindex(REGION_ORDER)  # consistent row order across subplots
        heatmap = go.Heatmap(
            z=matrix.values,
            x=[str(c) for c in matrix.columns],
            y=[REGION_LABELS[r] for r in matrix.index],
            colorscale="Plasma",
            zmin=vmin,
            zmax=vmax,
            text=matrix.values.round(0).astype(int),
            texttemplate="%{text}",
            textfont={"size": 11},
            # Single shared colorbar, attached to the last subplot only.
            showscale=(i == n_sources),
            colorbar=dict(title="ms") if i == n_sources else None,
            hovertemplate="Region: %{y}<br>Size: %{x}<br>Median: %{z:.0f}ms<extra></extra>",
        )
        fig.add_trace(heatmap, row=1, col=i)

    fig.update_layout(
        margin=dict(l=100, r=30, t=60, b=60),
        height=350,
    )
    fig.update_xaxes(title_text="Block size on wire", row=1)
    fig.update_yaxes(title_text="Region", col=1)
    fig.show(config={"responsive": True})

    # Plain-text dump of the same matrices (NaN cells are skipped).
    print("Median corrected first seen (ms) by region and size bucket:\n")
    for source, matrix in matrices.items():
        print(f"{source}:")
        matrix = matrix.reindex(REGION_ORDER)
        for region in REGION_ORDER:
            row = matrix.loc[region]
            cells = [f"{row[c]:.0f}" for c in SIZE_ORDER if c in row.index and pd.notna(row[c])]
            print(f"  {REGION_LABELS[region]}: {', '.join(cells)}")
        print()
else:
    print("No regional data available")
Median corrected first seen (ms) by region and size bucket:

Sentries:
  Europe: 1251, 1162, 1080, 1294
  North America: 1311, 1271, 1198, 1480
  Asia: 1367, 1336, 1278, 1515
  Oceania: 1360, 1315, 1255, 1531

Contributoor:
  Europe: 1238, 1145, 1073, 1274
  North America: 1244, 1124, 1072, 1325
  Asia: 1317, 1254, 1216, 1448
  Oceania: 1335, 1300, 1248, 1468

Propagation spread

Propagation spread is the time between when the first sentry saw the block and when the last sentry saw it. Larger blocks should take longer to propagate across all sentries.

Spread by size (box plot)

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

Show code
# Horizontal box plot of propagation spread per size bucket, split by builder type.
builder_palette = {"MEV": "#9b59b6", "Local": "#3498db"}
fig = px.box(
    df,
    y="size_bucket",
    x="spread_ms",
    color="builder_type",
    orientation="h",
    # Reverse the bucket order so the smallest sizes render at the top.
    category_orders={"size_bucket": SIZE_ORDER[::-1], "builder_type": ["MEV", "Local"]},
    color_discrete_map=builder_palette,
)
fig.update_layout(
    height=400,
    margin=dict(l=100, r=30, t=30, b=60),
    legend_title="Builder type",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    xaxis=dict(title="Propagation spread (last seen - first seen, ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
)
fig.show(config={"responsive": True})

Spread vs size (scatter)

Scatter view showing individual blocks.

Show code
# Per-block scatter: propagation spread on x, wire size on y, colored by builder type.
fig = px.scatter(
    df,
    x="spread_ms",
    y="compressed_kib",
    color="builder_type",
    color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
    opacity=0.5,
    # Show slot/entity plus formatted timings in the hover tooltip.
    hover_data={
        "slot": True,
        "proposer_entity": True,
        "spread_ms": ":.0f",
        "corrected_first_seen_ms": ":.0f",
    },
)
fig.update_layout(
    height=500,
    margin=dict(l=60, r=30, t=30, b=60),
    legend_title="Builder type",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    xaxis=dict(title="Propagation spread (ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
)
fig.show(config={"responsive": True})

Corrected first seen by proposer entity

Top proposer entities ranked by median corrected first seen timing. Circle, square, and diamond markers show P75, P90, and P95 timing respectively.

Show code
# Per-entity percentile stats of corrected first-seen timing, plus MEV share.
entity_stats = df.groupby("proposer_entity").agg(
    block_count=("slot", "count"),
    p50_ms=("corrected_first_seen_ms", "median"),
    p75_ms=("corrected_first_seen_ms", lambda x: x.quantile(0.75)),
    p90_ms=("corrected_first_seen_ms", lambda x: x.quantile(0.90)),
    p95_ms=("corrected_first_seen_ms", lambda x: x.quantile(0.95)),
    mev_pct=("builder_type", lambda x: (x == "MEV").mean() * 100),
).reset_index()

# Require a meaningful sample (20+ blocks); rank the 20 highest-volume
# entities and order them fastest-median-first for display.
entity_stats = entity_stats[entity_stats["block_count"] >= 20]
top_by_count = entity_stats.nlargest(20, "block_count").sort_values("p50_ms")

fig = go.Figure()

# Median (P50) as bars, annotated with each entity's block count.
fig.add_trace(go.Bar(
    y=top_by_count["proposer_entity"],
    x=top_by_count["p50_ms"],
    orientation="h",
    name="P50 (median)",
    marker_color="#3498db",
    text=top_by_count["block_count"].apply(lambda x: f"{x:,}"),
    textposition="outside",
    hovertemplate="<b>%{y}</b><br>P50: %{x:.0f}ms<br>Blocks: %{text}<extra></extra>",
))

# Upper percentiles as point markers overlaid on the bars; one trace per
# percentile, driven by a (column, label, color, symbol) spec table.
for col, label, marker_color, symbol in (
    ("p75_ms", "P75", "#f39c12", "circle"),
    ("p90_ms", "P90", "#e67e22", "square"),
    ("p95_ms", "P95", "#e74c3c", "diamond"),
):
    fig.add_trace(go.Scatter(
        y=top_by_count["proposer_entity"],
        x=top_by_count[col],
        mode="markers",
        name=label,
        marker=dict(color=marker_color, size=8, symbol=symbol),
        hovertemplate="<b>%{y}</b><br>" + label + ": %{x:.0f}ms<extra></extra>",
    ))

fig.update_layout(
    margin=dict(l=150, r=60, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title=""),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=600,
    barmode="overlay",
)
fig.show(config={"responsive": True})

Top 10 proposer entities (density)

Corrected first seen vs block size density for MEV blocks, faceted by top 10 proposer entities (by block count, descending).

Show code
# MEV blocks only; normalize missing/blank proposer entities to "(unknown)".
df_mev = df[df["builder_type"] == "MEV"].copy()
df_mev["proposer_entity"] = df_mev["proposer_entity"].fillna("(unknown)").replace("", "(unknown)")

# Top 10 entities by block count (value_counts is already descending).
entity_counts = df_mev["proposer_entity"].value_counts().head(10)
top_entities = entity_counts.index.tolist()
df_top = df_mev[df_mev["proposer_entity"].isin(top_entities)].copy()

# Facet labels carry the block count, e.g. "lido (1,234)", in count order.
legend_labels = {entity: f"{entity} ({count:,})" for entity, count in entity_counts.items()}
df_top["entity_label"] = df_top["proposer_entity"].map(legend_labels)
label_order = [legend_labels[e] for e in top_entities]

# Clip both axes at the 99th percentile so outliers don't dominate the bins.
x_max = df_top["corrected_first_seen_ms"].quantile(0.99)
y_max = df_top["compressed_kib"].quantile(0.99)

fig = px.density_heatmap(
    df_top,
    x="corrected_first_seen_ms",
    y="compressed_kib",
    facet_col="entity_label",
    facet_col_wrap=5,
    facet_col_spacing=0.04,
    facet_row_spacing=0.08,
    category_orders={"entity_label": label_order},
    nbinsx=20,
    nbinsy=20,
    range_x=[0, x_max],
    range_y=[0, y_max],
    color_continuous_scale="Plasma",
)
fig.update_layout(
    margin=dict(l=60, r=30, t=40, b=60),
    height=500,
    coloraxis_colorbar=dict(title="Count"),
)

def _clean_facet_title(annotation):
    # Facet titles come back as "entity_label=<name>"; keep just the name.
    annotation.update(text=annotation.text.replace("entity_label=", ""), font_size=10)

fig.for_each_annotation(_clean_facet_title)
# Blank per-facet axis titles; shared titles are added as annotations below.
fig.for_each_xaxis(lambda ax: ax.update(title=""))
fig.for_each_yaxis(lambda ax: ax.update(title=""))
fig.add_annotation(
    text="Corrected first seen (ms)",
    xref="paper", yref="paper",
    x=0.5, y=-0.08,
    showarrow=False,
    font_size=12,
)
fig.add_annotation(
    text="Wire size (KiB)",
    xref="paper", yref="paper",
    x=-0.04, y=0.5,
    showarrow=False,
    font_size=12,
    textangle=-90,
)
fig.show(config={"responsive": True})

Anomaly detection

The following charts help identify blocks that propagated slower than expected given their size, using corrected timing.

Corrected first seen residuals

Residual = actual corrected first seen - expected based on block size. Positive residuals indicate blocks that were slower than expected for their size. The regression line is fit per builder category.

Show code
# Fit one linear model (corrected first seen ~ wire size) per builder
# category and compute each block's residual from its category's fit line.
df["expected_corrected_first_seen"] = np.nan
df["residual_ms"] = np.nan

for cat in CATEGORY_ORDER:
    mask = df["builder_category"] == cat
    subset = df[mask]
    # Skip categories with too few points to fit a meaningful line;
    # their expected/residual columns stay NaN.
    if len(subset) > 10:
        slope, intercept, _, _, _ = stats.linregress(
            subset["compressed_kib"], subset["corrected_first_seen_ms"]
        )
        fitted = slope * df.loc[mask, "compressed_kib"] + intercept
        df.loc[mask, "expected_corrected_first_seen"] = fitted
        df.loc[mask, "residual_ms"] = df.loc[mask, "corrected_first_seen_ms"] - fitted

# Plot only the configured categories; sort so Local renders last (on top).
df_plot = df[df["builder_category"].isin(CATEGORY_ORDER)].copy()
render_order = {"MEV (with bid timing)": 0, "Local": 1}
df_sorted = df_plot.sort_values("builder_category", key=lambda x: x.map(render_order))

fig = px.scatter(
    df_sorted,
    x="compressed_kib",
    y="residual_ms",
    color="builder_category",
    category_orders={"builder_category": CATEGORY_ORDER},
    color_discrete_map=CATEGORY_COLORS,
    opacity=0.5,
    hover_data={
        "slot": True,
        "proposer_entity": True,
        "corrected_first_seen_ms": ":.0f",
        "residual_ms": ":.0f",
    },
)
# Zero residual == exactly on the fit line.
fig.add_hline(y=0, line_dash="dash", line_color="gray", annotation_text="Expected")
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Block size on wire (KiB)"),
    yaxis=dict(title="Corrected first seen residual (ms)"),
    legend_title="Builder category",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

# Report how many blocks sit above the P95 residual among plotted categories.
df_filtered = df[df["builder_category"].isin(CATEGORY_ORDER)]
outlier_threshold = df_filtered["residual_ms"].quantile(0.95)
outliers = df_filtered[df_filtered["residual_ms"] > outlier_threshold]
print(f"Outlier threshold (P95): {outlier_threshold:.0f}ms")
print(f"Blocks above P95: {len(outliers):,} ({len(outliers)/len(df_filtered)*100:.1f}%)")
Outlier threshold (P95): 914ms
Blocks above P95: 224 (5.0%)

Slow blocks (z-score > 2)

Blocks with corrected first seen timing more than 2 standard deviations above the mean for their size bucket. These are unusually slow relative to similar-sized blocks.

Show code
from IPython.display import HTML, display

# Z-score each block's corrected first-seen time against the mean/std of its
# own size bucket, so "slow" is judged relative to similar-sized blocks.
df["zscore"] = df.groupby("size_bucket", observed=True)["corrected_first_seen_ms"].transform(
    lambda x: (x - x.mean()) / x.std()
)

# Top 20 slowest blocks beyond 2 standard deviations, worst first.
slow_blocks = df[df["zscore"] > 2].sort_values("zscore", ascending=False).head(20)

if len(slow_blocks) > 0:
    # Build one HTML <tr> per slow block, linking the slot to ethPandaOps Lab.
    rows = []
    for _, row in slow_blocks.iterrows():
        slot = int(row["slot"])
        lab_url = f"https://lab.ethpandaops.io/ethereum/slots/{slot}"
        rows.append(f"""
            <tr>
                <td><a href="{lab_url}" target="_blank">{slot:,}</a></td>
                <td>{row['builder_category']}</td>
                <td>{row['proposer_entity']}</td>
                <td>{row['compressed_kib']:.1f}</td>
                <td>{row['corrected_first_seen_ms']:.0f}</td>
                <td>{row['zscore']:.1f}\u03c3</td>
            </tr>
        """)

    # Inline-styled table; doubled braces escape the CSS blocks inside the f-string.
    html = f'''
    <style>
    .anomaly-table {{ border-collapse: collapse; font-family: monospace; font-size: 13px; width: 100%; }}
    .anomaly-table th {{ background: #2c3e50; color: white; padding: 8px; text-align: left; }}
    .anomaly-table td {{ padding: 6px 8px; border-bottom: 1px solid #eee; }}
    .anomaly-table tr:hover {{ background: #f5f5f5; }}
    .anomaly-table a {{ color: #3498db; text-decoration: none; }}
    .anomaly-table a:hover {{ text-decoration: underline; }}
    </style>
    <table class="anomaly-table">
    <thead>
        <tr><th>Slot</th><th>Builder</th><th>Proposer</th><th>Size (KiB)</th><th>Corrected first seen (ms)</th><th>Z-score</th></tr>
    </thead>
    <tbody>
        {"".join(rows)}
    </tbody>
    </table>
    '''
    display(HTML(html))
    # The table is capped at 20 rows; report the full count separately.
    print(f"\nTotal blocks with z-score > 2: {len(df[df['zscore'] > 2]):,}")
else:
    print("No blocks with z-score > 2 found.")
SlotBuilderProposerSize (KiB)Corrected first seen (ms)Z-score
13,558,176 Local whale_0x4f7e 81.0 5666 5.5σ
13,554,276 Local lido 60.3 4306 3.8σ
13,559,176 MEV (no bid timing) stakefish 47.0 3709 3.5σ
13,557,651 MEV (no bid timing) 45.6 3573 3.3σ
13,557,610 Local solo_stakers 43.4 3570 3.3σ
13,557,870 MEV (no bid timing) blockdaemon 38.1 3569 3.3σ
13,555,615 MEV (no bid timing) ether.fi 57.3 3746 3.1σ
13,554,496 MEV (no bid timing) stakingfacilities_lido 113.6 3805 3.1σ
13,555,002 Local 42.5 3407 3.1σ
13,560,513 MEV (with bid timing) coinbase 34.1 3380 3.1σ
13,560,640 MEV (with bid timing) senseinode_lido 50.0 3662 3.0σ
13,556,486 MEV (no bid timing) blockdaemon 96.0 3661 3.0σ
13,560,660 MEV (no bid timing) 78.3 3658 3.0σ
13,556,317 MEV (no bid timing) 67.8 3651 3.0σ
13,558,580 MEV (no bid timing) 51.7 3639 3.0σ
13,556,162 MEV (no bid timing) 135.0 3689 3.0σ
13,557,047 MEV (no bid timing) 68.7 3627 3.0σ
13,556,626 MEV (no bid timing) 94.3 3626 3.0σ
13,560,708 MEV (no bid timing) 99.0 3625 3.0σ
13,559,126 MEV (no bid timing) 59.1 3623 3.0σ
Total blocks with z-score > 2: 303

Propagation spread outliers

Blocks that were both slow to arrive (high corrected first seen) AND slow to spread across sentries (high propagation spread). The top-right quadrant shows the worst-performing blocks.

Show code
# Flag blocks that are slow on BOTH axes: corrected first seen AND spread.
first_seen_p90 = df["corrected_first_seen_ms"].quantile(0.90)
spread_p90 = df["spread_ms"].quantile(0.90)

# A "double outlier" exceeds the P90 threshold on both metrics.
df["is_double_outlier"] = (df["corrected_first_seen_ms"] > first_seen_p90) & (df["spread_ms"] > spread_p90)

fig = px.scatter(
    df,
    x="corrected_first_seen_ms",
    y="spread_ms",
    color="size_bucket",
    category_orders={"size_bucket": SIZE_ORDER},
    opacity=0.5,
    hover_data={"slot": True, "proposer_entity": True, "builder_category": True, "compressed_kib": ":.1f"},
)

# Quadrant lines at the P90 thresholds; top-right quadrant = worst blocks.
fig.add_vline(x=first_seen_p90, line_dash="dot", line_color="red", 
              annotation_text=f"P90: {first_seen_p90:.0f}ms", annotation_position="top")
fig.add_hline(y=spread_p90, line_dash="dot", line_color="red",
              annotation_text=f"P90: {spread_p90:.0f}ms")

fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title="Propagation spread (ms)"),
    legend_title="Size bucket",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

# Count double outliers and break them down by builder category.
double_outliers = df[df["is_double_outlier"]]
print(f"Blocks in top-right quadrant (both > P90): {len(double_outliers):,} ({len(double_outliers)/len(df)*100:.1f}%)")
shown = 0
for cat in CATEGORY_ORDER:
    count = (double_outliers["builder_category"] == cat).sum()
    shown += count
    print(f"  {cat}: {count:,}")
# Fix: categories outside CATEGORY_ORDER (e.g. "MEV (no bid timing)") were
# silently omitted, so the per-category counts didn't sum to the total.
remainder = len(double_outliers) - shown
if remainder > 0:
    print(f"  Other: {remainder:,}")
Blocks in top-right quadrant (both > P90): 69 (1.0%)
  Local: 2
  MEV (with bid timing): 0

Entity anomaly rate

Percentage of each proposer entity's blocks that have corrected first seen > P95. Entities with high anomaly rates may have connectivity or configuration issues.

Show code
# Calculate P95 threshold using corrected timing
p95_threshold = df["corrected_first_seen_ms"].quantile(0.95)
df["is_slow"] = df["corrected_first_seen_ms"] > p95_threshold

# Aggregate by entity
entity_anomaly = df.groupby("proposer_entity").agg(
    block_count=("slot", "count"),
    slow_count=("is_slow", "sum"),
    median_corrected=("corrected_first_seen_ms", "median"),
).reset_index()
entity_anomaly["anomaly_rate"] = entity_anomaly["slow_count"] / entity_anomaly["block_count"] * 100

# Filter to entities with 20+ blocks and sort by anomaly rate
entity_anomaly = entity_anomaly[entity_anomaly["block_count"] >= 20]
top_anomaly = entity_anomaly.nlargest(15, "anomaly_rate")

fig = go.Figure()

fig.add_trace(go.Bar(
    y=top_anomaly["proposer_entity"],
    x=top_anomaly["anomaly_rate"],
    orientation="h",
    marker_color="#e74c3c",
    text=top_anomaly.apply(lambda r: f"{r['slow_count']:.0f}/{r['block_count']:.0f}", axis=1),
    textposition="outside",
    hovertemplate="<b>%{y}</b><br>Anomaly rate: %{x:.1f}%<br>Slow blocks: %{text}<extra></extra>",
))

# Add expected rate line (5% by definition of P95)
fig.add_vline(x=5, line_dash="dash", line_color="gray", annotation_text="Expected (5%)")

fig.update_layout(
    margin=dict(l=150, r=80, t=30, b=60),
    xaxis=dict(title="% of blocks with corrected first seen > P95", range=[0, max(top_anomaly["anomaly_rate"]) * 1.2]),
    yaxis=dict(title="", categoryorder="total ascending"),
    height=500,
)
fig.show(config={"responsive": True})

print(f"P95 threshold: {p95_threshold:.0f}ms")
print(f"Entities shown: {len(top_anomaly)} (with 20+ blocks, sorted by anomaly rate)")
P95 threshold: 2713ms
Entities shown: 15 (with 20+ blocks, sorted by anomaly rate)