MEV pipeline

Show all code

Analysis of MEV pipeline timing and its effect on block propagation on Ethereum mainnet.

View query

WITH
-- Base slots using proposer duty as the source of truth
slots AS (
    SELECT DISTINCT
        slot,
        slot_start_date_time,
        proposer_validator_index
    FROM canonical_beacon_proposer_duty
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
),

-- Proposer entity mapping
proposer_entity AS (
    SELECT
        index,
        entity
    FROM ethseer_validator_entity
    WHERE meta_network_name = 'mainnet'
),

-- Blob count per slot
blob_count AS (
    SELECT
        slot,
        uniq(blob_index) AS blob_count
    FROM canonical_beacon_blob_sidecar
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
    GROUP BY slot
),

-- Canonical block hash (to verify MEV payload was actually used)
canonical_block AS (
    SELECT DISTINCT
        slot,
        execution_payload_block_hash
    FROM canonical_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
),

-- MEV bid timing using timestamp_ms
mev_bids AS (
    SELECT
        slot,
        slot_start_date_time,
        min(timestamp_ms) AS first_bid_timestamp_ms,
        max(timestamp_ms) AS last_bid_timestamp_ms
    FROM mev_relay_bid_trace
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
    GROUP BY slot, slot_start_date_time
),

-- MEV payload delivery - join canonical block with delivered payloads
-- Note: Use is_mev flag because ClickHouse LEFT JOIN returns 0 (not NULL) for non-matching rows
-- Get value from proposer_payload_delivered (not bid_trace, which may not have the winning block)
mev_payload AS (
    SELECT
        cb.slot,
        cb.execution_payload_block_hash AS winning_block_hash,
        1 AS is_mev,
        max(pd.value) AS winning_bid_value,
        groupArray(DISTINCT pd.relay_name) AS relay_names,
        any(pd.builder_pubkey) AS winning_builder
    FROM canonical_block cb
    GLOBAL INNER JOIN mev_relay_proposer_payload_delivered pd
        ON cb.slot = pd.slot AND cb.execution_payload_block_hash = pd.block_hash
    WHERE pd.meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
    GROUP BY cb.slot, cb.execution_payload_block_hash
),

-- Winning bid timing from bid_trace (may not exist for all MEV blocks)
winning_bid AS (
    SELECT
        bt.slot,
        bt.slot_start_date_time,
        argMin(bt.timestamp_ms, bt.event_date_time) AS winning_bid_timestamp_ms
    FROM mev_relay_bid_trace bt
    GLOBAL INNER JOIN mev_payload mp ON bt.slot = mp.slot AND bt.block_hash = mp.winning_block_hash
    WHERE bt.meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
    GROUP BY bt.slot, bt.slot_start_date_time
),

-- Block gossip timing with spread
block_gossip AS (
    SELECT
        slot,
        min(event_date_time) AS block_first_seen,
        max(event_date_time) AS block_last_seen
    FROM libp2p_gossipsub_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
    GROUP BY slot
),

-- Column arrival timing: first arrival per column, then min/max of those
column_gossip AS (
    SELECT
        slot,
        min(first_seen) AS first_column_first_seen,
        max(first_seen) AS last_column_first_seen
    FROM (
        SELECT
            slot,
            column_index,
            min(event_date_time) AS first_seen
        FROM libp2p_gossipsub_data_column_sidecar
        WHERE meta_network_name = 'mainnet'
          AND slot_start_date_time >= '2026-03-11' AND slot_start_date_time < '2026-03-11'::date + INTERVAL 1 DAY
          AND event_date_time > '1970-01-01 00:00:01'
        GROUP BY slot, column_index
    )
    GROUP BY slot
)

SELECT
    s.slot AS slot,
    s.slot_start_date_time AS slot_start_date_time,
    pe.entity AS proposer_entity,

    -- Blob count
    coalesce(bc.blob_count, 0) AS blob_count,

    -- MEV bid timing (absolute and relative to slot start)
    fromUnixTimestamp64Milli(mb.first_bid_timestamp_ms) AS first_bid_at,
    mb.first_bid_timestamp_ms - toInt64(toUnixTimestamp(mb.slot_start_date_time)) * 1000 AS first_bid_ms,
    fromUnixTimestamp64Milli(mb.last_bid_timestamp_ms) AS last_bid_at,
    mb.last_bid_timestamp_ms - toInt64(toUnixTimestamp(mb.slot_start_date_time)) * 1000 AS last_bid_ms,

    -- Winning bid timing (from bid_trace, may be NULL if block hash not in bid_trace)
    if(wb.slot != 0, fromUnixTimestamp64Milli(wb.winning_bid_timestamp_ms), NULL) AS winning_bid_at,
    if(wb.slot != 0, wb.winning_bid_timestamp_ms - toInt64(toUnixTimestamp(s.slot_start_date_time)) * 1000, NULL) AS winning_bid_ms,

    -- MEV payload info (from proposer_payload_delivered, always present for MEV blocks)
    if(mp.is_mev = 1, mp.winning_bid_value, NULL) AS winning_bid_value,
    if(mp.is_mev = 1, mp.relay_names, []) AS winning_relays,
    if(mp.is_mev = 1, mp.winning_builder, NULL) AS winning_builder,

    -- Block gossip timing with spread
    bg.block_first_seen,
    dateDiff('millisecond', s.slot_start_date_time, bg.block_first_seen) AS block_first_seen_ms,
    bg.block_last_seen,
    dateDiff('millisecond', s.slot_start_date_time, bg.block_last_seen) AS block_last_seen_ms,
    dateDiff('millisecond', bg.block_first_seen, bg.block_last_seen) AS block_spread_ms,

    -- Column arrival timing (NULL when no blobs)
    if(coalesce(bc.blob_count, 0) = 0, NULL, cg.first_column_first_seen) AS first_column_first_seen,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', s.slot_start_date_time, cg.first_column_first_seen)) AS first_column_first_seen_ms,
    if(coalesce(bc.blob_count, 0) = 0, NULL, cg.last_column_first_seen) AS last_column_first_seen,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', s.slot_start_date_time, cg.last_column_first_seen)) AS last_column_first_seen_ms,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', cg.first_column_first_seen, cg.last_column_first_seen)) AS column_spread_ms

FROM slots s
GLOBAL LEFT JOIN proposer_entity pe ON s.proposer_validator_index = pe.index
GLOBAL LEFT JOIN blob_count bc ON s.slot = bc.slot
GLOBAL LEFT JOIN mev_bids mb ON s.slot = mb.slot
GLOBAL LEFT JOIN mev_payload mp ON s.slot = mp.slot
GLOBAL LEFT JOIN winning_bid wb ON s.slot = wb.slot
GLOBAL LEFT JOIN block_gossip bg ON s.slot = bg.slot
GLOBAL LEFT JOIN column_gossip cg ON s.slot = cg.slot

ORDER BY s.slot DESC

Show code

df = load_parquet("block_production_timeline", target_date)

# Flag MEV vs local blocks
df["has_mev"] = df["winning_bid_value"].notna()
df["block_type"] = df["has_mev"].map({True: "MEV", False: "Local"})

print(f"Total slots: {len(df):,}")
print(f"MEV blocks: {df['has_mev'].sum():,} ({df['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df['has_mev']).sum():,} ({(~df['has_mev']).mean()*100:.1f}%)")

Total slots: 7,203
MEV blocks: 5,967 (82.8%)
Local blocks: 1,236 (17.2%)

Bid trace coverage¶

MEV block data comes from two sources with different coverage:

Payload delivered (mev_relay_proposer_payload_delivered): Records when a relay delivers a block to a proposer. Has value, builder, and relay info for all MEV blocks.
Bid trace (mev_relay_bid_trace): Records individual bids during the auction. Has bid timing but may not include the winning block if it was a late bid or data gap.

The chart below shows what proportion of each relay's blocks have bid timing data available.

Show code

# Bid trace coverage analysis
df_trace = df[df["has_mev"]].copy()
df_trace["relay"] = df_trace["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else "Unknown")
df_trace["has_bid_timing"] = df_trace["winning_bid_ms"].notna()

# Aggregate by relay
relay_coverage = df_trace.groupby("relay").agg(
    total=("slot", "count"),
    with_timing=("has_bid_timing", "sum"),
).reset_index()
relay_coverage["without_timing"] = relay_coverage["total"] - relay_coverage["with_timing"]
relay_coverage["pct_with_timing"] = (relay_coverage["with_timing"] / relay_coverage["total"] * 100).round(1)
relay_coverage = relay_coverage.sort_values("total", ascending=True)

# Summary stats
total_mev = relay_coverage["total"].sum()
total_with_timing = relay_coverage["with_timing"].sum()
print(f"MEV blocks: {total_mev:,}")
print(f"With bid timing: {total_with_timing:,} ({total_with_timing/total_mev*100:.1f}%)")
print(f"Without bid timing: {total_mev - total_with_timing:,} ({(total_mev - total_with_timing)/total_mev*100:.1f}%)")

MEV blocks: 5,967
With bid timing: 3,588 (60.1%)
Without bid timing: 2,379 (39.9%)

Show code

# Stacked horizontal bar chart
fig = go.Figure()

fig.add_trace(go.Bar(
    y=relay_coverage["relay"],
    x=relay_coverage["with_timing"],
    name="With bid timing",
    orientation="h",
    marker_color="#2ecc71",
    text=relay_coverage.apply(lambda r: f"{r['pct_with_timing']:.0f}%" if r['with_timing'] > 0 else "", axis=1),
    textposition="inside",
    hovertemplate="<b>%{y}</b><br>With timing: %{x:,}<extra></extra>",
))

fig.add_trace(go.Bar(
    y=relay_coverage["relay"],
    x=relay_coverage["without_timing"],
    name="Without bid timing",
    orientation="h",
    marker_color="#e74c3c",
    hovertemplate="<b>%{y}</b><br>Without timing: %{x:,}<extra></extra>",
))

fig.update_layout(
    barmode="stack",
    margin=dict(l=150, r=30, t=30, b=60),
    xaxis=dict(title="Number of blocks"),
    yaxis=dict(title=""),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=350,
)
fig.show(config={"responsive": True})

MEV pipeline and blob count effects¶

How do blob count and MEV pipeline characteristics interact to affect block propagation? All scatter plots use blob count as the color dimension.

Show code

# Prepare data for MEV analysis
df_mev = df.copy()

# Filter out missed slots (block never produced - shows as invalid timestamps)
# A valid block_first_seen_ms should be positive and reasonable (< 60 seconds)
df_mev = df_mev[df_mev["block_first_seen_ms"].notna()]
df_mev = df_mev[(df_mev["block_first_seen_ms"] >= 0) & (df_mev["block_first_seen_ms"] < 60000)]

# Flag MEV vs local blocks
df_mev["has_mev"] = df_mev["winning_bid_value"].notna()
df_mev["block_type"] = df_mev["has_mev"].map({True: "MEV", False: "Local"})

# Calculate bidding window duration
df_mev["bidding_duration_ms"] = df_mev["last_bid_ms"] - df_mev["first_bid_ms"]

# Calculate block to column delay (for slots with blobs)
df_mev["block_to_column_ms"] = df_mev["first_column_first_seen_ms"] - df_mev["block_first_seen_ms"]

# Calculate bid to block delay (time from winning bid to block appearing)
df_mev["bid_to_block_ms"] = df_mev["block_first_seen_ms"] - df_mev["winning_bid_ms"]

# Dynamic blob count bins based on actual data
max_blobs = df_mev["blob_count"].max()
bin_size = 3
# Create bins: [-1, 0, 3, 6, 9, 12, 15, ...] to match 0, 1-3, 4-6, etc.
bins = [-1, 0] + list(range(bin_size, max_blobs + bin_size, bin_size))
if bins[-1] < max_blobs:
    bins.append(((max_blobs // bin_size) + 1) * bin_size)
labels = ["0"] + [f"{bins[i]+1}-{bins[i+1]}" for i in range(1, len(bins)-1)]
df_mev["blob_bin"] = pd.cut(df_mev["blob_count"], bins=bins, labels=labels)
BLOB_BIN_ORDER = labels  # Store for use in charts

# Generate Plasma-based discrete colors, truncated to avoid light yellow (poor contrast)
# Sample from 0.0 to 0.70 of the Plasma scale (more aggressive truncation)
PLASMA_MAX = 0.70
sample_points = [i / (len(labels) - 1) * PLASMA_MAX for i in range(len(labels))]
BLOB_COLORS = dict(zip(labels, px.colors.sample_colorscale("Plasma", sample_points)))

# Create truncated Plasma colorscale for continuous use (avoids light yellow)
PLASMA_TRUNCATED = px.colors.sample_colorscale("Plasma", [i/10 * PLASMA_MAX for i in range(11)])

# MEV-only subset for MEV timing plots
df_mev_only = df_mev[df_mev["has_mev"]].copy()

print(f"Total slots in data: {len(df):,}")
print(f"Slots with valid blocks: {len(df_mev):,} ({len(df_mev)/len(df)*100:.1f}%)")
print(f"MEV blocks: {df_mev['has_mev'].sum():,} ({df_mev['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df_mev['has_mev']).sum():,} ({(~df_mev['has_mev']).mean()*100:.1f}%)")
print(f"Max blob count: {max_blobs}, bins: {labels}")

Total slots in data: 7,203
Slots with valid blocks: 7,172 (99.6%)
MEV blocks: 5,967 (83.2%)
Local blocks: 1,205 (16.8%)
Max blob count: 21, bins: ['0', '1-3', '4-6', '7-9', '10-12', '13-15', '16-18', '19-21']

Winning bid timing vs block arrival¶

Does late bidding combined with high blob count delay block propagation?

Show code

if len(df_mev_only) > 0:
    # Extract first relay from array for display
    df_plot = df_mev_only.dropna(subset=["winning_bid_ms", "block_first_seen_ms"]).copy()
    df_plot["relay"] = df_plot["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else None)
    df_plot["blob_count_f"] = df_plot["blob_count"].astype(float)  # Force continuous color

    fig = px.scatter(
        df_plot,
        x="winning_bid_ms",
        y="block_first_seen_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Winning bid timing (ms from slot start)"),
        yaxis=dict(title="Block first seen (ms from slot start)"),
        coloraxis_colorbar=dict(title="Blobs"),
        height=450,
    )
    fig.show(config={"responsive": True})
else:
    print("No MEV data available.")

Bid to block delay¶

How long does it take from when the winning bid is submitted until the block is first seen on the network? This measures the latency through the MEV pipeline: bid submission → relay processing → proposer signing → network propagation.

Show code

# Prepare bid to block delay data
df_bid_delay = df_mev_only.dropna(subset=["bid_to_block_ms"]).copy()

# Filter to reasonable range (positive delays, < 5 seconds)
df_bid_delay = df_bid_delay[(df_bid_delay["bid_to_block_ms"] > 0) & (df_bid_delay["bid_to_block_ms"] < 5000)]
df_bid_delay["relay"] = df_bid_delay["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else None)
df_bid_delay["blob_count_f"] = df_bid_delay["blob_count"].astype(float)

# Summary stats
median_delay = df_bid_delay["bid_to_block_ms"].median()
p95_delay = df_bid_delay["bid_to_block_ms"].quantile(0.95)
print(f"Bid to block delay (MEV blocks): median {median_delay:.0f}ms, P95 {p95_delay:.0f}ms, n={len(df_bid_delay):,}")

Bid to block delay (MEV blocks): median 558ms, P95 1702ms, n=3,564

Show code

if len(df_bid_delay) > 0:
    # Scatter plot: bid_to_block_ms vs blob_count
    fig = px.scatter(
        df_bid_delay,
        x="blob_count",
        y="bid_to_block_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.5,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    
    # Add median line per blob count
    median_by_blob = df_bid_delay.groupby("blob_count")["bid_to_block_ms"].median().reset_index()
    fig.add_trace(go.Scatter(
        x=median_by_blob["blob_count"],
        y=median_by_blob["bid_to_block_ms"],
        mode="lines+markers",
        line=dict(color="white", width=2),
        marker=dict(size=8, color="white", line=dict(width=1, color="black")),
        name="Median",
        hovertemplate="Blobs: %{x}<br>Median: %{y:.0f}ms<extra></extra>",
    ))
    
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Blob count", dtick=3),
        yaxis=dict(title="Bid to block delay (ms)"),
        coloraxis_colorbar=dict(title="Blobs"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=450,
    )
    fig.show(config={"responsive": True})
else:
    print("No bid timing data available.")

Winning bid value vs block arrival¶

Do high-value blocks with more blobs behave differently?

Show code

if len(df_mev_only) > 0:
    # Convert winning_bid_value from wei to ETH
    df_plot = df_mev_only.copy()
    df_plot["winning_bid_eth"] = df_plot["winning_bid_value"].astype(float) / 1e18
    df_plot["relay"] = df_plot["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else None)
    df_plot["blob_count_f"] = df_plot["blob_count"].astype(float)  # Force continuous color
    df_plot = df_plot[df_plot["winning_bid_eth"] > 0].dropna(subset=["block_first_seen_ms"])  # Filter for log scale

    fig = px.scatter(
        df_plot,
        x="winning_bid_eth",
        y="block_first_seen_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        log_x=True,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Winning bid value (ETH, log scale)"),
        yaxis=dict(title="Block first seen (ms from slot start)"),
        coloraxis_colorbar=dict(title="Blobs"),
        height=450,
    )
    fig.show(config={"responsive": True})

Bidding window duration vs block arrival¶

Does competitive bidding (longer bidding window) affect propagation differently by blob count?

Show code

df_bidding = df_mev_only.dropna(subset=["bidding_duration_ms", "block_first_seen_ms"])
if len(df_bidding) > 0:
    df_plot = df_bidding.copy()
    df_plot["relay"] = df_plot["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else None)
    df_plot["blob_count_f"] = df_plot["blob_count"].astype(float)  # Force continuous color

    fig = px.scatter(
        df_plot,
        x="bidding_duration_ms",
        y="block_first_seen_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Bidding window duration (ms)"),
        yaxis=dict(title="Block first seen (ms from slot start)"),
        coloraxis_colorbar=dict(title="Blobs"),
        height=450,
    )
    fig.show(config={"responsive": True})

Block arrival by relay and blob count¶

Do some relays handle blob-heavy blocks better than others?

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

Show code

# Extract first relay from array for analysis
df_relay = df_mev_only.copy()
df_relay["relay"] = df_relay["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else None)
df_relay = df_relay.dropna(subset=["relay"])

if len(df_relay) > 0:
    # Get top relays by volume
    top_relays = df_relay["relay"].value_counts().head(8).index.tolist()
    df_relay_top = df_relay[df_relay["relay"].isin(top_relays)].copy()
    
    fig = px.box(
        df_relay_top,
        x="relay",
        y="block_first_seen_ms",
        color="blob_bin",
        category_orders={"blob_bin": BLOB_BIN_ORDER},
        color_discrete_map=BLOB_COLORS,
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=100),
        xaxis=dict(title="Relay", tickangle=45),
        yaxis=dict(title="Block first seen (ms from slot start)"),
        legend_title="Blob count",
        height=500,
    )
    fig.show(config={"responsive": True})

MEV vs local block comparison¶

Do MEV blocks propagate differently than locally-built blocks? The following charts compare timing distributions by blob count.

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

Show code

# Prepare data for MEV vs Local comparison
df_compare = df_mev.copy()

# Create ordered list from 0 to max_blobs (reversed for bottom-to-top display)
all_blob_counts = list(range(int(max_blobs) + 1))
blob_count_order = [str(b) for b in all_blob_counts]
# Reverse for category_orders (Plotly categorical y-axis goes top-to-bottom by default)
blob_count_order_reversed = blob_count_order[::-1]

# Convert blob_count to string for proper categorical ordering
df_compare["blob_count_str"] = df_compare["blob_count"].astype(str)

# Ensure x-axis is numeric (not datetime)
df_compare["block_first_seen_ms"] = df_compare["block_first_seen_ms"].astype(float)

# Summary stats
mev_median = df_compare[df_compare["block_type"] == "MEV"]["block_first_seen_ms"].median()
local_median = df_compare[df_compare["block_type"] == "Local"]["block_first_seen_ms"].median()
print(f"Block first seen median - MEV: {mev_median:.0f}ms, Local: {local_median:.0f}ms")

Block first seen median - MEV: 1505ms, Local: 1535ms

Show code

if len(df_compare) > 0:
    fig = px.box(
        df_compare,
        y="blob_count_str",
        x="block_first_seen_ms",
        color="block_type",
        orientation="h",
        category_orders={"blob_count_str": blob_count_order_reversed, "block_type": ["MEV", "Local"]},
        color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Block first seen (ms from slot start)"),
        yaxis=dict(
            title="Blob count",
            tickmode="array",
            tickvals=blob_count_order_reversed,
            ticktext=blob_count_order_reversed,
        ),
        legend_title="Block type",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=800,
    )
    fig.show(config={"responsive": True})

Show code

# Prepare first column first seen data
df_col_first = df_mev.dropna(subset=["first_column_first_seen_ms"]).copy()
df_col_first = df_col_first[df_col_first["blob_count"] > 0]  # Only slots with blobs

# Create ordered list from 1 to max_blobs (reversed for bottom-to-top display)
col_blob_counts = list(range(1, int(max_blobs) + 1))
col_blob_count_order = [str(b) for b in col_blob_counts]
col_blob_count_order_reversed = col_blob_count_order[::-1]

# Convert blob_count to string for proper categorical ordering
df_col_first["blob_count_str"] = df_col_first["blob_count"].astype(str)

# Ensure x-axis is numeric
df_col_first["first_column_first_seen_ms"] = df_col_first["first_column_first_seen_ms"].astype(float)

# Summary stats
mev_median = df_col_first[df_col_first["block_type"] == "MEV"]["first_column_first_seen_ms"].median()
local_median = df_col_first[df_col_first["block_type"] == "Local"]["first_column_first_seen_ms"].median()
print(f"First column seen median - MEV: {mev_median:.0f}ms, Local: {local_median:.0f}ms")

First column seen median - MEV: 1412ms, Local: 1488ms

Show code

if len(df_col_first) > 0:
    fig = px.box(
        df_col_first,
        y="blob_count_str",
        x="first_column_first_seen_ms",
        color="block_type",
        orientation="h",
        category_orders={"blob_count_str": col_blob_count_order_reversed, "block_type": ["MEV", "Local"]},
        color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="First column first seen (ms from slot start)"),
        yaxis=dict(
            title="Blob count",
            tickmode="array",
            tickvals=col_blob_count_order_reversed,
            ticktext=col_blob_count_order_reversed,
        ),
        legend_title="Block type",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=750,
    )
    fig.show(config={"responsive": True})
else:
    print("No column timing data available.")

Show code

# Prepare last column first seen data
df_col_last = df_mev.dropna(subset=["last_column_first_seen_ms"]).copy()
df_col_last = df_col_last[df_col_last["blob_count"] > 0]  # Only slots with blobs

# Convert blob_count to string for proper categorical ordering
df_col_last["blob_count_str"] = df_col_last["blob_count"].astype(str)

# Ensure x-axis is numeric
df_col_last["last_column_first_seen_ms"] = df_col_last["last_column_first_seen_ms"].astype(float)

# Summary stats
mev_median = df_col_last[df_col_last["block_type"] == "MEV"]["last_column_first_seen_ms"].median()
local_median = df_col_last[df_col_last["block_type"] == "Local"]["last_column_first_seen_ms"].median()
print(f"Last column seen median - MEV: {mev_median:.0f}ms, Local: {local_median:.0f}ms")

Last column seen median - MEV: 1573ms, Local: 1639ms

Show code

if len(df_col_last) > 0:
    fig = px.box(
        df_col_last,
        y="blob_count_str",
        x="last_column_first_seen_ms",
        color="block_type",
        orientation="h",
        category_orders={"blob_count_str": col_blob_count_order_reversed, "block_type": ["MEV", "Local"]},
        color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Last column first seen (ms from slot start)"),
        yaxis=dict(
            title="Blob count",
            tickmode="array",
            tickvals=col_blob_count_order_reversed,
            ticktext=col_blob_count_order_reversed,
        ),
        legend_title="Block type",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=750,
    )
    fig.show(config={"responsive": True})
else:
    print("No column timing data available.")

Block propagation by builder¶

Do different builders produce blocks that propagate at different speeds? Each panel shows one builder's blocks.

Show code

# Prepare builder data for heatmap
df_builder = df_mev_only.dropna(subset=["winning_builder", "block_first_seen_ms"]).copy()

# Shorten builder pubkeys for display (first 8 + last 4 chars)
df_builder["builder_short"] = df_builder["winning_builder"].apply(
    lambda x: f"{x[:10]}...{x[-4:]}" if len(x) > 14 else x
)

# Get top builders by block count, sorted descending
builder_counts = df_builder["builder_short"].value_counts()
top_builders = builder_counts.head(9).index.tolist()
builder_order = top_builders  # Already sorted by count descending
df_builder_top = df_builder[df_builder["builder_short"].isin(top_builders)].copy()

# Print builder stats
print("Top builders by block count:")
for builder in builder_order:
    count = builder_counts[builder]
    median_ms = df_builder_top[df_builder_top["builder_short"] == builder]["block_first_seen_ms"].median()
    print(f"  {builder}: {count:,} blocks, median {median_ms:.0f}ms")

Top builders by block count:
  0xb26f9666...e681: 864 blocks, median 1572ms
  0xb67eaa5e...ab08: 662 blocks, median 1506ms
  0x8527d16c...0069: 427 blocks, median 1619ms
  0x855b00e6...742e: 384 blocks, median 1433ms
  0x850b00e0...0ee7: 382 blocks, median 1482ms
  0x853b0078...4cde: 376 blocks, median 1536ms
  0x856b0004...3f46: 367 blocks, median 1511ms
  0x8db2a99d...be26: 357 blocks, median 1556ms
  0x88a53ec4...b76b: 329 blocks, median 1518ms

Show code

if len(df_builder_top) > 0:
    n_builders = len(top_builders)
    n_cols = 3
    n_rows = (n_builders + n_cols - 1) // n_cols
    
    # Create 100ms bins for x-axis (block timing)
    x_max = df_builder_top["block_first_seen_ms"].quantile(0.99)  # Trim outliers
    x_bins = int(x_max // 100) + 1
    
    fig = px.density_heatmap(
        df_builder_top,
        x="block_first_seen_ms",
        y="blob_count",
        facet_col="builder_short",
        facet_col_wrap=n_cols,
        facet_row_spacing=0.10,
        facet_col_spacing=0.05,
        category_orders={"builder_short": builder_order},
        nbinsx=x_bins,
        nbinsy=int(max_blobs) + 1,
        range_x=[0, x_max],
        range_y=[-0.5, int(max_blobs) + 0.5],
        color_continuous_scale=PLASMA_TRUNCATED,
        histnorm="percent",
    )
    fig.update_layout(
        margin=dict(l=70, r=30, t=40, b=50),
        height=280 * n_rows,
        coloraxis_colorbar=dict(title="% of<br>blocks"),
    )
    # Clean up facet titles - add block count
    for ann in fig.layout.annotations:
        builder = ann.text.replace("builder_short=", "")
        count = builder_counts.get(builder, 0)
        ann.update(text=f"{builder}<br>({count:,} blocks)", font_size=9, yshift=8)
    
    # Add axis titles and ensure ticks are visible on all panels
    fig.for_each_xaxis(lambda x: x.update(
        title="Block seen (ms)", 
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[0, x_max],
    ))
    fig.for_each_yaxis(lambda y: y.update(
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[-0.5, int(max_blobs) + 0.5],
    ))
    fig.update_yaxes(title="Blob count", col=1)
    for i in range(2, n_cols + 1):
        fig.update_yaxes(title="", col=i)
    
    fig.show(config={"responsive": True})

Block propagation by relay¶

Same analysis but grouped by winning relay instead of builder.

Show code

# Prepare relay data for heatmap
df_relay_heat = df_mev_only.dropna(subset=["block_first_seen_ms"]).copy()
df_relay_heat["relay"] = df_relay_heat["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else None)
df_relay_heat = df_relay_heat.dropna(subset=["relay"])

# Get top relays by block count, sorted descending
relay_counts = df_relay_heat["relay"].value_counts()
top_relays = relay_counts.head(9).index.tolist()
relay_order = top_relays  # Already sorted by count descending
df_relay_top = df_relay_heat[df_relay_heat["relay"].isin(top_relays)].copy()

# Print relay stats
print("Top relays by block count:")
for relay in relay_order:
    count = relay_counts[relay]
    median_ms = df_relay_top[df_relay_top["relay"] == relay]["block_first_seen_ms"].median()
    print(f"  {relay}: {count:,} blocks, median {median_ms:.0f}ms")

Top relays by block count:
  BloXroute Max Profit: 1,784 blocks, median 1480ms
  Ultra Sound: 1,274 blocks, median 1534ms
  Titan Relay: 1,153 blocks, median 1452ms
  Aestus: 546 blocks, median 1638ms
  BloXroute Regulated: 475 blocks, median 1548ms
  Flashbots: 396 blocks, median 1446ms
  EthGas: 244 blocks, median 1967ms
  Agnostic Gnosis: 95 blocks, median 1609ms

Show code

if len(df_relay_top) > 0:
    n_relays = len(top_relays)
    n_cols = 3
    n_rows = (n_relays + n_cols - 1) // n_cols
    
    # Create 100ms bins for x-axis (block timing)
    x_max = df_relay_top["block_first_seen_ms"].quantile(0.99)  # Trim outliers
    x_bins = int(x_max // 100) + 1
    
    fig = px.density_heatmap(
        df_relay_top,
        x="block_first_seen_ms",
        y="blob_count",
        facet_col="relay",
        facet_col_wrap=n_cols,
        facet_row_spacing=0.10,
        facet_col_spacing=0.05,
        category_orders={"relay": relay_order},
        nbinsx=x_bins,
        nbinsy=int(max_blobs) + 1,
        range_x=[0, x_max],
        range_y=[-0.5, int(max_blobs) + 0.5],
        color_continuous_scale=PLASMA_TRUNCATED,
        histnorm="percent",
    )
    fig.update_layout(
        margin=dict(l=70, r=30, t=40, b=50),
        height=280 * n_rows,
        coloraxis_colorbar=dict(title="% of<br>blocks"),
    )
    # Clean up facet titles - add block count
    for ann in fig.layout.annotations:
        relay = ann.text.replace("relay=", "")
        count = relay_counts.get(relay, 0)
        ann.update(text=f"{relay}<br>({count:,} blocks)", font_size=9, yshift=8)
    
    # Add axis titles and ensure ticks are visible on all panels
    fig.for_each_xaxis(lambda x: x.update(
        title="Block seen (ms)", 
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[0, x_max],
    ))
    fig.for_each_yaxis(lambda y: y.update(
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[-0.5, int(max_blobs) + 0.5],
    ))
    fig.update_yaxes(title="Blob count", col=1)
    for i in range(2, n_cols + 1):
        fig.update_yaxes(title="", col=i)
    
    fig.show(config={"responsive": True})

Bid timing density¶

Where do most blocks land in the bid timing vs propagation delay space? Contour shows density, white circles highlight outliers (P95+).

Show code

# Density contour with outlier markers
df_timing = df_mev_only.dropna(subset=["winning_bid_ms", "bid_to_block_ms"]).copy()
df_timing = df_timing[(df_timing["bid_to_block_ms"] > 0) & (df_timing["bid_to_block_ms"] < 5000)]

if len(df_timing) > 0:
    fig = go.Figure()
    
    # Density contour base
    contour = px.density_contour(df_timing, x="winning_bid_ms", y="bid_to_block_ms")
    for trace in contour.data:
        trace.update(
            contours_coloring="fill", 
            colorscale="Plasma", 
            showscale=True,
            colorbar=dict(title="Density"),
            line=dict(width=0.5, color="rgba(255,255,255,0.3)"),
        )
        fig.add_trace(trace)
    
    # Outliers (P95+ on either axis)
    q95_x = df_timing["winning_bid_ms"].quantile(0.95)
    q95_y = df_timing["bid_to_block_ms"].quantile(0.95)
    outliers = df_timing[(df_timing["winning_bid_ms"] > q95_x) | (df_timing["bid_to_block_ms"] > q95_y)]
    
    fig.add_trace(go.Scatter(
        x=outliers["winning_bid_ms"],
        y=outliers["bid_to_block_ms"],
        mode="markers",
        marker=dict(
            size=6,
            color="rgba(255,255,255,0.8)",
            line=dict(width=1.5, color="rgba(0,0,0,0.4)"),
        ),
        name=f"Outliers ({len(outliers)})",
        customdata=outliers["slot"],
        hovertemplate="<b>Slot %{customdata}</b><br>Bid: %{x:.0f}ms<br>Propagation: %{y:.0f}ms<extra></extra>",
    ))
    
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Winning bid timing (ms from slot start)"),
        yaxis=dict(title="Bid to block delay (ms)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=450,
    )
    fig.show(config={"responsive": True})

Bid timing by blob count¶

Does the bid timing → propagation delay relationship change with blob count? Each panel shows density for a different blob count range.

Show code

# Faceted heatmap by blob count (same bins as other charts)
df_timing = df_mev_only.dropna(subset=["winning_bid_ms", "bid_to_block_ms"]).copy()
df_timing = df_timing[(df_timing["bid_to_block_ms"] > 0) & (df_timing["bid_to_block_ms"] < 5000)]

if len(df_timing) > 0:
    n_rows = (len(BLOB_BIN_ORDER) + 2) // 3
    
    # Calculate axis ranges
    x_max = df_timing["winning_bid_ms"].quantile(0.99)
    y_max = df_timing["bid_to_block_ms"].quantile(0.99)

    fig = px.density_heatmap(
        df_timing,
        x="winning_bid_ms",
        y="bid_to_block_ms",
        facet_col="blob_bin",
        facet_col_wrap=3,
        facet_row_spacing=0.12,
        facet_col_spacing=0.06,
        category_orders={"blob_bin": BLOB_BIN_ORDER},
        nbinsx=25,
        nbinsy=25,
        range_x=[0, x_max],
        range_y=[0, y_max],
        color_continuous_scale=PLASMA_TRUNCATED,
    )
    fig.update_layout(
        margin=dict(l=80, r=30, t=40, b=50),
        height=320 * n_rows,
        coloraxis_colorbar=dict(title="Count"),
    )
    # Clean up facet titles
    fig.for_each_annotation(lambda a: a.update(
        text=a.text.replace("blob_bin=", "") + " blobs",
        font_size=11,
        yshift=5,
    ))
    # Add x-axis title to all subplots, with smaller tick fonts and explicit ranges
    fig.for_each_xaxis(lambda x: x.update(
        title="Bid timing (ms)", 
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[0, x_max],
    ))
    # Y-axis title only on leftmost column, with explicit ranges
    fig.for_each_yaxis(lambda y: y.update(
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[0, y_max],
    ))
    fig.update_yaxes(title="Propagation (ms)", col=1)
    for i in range(2, 4):
        fig.update_yaxes(title="", col=i)
    fig.show(config={"responsive": True})