Fri, Dec 5, 2025

Propagation anomalies - 2025-12-05

Detection of blocks that propagated slower than expected given their blob count.

Show code
# Render the SQL behind the "block_production_timeline" extract for the report.
display_sql("block_production_timeline", target_date)
View query
WITH
-- Base slots using proposer duty as the source of truth
-- (proposer duty covers every scheduled slot, including missed ones)
slots AS (
    SELECT DISTINCT
        slot,
        slot_start_date_time,
        proposer_validator_index
    FROM canonical_beacon_proposer_duty
    WHERE meta_network_name = 'mainnet'
      -- Half-open day window: [2025-12-05 00:00, 2025-12-06 00:00)
      AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
),

-- Proposer entity mapping
proposer_entity AS (
    SELECT
        index,
        entity
    FROM ethseer_validator_entity
    WHERE meta_network_name = 'mainnet'
),

-- Blob count per slot
blob_count AS (
    SELECT
        slot,
        -- uniqExact (not uniq): uniq() is approximate; exactness is free at
        -- blob-sidecar cardinality (a handful of indices per slot)
        uniqExact(blob_index) AS blob_count
    FROM canonical_beacon_blob_sidecar
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
    GROUP BY slot
),

-- Canonical block hash (to verify MEV payload was actually used)
canonical_block AS (
    SELECT
        slot,
        execution_payload_block_hash
    FROM canonical_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
),

-- MEV bid timing using timestamp_ms
mev_bids AS (
    SELECT
        slot,
        slot_start_date_time,
        min(timestamp_ms) AS first_bid_timestamp_ms,
        max(timestamp_ms) AS last_bid_timestamp_ms
    FROM mev_relay_bid_trace
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
    GROUP BY slot, slot_start_date_time
),

-- MEV payload delivery - join canonical block with delivered payloads
-- Note: Use is_mev flag because ClickHouse LEFT JOIN returns 0 (not NULL) for non-matching rows
-- Get value from proposer_payload_delivered (not bid_trace, which may not have the winning block)
mev_payload AS (
    SELECT
        cb.slot,
        cb.execution_payload_block_hash AS winning_block_hash,
        1 AS is_mev,
        max(pd.value) AS winning_bid_value,
        groupArray(DISTINCT pd.relay_name) AS relay_names,
        any(pd.builder_pubkey) AS winning_builder
    FROM canonical_block cb
    GLOBAL INNER JOIN mev_relay_proposer_payload_delivered pd
        ON cb.slot = pd.slot AND cb.execution_payload_block_hash = pd.block_hash
    WHERE pd.meta_network_name = 'mainnet'
      -- qualified: only pd carries slot_start_date_time in this join
      AND pd.slot_start_date_time >= '2025-12-05' AND pd.slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
    GROUP BY cb.slot, cb.execution_payload_block_hash
),

-- Winning bid timing from bid_trace (may not exist for all MEV blocks)
winning_bid AS (
    SELECT
        bt.slot,
        bt.slot_start_date_time,
        -- timestamp_ms of the earliest observed trace event for the winning hash
        argMin(bt.timestamp_ms, bt.event_date_time) AS winning_bid_timestamp_ms
    FROM mev_relay_bid_trace bt
    GLOBAL INNER JOIN mev_payload mp ON bt.slot = mp.slot AND bt.block_hash = mp.winning_block_hash
    WHERE bt.meta_network_name = 'mainnet'
      -- qualified: only bt carries slot_start_date_time in this join
      AND bt.slot_start_date_time >= '2025-12-05' AND bt.slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
    GROUP BY bt.slot, bt.slot_start_date_time
),

-- Block gossip timing with spread
block_gossip AS (
    SELECT
        slot,
        min(event_date_time) AS block_first_seen,
        max(event_date_time) AS block_last_seen
    FROM libp2p_gossipsub_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
    GROUP BY slot
),

-- Column arrival timing: first arrival per column, then min/max of those
column_gossip AS (
    SELECT
        slot,
        min(first_seen) AS first_column_first_seen,
        max(first_seen) AS last_column_first_seen
    FROM (
        SELECT
            slot,
            column_index,
            min(event_date_time) AS first_seen
        FROM libp2p_gossipsub_data_column_sidecar
        WHERE meta_network_name = 'mainnet'
          AND slot_start_date_time >= '2025-12-05' AND slot_start_date_time < '2025-12-05'::date + INTERVAL 1 DAY
          -- discard zero/epoch-default timestamps
          AND event_date_time > '1970-01-01 00:00:01'
        GROUP BY slot, column_index
    )
    GROUP BY slot
)

SELECT
    s.slot AS slot,
    s.slot_start_date_time AS slot_start_date_time,
    pe.entity AS proposer_entity,

    -- Blob count
    coalesce(bc.blob_count, 0) AS blob_count,

    -- MEV bid timing (absolute and relative to slot start)
    fromUnixTimestamp64Milli(mb.first_bid_timestamp_ms) AS first_bid_at,
    mb.first_bid_timestamp_ms - toInt64(toUnixTimestamp(mb.slot_start_date_time)) * 1000 AS first_bid_ms,
    fromUnixTimestamp64Milli(mb.last_bid_timestamp_ms) AS last_bid_at,
    mb.last_bid_timestamp_ms - toInt64(toUnixTimestamp(mb.slot_start_date_time)) * 1000 AS last_bid_ms,

    -- Winning bid timing (from bid_trace, may be NULL if block hash not in bid_trace)
    -- wb.slot = 0 is the ClickHouse LEFT JOIN "no match" default, not a real slot
    if(wb.slot != 0, fromUnixTimestamp64Milli(wb.winning_bid_timestamp_ms), NULL) AS winning_bid_at,
    if(wb.slot != 0, wb.winning_bid_timestamp_ms - toInt64(toUnixTimestamp(s.slot_start_date_time)) * 1000, NULL) AS winning_bid_ms,

    -- MEV payload info (from proposer_payload_delivered, always present for MEV blocks)
    if(mp.is_mev = 1, mp.winning_bid_value, NULL) AS winning_bid_value,
    if(mp.is_mev = 1, mp.relay_names, []) AS winning_relays,
    if(mp.is_mev = 1, mp.winning_builder, NULL) AS winning_builder,

    -- Block gossip timing with spread
    bg.block_first_seen,
    dateDiff('millisecond', s.slot_start_date_time, bg.block_first_seen) AS block_first_seen_ms,
    bg.block_last_seen,
    dateDiff('millisecond', s.slot_start_date_time, bg.block_last_seen) AS block_last_seen_ms,
    dateDiff('millisecond', bg.block_first_seen, bg.block_last_seen) AS block_spread_ms,

    -- Column arrival timing (NULL when no blobs)
    if(coalesce(bc.blob_count, 0) = 0, NULL, cg.first_column_first_seen) AS first_column_first_seen,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', s.slot_start_date_time, cg.first_column_first_seen)) AS first_column_first_seen_ms,
    if(coalesce(bc.blob_count, 0) = 0, NULL, cg.last_column_first_seen) AS last_column_first_seen,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', s.slot_start_date_time, cg.last_column_first_seen)) AS last_column_first_seen_ms,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', cg.first_column_first_seen, cg.last_column_first_seen)) AS column_spread_ms

FROM slots s
GLOBAL LEFT JOIN proposer_entity pe ON s.proposer_validator_index = pe.index
GLOBAL LEFT JOIN blob_count bc ON s.slot = bc.slot
GLOBAL LEFT JOIN mev_bids mb ON s.slot = mb.slot
GLOBAL LEFT JOIN mev_payload mp ON s.slot = mp.slot
GLOBAL LEFT JOIN winning_bid wb ON s.slot = wb.slot
GLOBAL LEFT JOIN block_gossip bg ON s.slot = bg.slot
GLOBAL LEFT JOIN column_gossip cg ON s.slot = cg.slot

ORDER BY s.slot DESC
Show code
# Load the day's per-slot block production timeline extract.
df = load_parquet("block_production_timeline", target_date)

# Keep only slots where a block was actually observed on gossip, with a
# first-seen time inside a sane [0s, 60s) window from slot start.
df = df[df["block_first_seen_ms"].notna()]
in_window = (df["block_first_seen_ms"] >= 0) & (df["block_first_seen_ms"] < 60000)
df = df[in_window]

# A non-null winning bid value means the canonical payload was relay-delivered.
df["has_mev"] = df["winning_bid_value"].notna()
df["block_type"] = df["has_mev"].map(lambda mev: "MEV" if mev else "Local")

# Upper bound for blob-count axes in the charts below.
max_blobs = df["blob_count"].max()

print(f"Total valid blocks: {len(df):,}")
print(f"MEV blocks: {df['has_mev'].sum():,} ({df['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df['has_mev']).sum():,} ({(~df['has_mev']).mean()*100:.1f}%)")
Total valid blocks: 7,117
MEV blocks: 6,382 (89.7%)
Local blocks: 735 (10.3%)

Anomaly detection method

Blocks that are slow relative to their blob count are more interesting than blocks that are simply slow. A 3-second block with 15 blobs may be close to expectation; with 0 blobs it is well above it and gets flagged.

The method:

  1. Fit linear regression: block_first_seen_ms ~ blob_count
  2. Calculate residuals (actual - expected)
  3. Flag blocks with residuals > 2σ as anomalies

Points above the upper edge of the ±2σ band propagated slower than expected given their blob count; only these are flagged (unusually fast blocks below the band are not).

Show code
# Conditional outliers: a block is anomalous when it is slow *for its blob
# count*, i.e. its residual against the fitted trend exceeds 2σ.
df_anomaly = df.copy()

# Linear trend of propagation time vs blob count.
blob_x = df_anomaly["blob_count"].astype(float)
slope, intercept, r_value, p_value, std_err = stats.linregress(
    blob_x, df_anomaly["block_first_seen_ms"]
)

# Expected arrival time per block and how far each block overshoots it.
df_anomaly["expected_ms"] = intercept + slope * blob_x
df_anomaly["residual_ms"] = df_anomaly["block_first_seen_ms"] - df_anomaly["expected_ms"]

# Spread of the residuals defines the anomaly threshold.
residual_std = df_anomaly["residual_ms"].std()

# One-sided flag: only unexpectedly SLOW blocks (residual > +2σ) count.
df_anomaly["is_anomaly"] = df_anomaly["residual_ms"] > 2 * residual_std

n_anomalies = df_anomaly["is_anomaly"].sum()
pct_anomalies = n_anomalies / len(df_anomaly) * 100

# Outlier subset, tagged with the first delivering relay ("Local" when none).
df_outliers = df_anomaly.loc[df_anomaly["is_anomaly"]].copy()
df_outliers["relay"] = df_outliers["winning_relays"].apply(
    lambda relays: relays[0] if len(relays) > 0 else "Local"
)

print(f"Regression: block_ms = {intercept:.1f} + {slope:.2f} × blob_count (R² = {r_value**2:.3f})")
print(f"Residual σ = {residual_std:.1f}ms")
print(f"Anomalies (>2σ slow): {n_anomalies:,} ({pct_anomalies:.1f}%)")
Regression: block_ms = 1756.9 + 13.65 × blob_count (R² = 0.003)
Residual σ = 671.0ms
Anomalies (>2σ slow): 130 (1.8%)
Show code
# Scatter of block arrival time vs blob count, with the fitted trend line
# and its ±2σ band, highlighting the flagged anomalies.
xs = np.array([0, int(max_blobs)])
trend = intercept + slope * xs
band_hi = trend + 2 * residual_std
band_lo = trend - 2 * residual_std

fig = go.Figure()

# Shaded ±2σ band drawn as one closed polygon: top edge, then bottom reversed.
fig.add_trace(go.Scatter(
    x=np.concatenate([xs, xs[::-1]]),
    y=np.concatenate([band_hi, band_lo[::-1]]),
    fill="toself",
    fillcolor="rgba(100,100,100,0.2)",
    line=dict(width=0),
    name="±2σ band",
    hoverinfo="skip",
))

# Dashed regression line: expected arrival time at each blob count.
fig.add_trace(go.Scatter(
    x=xs,
    y=trend,
    mode="lines",
    line=dict(color="white", width=2, dash="dash"),
    name="Expected",
))

# Non-anomalous blocks; downsample (fixed seed) when dense to limit overplotting.
normal_points = df_anomaly[~df_anomaly["is_anomaly"]]
if len(normal_points) > 2000:
    normal_points = normal_points.sample(2000, random_state=42)

fig.add_trace(go.Scatter(
    x=normal_points["blob_count"],
    y=normal_points["block_first_seen_ms"],
    mode="markers",
    marker=dict(size=4, color="rgba(100,150,200,0.4)"),
    name=f"Normal ({len(df_anomaly) - n_anomalies:,})",
    hoverinfo="skip",
))

# Anomalies, with slot / residual / relay detail surfaced in the hover.
fig.add_trace(go.Scatter(
    x=df_outliers["blob_count"],
    y=df_outliers["block_first_seen_ms"],
    mode="markers",
    marker=dict(
        size=7,
        color="#e74c3c",
        line=dict(width=1, color="white"),
    ),
    name=f"Anomalies ({n_anomalies:,})",
    customdata=np.column_stack([
        df_outliers["slot"],
        df_outliers["residual_ms"].round(0),
        df_outliers["relay"],
    ]),
    hovertemplate="<b>Slot %{customdata[0]}</b><br>Blobs: %{x}<br>Actual: %{y:.0f}ms<br>+%{customdata[1]}ms vs expected<br>Relay: %{customdata[2]}<extra></extra>",
))

fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Blob count", range=[-0.5, int(max_blobs) + 0.5]),
    yaxis=dict(title="Block first seen (ms from slot start)"),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

All propagation anomalies

Blocks that propagated much slower than expected given their blob count, sorted by residual (worst first).

Show code
# All anomalies table with selectable text and Lab links.
# Rendered as raw HTML (rather than a DataFrame display) so the text is
# copy/paste-able and each slot links out to the ethPandaOps Lab slot view.
if n_anomalies > 0:
    # Worst offenders first: largest positive residual = slowest vs expectation.
    df_table = df_outliers.sort_values("residual_ms", ascending=False)[
        ["slot", "blob_count", "block_first_seen_ms", "expected_ms", "residual_ms", "relay"]
    ].copy()
    # Round the timing columns to whole milliseconds for display.
    df_table["block_first_seen_ms"] = df_table["block_first_seen_ms"].round(0).astype(int)
    df_table["expected_ms"] = df_table["expected_ms"].round(0).astype(int)
    df_table["residual_ms"] = df_table["residual_ms"].round(0).astype(int)
    
    # Create Lab links
    df_table["lab_link"] = df_table["slot"].apply(
        lambda s: f'<a href="https://lab.ethpandaops.io/ethereum/slots/{s}" target="_blank">View</a>'
    )
    
    # Build HTML table: scoped styles + sticky header + scrollable container.
    html = '''
    <style>
    .anomaly-table { border-collapse: collapse; width: 100%; font-family: monospace; font-size: 13px; }
    .anomaly-table th { background: #2c3e50; color: white; padding: 8px 12px; text-align: left; position: sticky; top: 0; }
    .anomaly-table td { padding: 6px 12px; border-bottom: 1px solid #eee; }
    .anomaly-table tr:hover { background: #f5f5f5; }
    .anomaly-table .num { text-align: right; }
    .anomaly-table .delta { background: #ffebee; color: #c62828; font-weight: bold; }
    .anomaly-table a { color: #1976d2; text-decoration: none; }
    .anomaly-table a:hover { text-decoration: underline; }
    .table-container { max-height: 600px; overflow-y: auto; }
    </style>
    <div class="table-container">
    <table class="anomaly-table">
    <thead>
    <tr><th>Slot</th><th class="num">Blobs</th><th class="num">Actual (ms)</th><th class="num">Expected (ms)</th><th class="num">Δ (ms)</th><th>Relay</th><th>Lab</th></tr>
    </thead>
    <tbody>
    '''
    
    # NOTE(review): row values (notably the relay string) are interpolated into
    # HTML without escaping — assumed safe because relay names come from the
    # relay dataset; confirm they can never contain markup.
    for _, row in df_table.iterrows():
        html += f'''<tr>
            <td>{row["slot"]}</td>
            <td class="num">{row["blob_count"]}</td>
            <td class="num">{row["block_first_seen_ms"]}</td>
            <td class="num">{row["expected_ms"]}</td>
            <td class="num delta">+{row["residual_ms"]}</td>
            <td>{row["relay"]}</td>
            <td>{row["lab_link"]}</td>
        </tr>'''
    
    html += '</tbody></table></div>'
    display(HTML(html))
    print(f"\nTotal anomalies: {len(df_table):,}")
else:
    print("No anomalies detected.")
SlotBlobsActual (ms)Expected (ms)Δ (ms)RelayLab
13178944 0 22378 1757 +20621 Local View
13178276 0 9740 1757 +7983 Local View
13173985 0 8992 1757 +7235 Local View
13177590 0 8891 1757 +7134 Local View
13174913 0 8084 1757 +6327 Local View
13173729 0 4794 1757 +3037 Local View
13172912 0 4787 1757 +3030 Local View
13179522 0 4645 1757 +2888 Local View
13178482 4 4585 1811 +2774 Local View
13179122 9 4309 1880 +2429 Ultra Sound View
13172448 0 4164 1757 +2407 Local View
13177930 0 4100 1757 +2343 Local View
13174586 0 4040 1757 +2283 Local View
13173927 0 4016 1757 +2259 Local View
13174496 0 3877 1757 +2120 Local View
13177536 9 3949 1880 +2069 BloXroute Regulated View
13178720 7 3893 1852 +2041 Ultra Sound View
13178564 4 3835 1811 +2024 Aestus View
13178648 4 3743 1811 +1932 Titan Relay View
13172832 0 3636 1757 +1879 Local View
13174645 6 3680 1839 +1841 EthGas View
13176499 5 3557 1825 +1732 Titan Relay View
13178912 8 3583 1866 +1717 BloXroute Regulated View
13177088 3 3498 1798 +1700 Ultra Sound View
13173024 8 3558 1866 +1692 BloXroute Max Profit View
13175648 1 3459 1771 +1688 Local View
13174228 6 3527 1839 +1688 EthGas View
13178399 3 3476 1798 +1678 BloXroute Max Profit View
13178736 4 3475 1811 +1664 Titan Relay View
13174034 6 3490 1839 +1651 BloXroute Regulated View
13175817 8 3505 1866 +1639 Ultra Sound View
13177115 0 3374 1757 +1617 Local View
13179456 0 3352 1757 +1595 BloXroute Max Profit View
13177444 9 3469 1880 +1589 BloXroute Regulated View
13175913 8 3444 1866 +1578 Ultra Sound View
13179253 6 3397 1839 +1558 Local View
13173991 2 3339 1784 +1555 Titan Relay View
13178400 6 3389 1839 +1550 BloXroute Max Profit View
13176610 9 3429 1880 +1549 Titan Relay View
13176640 9 3429 1880 +1549 Ultra Sound View
13175933 3 3345 1798 +1547 Local View
13175117 0 3297 1757 +1540 BloXroute Regulated View
13179296 0 3283 1757 +1526 Ultra Sound View
13174176 8 3383 1866 +1517 Ultra Sound View
13173500 6 3349 1839 +1510 BloXroute Max Profit View
13179407 6 3343 1839 +1504 Titan Relay View
13176340 6 3341 1839 +1502 Ultra Sound View
13179189 3 3295 1798 +1497 Aestus View
13179041 6 3335 1839 +1496 Titan Relay View
13177504 9 3371 1880 +1491 Ultra Sound View
13176249 6 3324 1839 +1485 Titan Relay View
13173862 7 3336 1852 +1484 BloXroute Regulated View
13174126 3 3277 1798 +1479 BloXroute Regulated View
13174336 3 3276 1798 +1478 BloXroute Max Profit View
13174765 4 3287 1811 +1476 BloXroute Regulated View
13174566 6 3313 1839 +1474 BloXroute Regulated View
13176729 6 3309 1839 +1470 Titan Relay View
13178048 6 3305 1839 +1466 Titan Relay View
13172682 9 3345 1880 +1465 Titan Relay View
13175198 0 3222 1757 +1465 BloXroute Max Profit View
13172897 9 3343 1880 +1463 Titan Relay View
13174546 6 3301 1839 +1462 BloXroute Max Profit View
13178844 7 3313 1852 +1461 Titan Relay View
13176124 8 3326 1866 +1460 Agnostic Gnosis View
13178861 0 3216 1757 +1459 Ultra Sound View
13178251 6 3294 1839 +1455 BloXroute Regulated View
13176006 6 3290 1839 +1451 BloXroute Regulated View
13178039 3 3244 1798 +1446 Flashbots View
13174299 2 3230 1784 +1446 Flashbots View
13173937 8 3310 1866 +1444 BloXroute Regulated View
13172792 6 3282 1839 +1443 Flashbots View
13178748 4 3253 1811 +1442 Ultra Sound View
13178195 4 3252 1811 +1441 BloXroute Regulated View
13175975 7 3291 1852 +1439 Titan Relay View
13179184 4 3247 1811 +1436 Ultra Sound View
13173966 3 3227 1798 +1429 BloXroute Regulated View
13176287 5 3254 1825 +1429 Ultra Sound View
13172608 3 3224 1798 +1426 Ultra Sound View
13177915 6 3264 1839 +1425 Titan Relay View
13175006 7 3275 1852 +1423 BloXroute Regulated View
13172667 0 3179 1757 +1422 Ultra Sound View
13176007 3 3214 1798 +1416 Ultra Sound View
13175615 6 3251 1839 +1412 BloXroute Max Profit View
13178189 7 3260 1852 +1408 BloXroute Regulated View
13173060 9 3285 1880 +1405 Agnostic Gnosis View
13179524 9 3284 1880 +1404 Titan Relay View
13177468 9 3284 1880 +1404 Titan Relay View
13173293 7 3253 1852 +1401 Flashbots View
13176753 6 3239 1839 +1400 Ultra Sound View
13172486 9 3278 1880 +1398 BloXroute Max Profit View
13176266 4 3205 1811 +1394 Aestus View
13174702 7 3245 1852 +1393 Ultra Sound View
13177657 9 3272 1880 +1392 BloXroute Regulated View
13174743 4 3201 1811 +1390 BloXroute Regulated View
13179342 6 3222 1839 +1383 BloXroute Regulated View
13177720 0 3140 1757 +1383 Flashbots View
13175655 9 3262 1880 +1382 Ultra Sound View
13178903 4 3193 1811 +1382 Aestus View
13173284 3 3176 1798 +1378 Titan Relay View
13176372 6 3214 1839 +1375 Ultra Sound View
13176627 3 3172 1798 +1374 BloXroute Max Profit View
13177306 8 3239 1866 +1373 Ultra Sound View
13176915 2 3156 1784 +1372 Ultra Sound View
13178356 9 3251 1880 +1371 BloXroute Max Profit View
13174357 8 3237 1866 +1371 EthGas View
13175429 3 3165 1798 +1367 BloXroute Max Profit View
13178284 9 3246 1880 +1366 BloXroute Regulated View
13175649 4 3177 1811 +1366 Ultra Sound View
13173078 9 3245 1880 +1365 BloXroute Max Profit View
13172424 4 3174 1811 +1363 Titan Relay View
13178198 6 3201 1839 +1362 Titan Relay View
13175439 3 3160 1798 +1362 Ultra Sound View
13174003 3 3160 1798 +1362 Ultra Sound View
13174021 8 3227 1866 +1361 BloXroute Regulated View
13177814 4 3171 1811 +1360 BloXroute Regulated View
13176842 9 3239 1880 +1359 Agnostic Gnosis View
13172663 0 3116 1757 +1359 BloXroute Regulated View
13177148 7 3211 1852 +1359 Flashbots View
13175521 3 3156 1798 +1358 Ultra Sound View
13177027 0 3111 1757 +1354 Agnostic Gnosis View
13175748 4 3165 1811 +1354 BloXroute Max Profit View
13176622 9 3233 1880 +1353 BloXroute Max Profit View
13176807 9 3232 1880 +1352 Ultra Sound View
13178185 9 3226 1880 +1346 Titan Relay View
13175937 5 3171 1825 +1346 Ultra Sound View
13174908 7 3198 1852 +1346 Titan Relay View
13177739 0 3101 1757 +1344 Aestus View
13175305 0 3100 1757 +1343 Ultra Sound View
13176330 9 3222 1880 +1342 Ultra Sound View
13179278 9 3222 1880 +1342 BloXroute Max Profit View
Total anomalies: 130

Anomalies by relay

Which relays have the most propagation anomalies?

Show code
if n_anomalies > 0:
    # Anomaly count per relay (relay = first delivering relay, "Local" if none).
    relay_counts = df_outliers["relay"].value_counts().reset_index()
    relay_counts.columns = ["relay", "anomaly_count"]
    
    # Denominator for the rate: total blocks each relay delivered in the window.
    df_anomaly["relay"] = df_anomaly["winning_relays"].apply(
        lambda relays: relays[0] if len(relays) > 0 else "Local"
    )
    total_by_relay = df_anomaly.groupby("relay").size().reset_index(name="total_blocks")
    
    relay_counts = relay_counts.merge(total_by_relay, on="relay")
    relay_counts["anomaly_rate"] = relay_counts["anomaly_count"] / relay_counts["total_blocks"] * 100
    # Ascending so the largest bar lands on top of the horizontal chart.
    relay_counts = relay_counts.sort_values("anomaly_count", ascending=True)
    
    # "count (rate%)" label alongside each bar.
    bar_labels = [
        f"{count} ({rate:.1f}%)"
        for count, rate in zip(relay_counts["anomaly_count"], relay_counts["anomaly_rate"])
    ]
    
    fig = go.Figure()
    
    fig.add_trace(go.Bar(
        y=relay_counts["relay"],
        x=relay_counts["anomaly_count"],
        orientation="h",
        marker_color="#e74c3c",
        text=bar_labels,
        textposition="outside",
        hovertemplate="<b>%{y}</b><br>Anomalies: %{x}<br>Total blocks: %{customdata[0]:,}<br>Rate: %{customdata[1]:.1f}%<extra></extra>",
        customdata=np.column_stack([relay_counts["total_blocks"], relay_counts["anomaly_rate"]]),
    ))
    
    fig.update_layout(
        margin=dict(l=150, r=80, t=30, b=60),
        xaxis=dict(title="Number of anomalies"),
        yaxis=dict(title=""),
        height=350,
    )
    fig.show(config={"responsive": True})

Anomalies by blob count

Are anomalies more common at certain blob counts?

Show code
if n_anomalies > 0:
    # Bucket both anomalies and all blocks by blob count.
    blob_anomalies = df_outliers.groupby("blob_count").size().reset_index(name="anomaly_count")
    blob_total = df_anomaly.groupby("blob_count").size().reset_index(name="total_blocks")
    
    # Left join keeps blob counts with zero anomalies; fill the gaps with 0.
    blob_stats = blob_total.merge(blob_anomalies, on="blob_count", how="left").fillna(0)
    blob_stats["anomaly_count"] = blob_stats["anomaly_count"].astype(int)
    blob_stats["anomaly_rate"] = blob_stats["anomaly_count"] / blob_stats["total_blocks"] * 100
    
    fig = go.Figure()
    
    # One bar per blob count; totals and rates surfaced in the hover.
    fig.add_trace(go.Bar(
        x=blob_stats["blob_count"],
        y=blob_stats["anomaly_count"],
        marker_color="#e74c3c",
        hovertemplate="<b>%{x} blobs</b><br>Anomalies: %{y}<br>Total: %{customdata[0]:,}<br>Rate: %{customdata[1]:.1f}%<extra></extra>",
        customdata=np.column_stack([blob_stats["total_blocks"], blob_stats["anomaly_rate"]]),
    ))
    
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Blob count", dtick=1),
        yaxis=dict(title="Number of anomalies"),
        height=350,
    )
    fig.show(config={"responsive": True})