Propagation anomalies
Detection of blocks that propagated slower than expected given their blob count.
display_sql("block_production_timeline", target_date)
df = load_parquet("block_production_timeline", target_date)
# Filter to valid blocks (exclude missed slots)
df = df[df["block_first_seen_ms"].notna()]
df = df[(df["block_first_seen_ms"] >= 0) & (df["block_first_seen_ms"] < 60000)]
# Flag MEV vs local blocks
df["has_mev"] = df["winning_bid_value"].notna()
df["block_type"] = df["has_mev"].map({True: "MEV", False: "Local"})
# Get max blob count for charts
max_blobs = df["blob_count"].max()
print(f"Total valid blocks: {len(df):,}")
print(f"MEV blocks: {df['has_mev'].sum():,} ({df['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df['has_mev']).sum():,} ({(~df['has_mev']).mean()*100:.1f}%)")
Anomaly detection method
Blocks that are slow relative to their blob count are more interesting than blocks that are simply slow. A 500ms block with 15 blobs may be normal; with 0 blobs it's anomalous.
The method:
- Fit a linear regression: block_first_seen_ms ~ blob_count
- Calculate residuals (actual - expected)
- Flag blocks with residuals > 2σ as anomalies (a minimal sketch of this rule follows below)
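To make the rule concrete, here is a minimal sketch of the flagging logic applied to the 500ms example above. The slope, intercept, and residual σ are hypothetical placeholders chosen for illustration only, not values fitted from this data (the real fit is computed in the code below).
# Hypothetical fit values for illustration only (ms per blob, ms, ms);
# the actual values come from the regression fitted further down.
slope_ex, intercept_ex, residual_std_ex = 15.0, 200.0, 100.0

def is_slow_anomaly(blob_count: int, actual_ms: float) -> bool:
    """Return True if the block arrived more than 2σ later than the fit predicts."""
    expected_ms = intercept_ex + slope_ex * blob_count
    return (actual_ms - expected_ms) > 2 * residual_std_ex

print(is_slow_anomaly(15, 500))  # False: 500ms with 15 blobs sits inside the band
print(is_slow_anomaly(0, 500))   # True: 500ms with 0 blobs is >2σ slower than expected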
In the chart below, points above the ±2σ band propagated slower than expected given their blob count.
# Conditional outliers: blocks slow relative to their blob count
df_anomaly = df.copy()
# Fit regression: block_first_seen_ms ~ blob_count
slope, intercept, r_value, p_value, std_err = stats.linregress(
df_anomaly["blob_count"].astype(float), df_anomaly["block_first_seen_ms"]
)
# Calculate expected value and residual
df_anomaly["expected_ms"] = intercept + slope * df_anomaly["blob_count"].astype(float)
df_anomaly["residual_ms"] = df_anomaly["block_first_seen_ms"] - df_anomaly["expected_ms"]
# Calculate residual standard deviation
residual_std = df_anomaly["residual_ms"].std()
# Flag anomalies: residual > 2σ (unexpectedly slow)
df_anomaly["is_anomaly"] = df_anomaly["residual_ms"] > 2 * residual_std
n_anomalies = df_anomaly["is_anomaly"].sum()
pct_anomalies = n_anomalies / len(df_anomaly) * 100
# Prepare outliers dataframe
df_outliers = df_anomaly[df_anomaly["is_anomaly"]].copy()
df_outliers["relay"] = df_outliers["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else "Local")
print(f"Regression: block_ms = {intercept:.1f} + {slope:.2f} × blob_count (R² = {r_value**2:.3f})")
print(f"Residual σ = {residual_std:.1f}ms")
print(f"Anomalies (>2σ slow): {n_anomalies:,} ({pct_anomalies:.1f}%)")
# Create scatter plot with regression band
x_range = np.array([0, int(max_blobs)])
y_pred = intercept + slope * x_range
y_upper = y_pred + 2 * residual_std
y_lower = y_pred - 2 * residual_std
fig = go.Figure()
# Add ±2σ band
fig.add_trace(go.Scatter(
x=np.concatenate([x_range, x_range[::-1]]),
y=np.concatenate([y_upper, y_lower[::-1]]),
fill="toself",
fillcolor="rgba(100,100,100,0.2)",
line=dict(width=0),
name="±2σ band",
hoverinfo="skip",
))
# Add regression line
fig.add_trace(go.Scatter(
x=x_range,
y=y_pred,
mode="lines",
line=dict(color="white", width=2, dash="dash"),
name="Expected",
))
# Normal points (sample to avoid overplotting)
df_normal = df_anomaly[~df_anomaly["is_anomaly"]]
if len(df_normal) > 2000:
df_normal = df_normal.sample(2000, random_state=42)
fig.add_trace(go.Scatter(
x=df_normal["blob_count"],
y=df_normal["block_first_seen_ms"],
mode="markers",
marker=dict(size=4, color="rgba(100,150,200,0.4)"),
name=f"Normal ({len(df_anomaly) - n_anomalies:,})",
hoverinfo="skip",
))
# Anomaly points
fig.add_trace(go.Scatter(
x=df_outliers["blob_count"],
y=df_outliers["block_first_seen_ms"],
mode="markers",
marker=dict(
size=7,
color="#e74c3c",
line=dict(width=1, color="white"),
),
name=f"Anomalies ({n_anomalies:,})",
customdata=np.column_stack([
df_outliers["slot"],
df_outliers["residual_ms"].round(0),
df_outliers["relay"],
]),
hovertemplate="<b>Slot %{customdata[0]}</b><br>Blobs: %{x}<br>Actual: %{y:.0f}ms<br>+%{customdata[1]}ms vs expected<br>Relay: %{customdata[2]}<extra></extra>",
))
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Blob count", range=[-0.5, int(max_blobs) + 0.5]),
yaxis=dict(title="Block first seen (ms from slot start)"),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
height=500,
)
fig.show(config={"responsive": True})
All propagation anomalies
Blocks that propagated much slower than expected given their blob count, sorted by residual (worst first).
# All anomalies table with selectable text and Lab links
if n_anomalies > 0:
df_table = df_outliers.sort_values("residual_ms", ascending=False)[
["slot", "blob_count", "block_first_seen_ms", "expected_ms", "residual_ms", "relay"]
].copy()
df_table["block_first_seen_ms"] = df_table["block_first_seen_ms"].round(0).astype(int)
df_table["expected_ms"] = df_table["expected_ms"].round(0).astype(int)
df_table["residual_ms"] = df_table["residual_ms"].round(0).astype(int)
# Create Lab links
df_table["lab_link"] = df_table["slot"].apply(
lambda s: f'<a href="https://lab.ethpandaops.io/ethereum/slots/{s}" target="_blank">View</a>'
)
# Build HTML table
html = '''
<style>
.anomaly-table { border-collapse: collapse; width: 100%; font-family: monospace; font-size: 13px; }
.anomaly-table th { background: #2c3e50; color: white; padding: 8px 12px; text-align: left; position: sticky; top: 0; }
.anomaly-table td { padding: 6px 12px; border-bottom: 1px solid #eee; }
.anomaly-table tr:hover { background: #f5f5f5; }
.anomaly-table .num { text-align: right; }
.anomaly-table .delta { background: #ffebee; color: #c62828; font-weight: bold; }
.anomaly-table a { color: #1976d2; text-decoration: none; }
.anomaly-table a:hover { text-decoration: underline; }
.table-container { max-height: 600px; overflow-y: auto; }
</style>
<div class="table-container">
<table class="anomaly-table">
<thead>
<tr><th>Slot</th><th class="num">Blobs</th><th class="num">Actual (ms)</th><th class="num">Expected (ms)</th><th class="num">Δ (ms)</th><th>Relay</th><th>Lab</th></tr>
</thead>
<tbody>
'''
for _, row in df_table.iterrows():
html += f'''<tr>
<td>{row["slot"]}</td>
<td class="num">{row["blob_count"]}</td>
<td class="num">{row["block_first_seen_ms"]}</td>
<td class="num">{row["expected_ms"]}</td>
<td class="num delta">+{row["residual_ms"]}</td>
<td>{row["relay"]}</td>
<td>{row["lab_link"]}</td>
</tr>'''
html += '</tbody></table></div>'
display(HTML(html))
print(f"\nTotal anomalies: {len(df_table):,}")
else:
print("No anomalies detected.")
Anomalies by relay
Which relays have the most propagation anomalies?
if n_anomalies > 0:
# Count anomalies by relay
relay_counts = df_outliers["relay"].value_counts().reset_index()
relay_counts.columns = ["relay", "anomaly_count"]
# Get total blocks per relay for context
df_anomaly["relay"] = df_anomaly["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else "Local")
total_by_relay = df_anomaly.groupby("relay").size().reset_index(name="total_blocks")
relay_counts = relay_counts.merge(total_by_relay, on="relay")
relay_counts["anomaly_rate"] = relay_counts["anomaly_count"] / relay_counts["total_blocks"] * 100
relay_counts = relay_counts.sort_values("anomaly_count", ascending=True)
fig = go.Figure()
fig.add_trace(go.Bar(
y=relay_counts["relay"],
x=relay_counts["anomaly_count"],
orientation="h",
marker_color="#e74c3c",
text=relay_counts.apply(lambda r: f"{r['anomaly_count']} ({r['anomaly_rate']:.1f}%)", axis=1),
textposition="outside",
hovertemplate="<b>%{y}</b><br>Anomalies: %{x}<br>Total blocks: %{customdata[0]:,}<br>Rate: %{customdata[1]:.1f}%<extra></extra>",
customdata=np.column_stack([relay_counts["total_blocks"], relay_counts["anomaly_rate"]]),
))
fig.update_layout(
margin=dict(l=150, r=80, t=30, b=60),
xaxis=dict(title="Number of anomalies"),
yaxis=dict(title=""),
height=350,
)
fig.show(config={"responsive": True})
Anomalies by blob count
Are anomalies more common at certain blob counts?
if n_anomalies > 0:
# Count anomalies by blob count
blob_anomalies = df_outliers.groupby("blob_count").size().reset_index(name="anomaly_count")
blob_total = df_anomaly.groupby("blob_count").size().reset_index(name="total_blocks")
blob_stats = blob_total.merge(blob_anomalies, on="blob_count", how="left").fillna(0)
blob_stats["anomaly_count"] = blob_stats["anomaly_count"].astype(int)
blob_stats["anomaly_rate"] = blob_stats["anomaly_count"] / blob_stats["total_blocks"] * 100
fig = go.Figure()
fig.add_trace(go.Bar(
x=blob_stats["blob_count"],
y=blob_stats["anomaly_count"],
marker_color="#e74c3c",
hovertemplate="<b>%{x} blobs</b><br>Anomalies: %{y}<br>Total: %{customdata[0]:,}<br>Rate: %{customdata[1]:.1f}%<extra></extra>",
customdata=np.column_stack([blob_stats["total_blocks"], blob_stats["anomaly_rate"]]),
))
fig.update_layout(
margin=dict(l=60, r=30, t=30, b=60),
xaxis=dict(title="Blob count", dtick=1),
yaxis=dict(title="Number of anomalies"),
height=350,
)
fig.show(config={"responsive": True})