Use actual plan edge for Entry PM training

This commit is contained in:
Codex
2026-06-28 07:26:59 +08:00
parent 3c0f2d0d91
commit 2a86a6e2fa
6 changed files with 129 additions and 63 deletions
+13 -11
View File
@@ -50,13 +50,16 @@ def _label_summary(root) -> dict[str, Any]:
 "future_return_bps_quantile": _quantiles(direction_split["future_return_bps"], (0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99)),
 }
 if not entry_split.empty:
+if "actual_plan_net_edge_bps" not in entry_split.columns:
+raise ValueError("entry_labels is missing actual_plan_net_edge_bps for diagnostics")
 grouped = entry_split.groupby("side", observed=False)
 item["entry"] = {
 "rows": len(entry_split),
 "target_rate_by_side": grouped["entry_target"].mean().round(6).to_dict(),
-"edge_mean_by_side": grouped["expected_net_edge_bps"].mean().round(6).to_dict(),
+"edge_column": "actual_plan_net_edge_bps",
+"edge_mean_by_side": grouped["actual_plan_net_edge_bps"].mean().round(6).to_dict(),
 "edge_quantile_by_side": {
-str(side): _quantiles(group["expected_net_edge_bps"], (0.05, 0.5, 0.95))
+str(side): _quantiles(group["actual_plan_net_edge_bps"], (0.05, 0.5, 0.95))
 for side, group in grouped
 },
 }
@@ -98,8 +101,8 @@ def _score_distribution(frame: pd.DataFrame) -> dict[str, dict[str, float]]:
 "pred_short_expected_net_edge_bps",
 "model_pred_long_expected_net_edge_bps",
 "model_pred_short_expected_net_edge_bps",
-"actual_long_expected_net_edge_bps",
-"actual_short_expected_net_edge_bps",
+"actual_long_plan_edge_bps",
+"actual_short_plan_edge_bps",
 ]
 return {column: _quantiles(frame[column], (0.0, 0.05, 0.5, 0.95, 1.0)) for column in columns if column in frame.columns}
@@ -141,11 +144,10 @@ def _cumulative_gate_counts(steps: dict[str, pd.Series], total_rows: int) -> dic
 def _relaxed_variants(frame: pd.DataFrame) -> dict[str, Any]:
 variants = {
-"no_risk_no_edge": {"prob": 0.54, "entry": 0.50, "margin": 0.02, "risk": 1.0, "edge": -99.0},
-"rare_entry_low_prob": {"prob": 0.50, "entry": 0.03, "margin": 0.02, "risk": 0.98, "edge": 0.0},
-"entry_only_55": {"prob": 0.0, "entry": 0.55, "margin": -99.0, "risk": 1.0, "edge": -99.0},
-"direction_only_54": {"prob": 0.54, "entry": 0.0, "margin": 0.02, "risk": 1.0, "edge": -99.0},
-"very_loose": {"prob": 0.50, "entry": 0.45, "margin": 0.0, "risk": 1.0, "edge": -99.0},
+"entry_30_positive_edge": {"prob": 0.50, "entry": 0.30, "margin": 0.02, "risk": 0.65, "edge": 3.0},
+"entry_50_positive_edge": {"prob": 0.50, "entry": 0.50, "margin": 0.02, "risk": 0.65, "edge": 3.0},
+"entry_70_positive_edge": {"prob": 0.50, "entry": 0.70, "margin": 0.02, "risk": 0.65, "edge": 3.0},
+"direction_only_control": {"prob": 0.54, "entry": 0.0, "margin": 0.02, "risk": 1.0, "edge": -99.0},
 }
 result: dict[str, Any] = {}
 for name, thresholds in variants.items():
@@ -200,8 +202,8 @@ def _top_bucket_edge(frame: pd.DataFrame) -> dict[str, Any]:
 direction_top[str(fraction)] = _plain_trade_metrics(top.rename(columns={"actual_edge_bps": "actual_edge_bps"}))
 return {
 "direction_top_score": direction_top,
-"long_entry_prob_deciles": _decile_edge(frame, "long_entry_prob", "actual_long_expected_net_edge_bps", "long_entry_target"),
-"short_entry_prob_deciles": _decile_edge(frame, "short_entry_prob", "actual_short_expected_net_edge_bps", "short_entry_target"),
+"long_entry_prob_deciles": _decile_edge(frame, "long_entry_prob", "actual_long_plan_edge_bps", "long_entry_target"),
+"short_entry_prob_deciles": _decile_edge(frame, "short_entry_prob", "actual_short_plan_edge_bps", "short_entry_target"),
 }