Use actual plan edge for Entry PM training

2026-06-28 07:26:59 +08:00
parent 3c0f2d0d91
commit 2a86a6e2fa
6 changed files with 129 additions and 63 deletions
@@ -50,13 +50,16 @@ def _label_summary(root) -> dict[str, Any]:
                "future_return_bps_quantile": _quantiles(direction_split["future_return_bps"], (0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99)),
            }
        if not entry_split.empty:
+            if "actual_plan_net_edge_bps" not in entry_split.columns:
+                raise ValueError("entry_labels is missing actual_plan_net_edge_bps for diagnostics")
            grouped = entry_split.groupby("side", observed=False)
            item["entry"] = {
                "rows": len(entry_split),
                "target_rate_by_side": grouped["entry_target"].mean().round(6).to_dict(),
-                "edge_mean_by_side": grouped["expected_net_edge_bps"].mean().round(6).to_dict(),
+                "edge_column": "actual_plan_net_edge_bps",
+                "edge_mean_by_side": grouped["actual_plan_net_edge_bps"].mean().round(6).to_dict(),
                "edge_quantile_by_side": {
-                    str(side): _quantiles(group["expected_net_edge_bps"], (0.05, 0.5, 0.95))
+                    str(side): _quantiles(group["actual_plan_net_edge_bps"], (0.05, 0.5, 0.95))
                    for side, group in grouped
                },
            }
@@ -98,8 +101,8 @@ def _score_distribution(frame: pd.DataFrame) -> dict[str, dict[str, float]]:
        "pred_short_expected_net_edge_bps",
        "model_pred_long_expected_net_edge_bps",
        "model_pred_short_expected_net_edge_bps",
-        "actual_long_expected_net_edge_bps",
-        "actual_short_expected_net_edge_bps",
+        "actual_long_plan_edge_bps",
+        "actual_short_plan_edge_bps",
    ]
    return {column: _quantiles(frame[column], (0.0, 0.05, 0.5, 0.95, 1.0)) for column in columns if column in frame.columns}

@@ -141,11 +144,10 @@ def _cumulative_gate_counts(steps: dict[str, pd.Series], total_rows: int) -> dic

 def _relaxed_variants(frame: pd.DataFrame) -> dict[str, Any]:
    variants = {
-        "no_risk_no_edge": {"prob": 0.54, "entry": 0.50, "margin": 0.02, "risk": 1.0, "edge": -99.0},
-        "rare_entry_low_prob": {"prob": 0.50, "entry": 0.03, "margin": 0.02, "risk": 0.98, "edge": 0.0},
-        "entry_only_55": {"prob": 0.0, "entry": 0.55, "margin": -99.0, "risk": 1.0, "edge": -99.0},
-        "direction_only_54": {"prob": 0.54, "entry": 0.0, "margin": 0.02, "risk": 1.0, "edge": -99.0},
-        "very_loose": {"prob": 0.50, "entry": 0.45, "margin": 0.0, "risk": 1.0, "edge": -99.0},
+        "entry_30_positive_edge": {"prob": 0.50, "entry": 0.30, "margin": 0.02, "risk": 0.65, "edge": 3.0},
+        "entry_50_positive_edge": {"prob": 0.50, "entry": 0.50, "margin": 0.02, "risk": 0.65, "edge": 3.0},
+        "entry_70_positive_edge": {"prob": 0.50, "entry": 0.70, "margin": 0.02, "risk": 0.65, "edge": 3.0},
+        "direction_only_control": {"prob": 0.54, "entry": 0.0, "margin": 0.02, "risk": 1.0, "edge": -99.0},
    }
    result: dict[str, Any] = {}
    for name, thresholds in variants.items():
@@ -200,8 +202,8 @@ def _top_bucket_edge(frame: pd.DataFrame) -> dict[str, Any]:
        direction_top[str(fraction)] = _plain_trade_metrics(top.rename(columns={"actual_edge_bps": "actual_edge_bps"}))
    return {
        "direction_top_score": direction_top,
-        "long_entry_prob_deciles": _decile_edge(frame, "long_entry_prob", "actual_long_expected_net_edge_bps", "long_entry_target"),
-        "short_entry_prob_deciles": _decile_edge(frame, "short_entry_prob", "actual_short_expected_net_edge_bps", "short_entry_target"),
+        "long_entry_prob_deciles": _decile_edge(frame, "long_entry_prob", "actual_long_plan_edge_bps", "long_entry_target"),
+        "short_entry_prob_deciles": _decile_edge(frame, "short_entry_prob", "actual_short_plan_edge_bps", "short_entry_target"),
    }