Improve Trader V4 training pipeline

Align entry labels with max future edge, tune direction labeling, and harden regression evaluation. Add training diagnostics, price-plan search, feature screening, and nonlinear benchmark scripts.
2026-06-27 19:57:29 +08:00
parent e58e4a5572
commit 9acb3460a1
27 changed files with 2059 additions and 341 deletions
@@ -228,12 +228,12 @@ def _pm_frame(root, split_id: str) -> pd.DataFrame:

 def _threshold_candidates() -> list[dict[str, float]]:
    values = itertools.product(
-        [0.54, 0.56, 0.58, 0.60],
-        [0.54, 0.56, 0.58, 0.60],
-        [0.50, 0.52, 0.55, 0.58],
-        [0.35, 0.45, 0.55],
-        [1.0, 2.0, 3.0, 5.0],
-        [0.02, 0.03, 0.05],
+        [0.50, 0.52, 0.54, 0.56, 0.58],
+        [0.50, 0.52, 0.54, 0.56, 0.58],
+        [0.10, 0.12, 0.14, 0.16, 0.20, 0.30, 0.50],
+        [0.55, 0.75, 0.90, 1.00],
+        [-8.0, -4.0, 0.0, 1.0, 3.0],
+        [0.00, 0.01, 0.02, 0.05],
    )
    return [
        {
@@ -398,12 +398,18 @@ def _backtest_status(metrics: dict[str, dict[str, Any]]) -> tuple[str, list[str]
 def _score_thresholds(metrics: dict[str, Any]) -> float:
    if metrics["trade_count"] == 0:
        return -1_000_000.0
-    low_sample_penalty = max(0, 20 - int(metrics["trade_count"])) * 5.0
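+    # The final go-live gate requires at least 80 trades on validation_locked; if the tuning region
+    # selects only a few dozen trades, that is likely luck rather than a stable rule, so small-sample thresholds are penalized up front here.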
+    low_sample_penalty = max(0, 120 - int(metrics["trade_count"])) * 1.5
+    profit_factor_penalty = max(0.0, 1.15 - float(metrics["profit_factor"])) * 20.0
+    negative_edge_penalty = max(0.0, -float(metrics["avg_weighted_edge_bps"])) * 40.0
    return (
        metrics["avg_weighted_edge_bps"] * np.sqrt(metrics["trade_count"])
        + metrics["total_weighted_edge_bps"] * 0.05
        - metrics["max_drawdown_bps"] * 0.25
        - low_sample_penalty
+        - profit_factor_penalty
+        - negative_edge_penalty
    )