From dc4d00a373ced3a09ff604fcbf604a2f76977a96 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 28 Jun 2026 07:29:17 +0800 Subject: [PATCH] Require actual plan edge in Entry screening --- training/tests/test_training_contract.py | 6 ++++++ training/trader_training/entry_feature_screen.py | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/training/tests/test_training_contract.py b/training/tests/test_training_contract.py index 290dfab..0a9b21c 100644 --- a/training/tests/test_training_contract.py +++ b/training/tests/test_training_contract.py @@ -50,6 +50,12 @@ class TrainingContractTest(unittest.TestCase): self.assertEqual("long_actual_plan_net_edge_bps", _screen_edge_column(dataset, "LONG")) self.assertEqual("short_actual_plan_net_edge_bps", _screen_edge_column(dataset, "SHORT")) + def test_entry_feature_screen_requires_actual_plan_edge(self) -> None: + dataset = pd.DataFrame({"long_expected_net_edge_bps": [20.0]}) + + with self.assertRaises(ValueError): + _screen_edge_column(dataset, "LONG") + def test_entry_regression_heads_train_on_actual_plan_edge(self) -> None: heads = {head[0]: head[2] for head in TARGETS["ENTRY"]["heads"]} diff --git a/training/trader_training/entry_feature_screen.py b/training/trader_training/entry_feature_screen.py index 5a4317d..056c543 100644 --- a/training/trader_training/entry_feature_screen.py +++ b/training/trader_training/entry_feature_screen.py @@ -17,7 +17,7 @@ ALL_SPLITS = (FIT_SPLIT, TUNE_SPLIT, VALIDATION_LOCKED_SPLIT, LATEST_STRESS_SPLI def screen_entry_features(args: Any) -> None: root = run_root(args) dataset = read_parquet(root / "dataset" / "entry_train.parquet") - required = {"split_id", *FEATURE_ORDER, "long_entry_target", "short_entry_target", "long_expected_net_edge_bps", "short_expected_net_edge_bps"} + required = {"split_id", *FEATURE_ORDER, "long_entry_target", "short_entry_target", "long_actual_plan_net_edge_bps", "short_actual_plan_net_edge_bps"} missing = sorted(required.difference(dataset.columns)) if missing: raise ValueError(f"entry feature screen missing required columns: {missing}") @@ -67,7 +67,7 @@ def _screen_edge_column(dataset: pd.DataFrame, side: str) -> str: actual_col = f"{prefix}_actual_plan_net_edge_bps" if actual_col in dataset.columns: return actual_col - return f"{prefix}_expected_net_edge_bps" + raise ValueError(f"entry feature screen requires actual plan edge column: {actual_col}") def _split_baselines(dataset: pd.DataFrame, target_col: str, edge_col: str) -> dict[str, dict[str, float]]: @@ -247,7 +247,7 @@ def _markdown_report(result: dict[str, Any], candidates: pd.DataFrame) -> str: "", "这份报告只回答一个问题:历史数据里,单个特征的某些区间有没有稳定变好。", "", - "- 如果数据里有真实出场净收益,本报告用真实出场净收益;没有时才退回训练收益标签。", + "- 本报告只使用真实出场净收益;缺少真实收益列时直接失败。", "- `tune_inner` 用来挑候选区间。", "- `validation_locked` 和 `latest_stress` 用来检查这个区间是不是出了训练样本也还能站住。", "- `stable_positive_edge=true` 代表这个区间在三个检查集里的平均净收益都大于 0。",