Require actual plan edge in Entry screening

This commit is contained in:
Codex
2026-06-28 07:29:17 +08:00
parent 2a86a6e2fa
commit dc4d00a373
2 changed files with 9 additions and 3 deletions
+6
View File
@@ -50,6 +50,12 @@ class TrainingContractTest(unittest.TestCase):
self.assertEqual("long_actual_plan_net_edge_bps", _screen_edge_column(dataset, "LONG"))
self.assertEqual("short_actual_plan_net_edge_bps", _screen_edge_column(dataset, "SHORT"))
def test_entry_feature_screen_requires_actual_plan_edge(self) -> None:
    """Screening must reject a dataset that only carries the expected-edge column.

    The frame below has ``long_expected_net_edge_bps`` but no
    ``long_actual_plan_net_edge_bps``; ``_screen_edge_column`` is expected
    to raise ``ValueError`` rather than fall back to the expected-edge label.
    """
    expected_edge_only = pd.DataFrame({"long_expected_net_edge_bps": [20.0]})
    # Callable form of assertRaises: invokes the helper and checks the exception type.
    self.assertRaises(ValueError, _screen_edge_column, expected_edge_only, "LONG")
def test_entry_regression_heads_train_on_actual_plan_edge(self) -> None:
heads = {head[0]: head[2] for head in TARGETS["ENTRY"]["heads"]}
@@ -17,7 +17,7 @@ ALL_SPLITS = (FIT_SPLIT, TUNE_SPLIT, VALIDATION_LOCKED_SPLIT, LATEST_STRESS_SPLI
def screen_entry_features(args: Any) -> None:
root = run_root(args)
dataset = read_parquet(root / "dataset" / "entry_train.parquet")
required = {"split_id", *FEATURE_ORDER, "long_entry_target", "short_entry_target", "long_expected_net_edge_bps", "short_expected_net_edge_bps"}
required = {"split_id", *FEATURE_ORDER, "long_entry_target", "short_entry_target", "long_actual_plan_net_edge_bps", "short_actual_plan_net_edge_bps"}
missing = sorted(required.difference(dataset.columns))
if missing:
raise ValueError(f"entry feature screen missing required columns: {missing}")
@@ -67,7 +67,7 @@ def _screen_edge_column(dataset: pd.DataFrame, side: str) -> str:
actual_col = f"{prefix}_actual_plan_net_edge_bps"
if actual_col in dataset.columns:
return actual_col
return f"{prefix}_expected_net_edge_bps"
raise ValueError(f"entry feature screen requires actual plan edge column: {actual_col}")
def _split_baselines(dataset: pd.DataFrame, target_col: str, edge_col: str) -> dict[str, dict[str, float]]:
@@ -247,7 +247,7 @@ def _markdown_report(result: dict[str, Any], candidates: pd.DataFrame) -> str:
"",
"这份报告只回答一个问题:历史数据里,单个特征的某些区间有没有稳定变好。",
"",
"- 如果数据里有真实出场净收益,本报告用真实出场净收益;没有时才退回训练收益标签",
"- 本报告只使用真实出场净收益;缺少真实收益列时直接失败",
"- `tune_inner` 用来挑候选区间。",
"- `validation_locked` 和 `latest_stress` 用来检查这个区间是不是出了训练样本也还能站住。",
"- `stable_positive_edge=true` 代表这个区间在三个检查集里的平均净收益都大于 0。",