Require actual plan edge in Entry screening
This commit is contained in:
@@ -50,6 +50,12 @@ class TrainingContractTest(unittest.TestCase):
|
|||||||
self.assertEqual("long_actual_plan_net_edge_bps", _screen_edge_column(dataset, "LONG"))
|
self.assertEqual("long_actual_plan_net_edge_bps", _screen_edge_column(dataset, "LONG"))
|
||||||
self.assertEqual("short_actual_plan_net_edge_bps", _screen_edge_column(dataset, "SHORT"))
|
self.assertEqual("short_actual_plan_net_edge_bps", _screen_edge_column(dataset, "SHORT"))
|
||||||
|
|
||||||
|
def test_entry_feature_screen_requires_actual_plan_edge(self) -> None:
|
||||||
|
dataset = pd.DataFrame({"long_expected_net_edge_bps": [20.0]})
|
||||||
|
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
_screen_edge_column(dataset, "LONG")
|
||||||
|
|
||||||
def test_entry_regression_heads_train_on_actual_plan_edge(self) -> None:
|
def test_entry_regression_heads_train_on_actual_plan_edge(self) -> None:
|
||||||
heads = {head[0]: head[2] for head in TARGETS["ENTRY"]["heads"]}
|
heads = {head[0]: head[2] for head in TARGETS["ENTRY"]["heads"]}
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ ALL_SPLITS = (FIT_SPLIT, TUNE_SPLIT, VALIDATION_LOCKED_SPLIT, LATEST_STRESS_SPLI
|
|||||||
def screen_entry_features(args: Any) -> None:
|
def screen_entry_features(args: Any) -> None:
|
||||||
root = run_root(args)
|
root = run_root(args)
|
||||||
dataset = read_parquet(root / "dataset" / "entry_train.parquet")
|
dataset = read_parquet(root / "dataset" / "entry_train.parquet")
|
||||||
required = {"split_id", *FEATURE_ORDER, "long_entry_target", "short_entry_target", "long_expected_net_edge_bps", "short_expected_net_edge_bps"}
|
required = {"split_id", *FEATURE_ORDER, "long_entry_target", "short_entry_target", "long_actual_plan_net_edge_bps", "short_actual_plan_net_edge_bps"}
|
||||||
missing = sorted(required.difference(dataset.columns))
|
missing = sorted(required.difference(dataset.columns))
|
||||||
if missing:
|
if missing:
|
||||||
raise ValueError(f"entry feature screen missing required columns: {missing}")
|
raise ValueError(f"entry feature screen missing required columns: {missing}")
|
||||||
@@ -67,7 +67,7 @@ def _screen_edge_column(dataset: pd.DataFrame, side: str) -> str:
|
|||||||
actual_col = f"{prefix}_actual_plan_net_edge_bps"
|
actual_col = f"{prefix}_actual_plan_net_edge_bps"
|
||||||
if actual_col in dataset.columns:
|
if actual_col in dataset.columns:
|
||||||
return actual_col
|
return actual_col
|
||||||
return f"{prefix}_expected_net_edge_bps"
|
raise ValueError(f"entry feature screen requires actual plan edge column: {actual_col}")
|
||||||
|
|
||||||
|
|
||||||
def _split_baselines(dataset: pd.DataFrame, target_col: str, edge_col: str) -> dict[str, dict[str, float]]:
|
def _split_baselines(dataset: pd.DataFrame, target_col: str, edge_col: str) -> dict[str, dict[str, float]]:
|
||||||
@@ -247,7 +247,7 @@ def _markdown_report(result: dict[str, Any], candidates: pd.DataFrame) -> str:
|
|||||||
"",
|
"",
|
||||||
"这份报告只回答一个问题:历史数据里,单个特征的某些区间有没有稳定变好。",
|
"这份报告只回答一个问题:历史数据里,单个特征的某些区间有没有稳定变好。",
|
||||||
"",
|
"",
|
||||||
"- 如果数据里有真实出场净收益,本报告用真实出场净收益;没有时才退回训练收益标签。",
|
"- 本报告只使用真实出场净收益;缺少真实收益列时直接失败。",
|
||||||
"- `tune_inner` 用来挑候选区间。",
|
"- `tune_inner` 用来挑候选区间。",
|
||||||
"- `validation_locked` 和 `latest_stress` 用来检查这个区间是不是出了训练样本也还能站住。",
|
"- `validation_locked` 和 `latest_stress` 用来检查这个区间是不是出了训练样本也还能站住。",
|
||||||
"- `stable_positive_edge=true` 代表这个区间在三个检查集里的平均净收益都大于 0。",
|
"- `stable_positive_edge=true` 代表这个区间在三个检查集里的平均净收益都大于 0。",
|
||||||
|
|||||||
Reference in New Issue
Block a user