Use actual plan edge for Entry PM training

This commit is contained in:
Codex
2026-06-28 07:26:59 +08:00
parent 3c0f2d0d91
commit 2a86a6e2fa
6 changed files with 129 additions and 63 deletions
@@ -338,36 +338,21 @@ def _load_direction_dataset(baseline_root: Path, feature: pd.DataFrame) -> pd.Da
def _load_entry_dataset(baseline_root: Path, feature: pd.DataFrame) -> pd.DataFrame:
    """Join the feature frame with the entry training labels.

    Labels are read from ``<baseline_root>/dataset/entry_train.parquet``.
    There is no fallback to the legacy ``label/entry_labels.parquet`` file:
    the entry targets are trained against the ``*_actual_plan_net_edge_bps``
    columns, which only the ``entry_train`` dataset provides here.

    Args:
        baseline_root: Root directory of the baseline artefacts.
        feature: Feature frame; it is merged on its ``sample_id`` column.

    Returns:
        The inner join of ``feature`` with the required label columns on
        ``sample_id``.

    Raises:
        FileNotFoundError: If ``entry_train.parquet`` does not exist.
        ValueError: If the dataset lacks any of the required columns.
    """
    dataset_path = baseline_root / "dataset" / "entry_train.parquet"
    if not dataset_path.is_file():
        raise FileNotFoundError(f"entry_train dataset is required for OFI experiment: {dataset_path}")

    labels = read_parquet(dataset_path)
    # A tuple (not a set) keeps the projected column order stable between
    # runs; iterating a set of strings depends on hash randomisation.
    required_columns = (
        "sample_id",
        "long_entry_target",
        "short_entry_target",
        "long_actual_plan_net_edge_bps",
        "short_actual_plan_net_edge_bps",
    )
    missing = sorted(set(required_columns).difference(labels.columns))
    if missing:
        raise ValueError(f"entry_train dataset missing columns: {missing}")

    # Project to the required columns only before the inner join on sample_id.
    dataset = feature.merge(labels[list(required_columns)], on="sample_id", how="inner")
    logging.info("trader.training.ofi_entry_dataset_loaded source=entry_train rowCount=%s", len(dataset))
    return dataset