Improve Trader entry quality training diagnostics
This commit is contained in:
@@ -24,9 +24,11 @@ def screen_entry_features(args: Any) -> None:
|
||||
|
||||
min_bucket_rows = int(args.min_bucket_rows or 300)
|
||||
rows: list[dict[str, Any]] = []
|
||||
edge_source_by_side: dict[str, str] = {}
|
||||
for side in ("LONG", "SHORT"):
|
||||
target_col = "long_entry_target" if side == "LONG" else "short_entry_target"
|
||||
edge_col = "long_expected_net_edge_bps" if side == "LONG" else "short_expected_net_edge_bps"
|
||||
edge_col = _screen_edge_column(dataset, side)
|
||||
edge_source_by_side[side] = edge_col
|
||||
baselines = _split_baselines(dataset, target_col, edge_col)
|
||||
for feature in FEATURE_ORDER:
|
||||
rows.extend(_feature_rows(dataset, feature, side, target_col, edge_col, baselines))
|
||||
@@ -43,6 +45,7 @@ def screen_entry_features(args: Any) -> None:
|
||||
"bucket_metric_count": int(len(bucket_metrics)),
|
||||
"candidate_count": int(len(candidates)),
|
||||
"min_bucket_rows": min_bucket_rows,
|
||||
"edge_source_by_side": edge_source_by_side,
|
||||
"selection_rule": "bucket boundaries are learned on fit_inner; candidate is picked by tune_inner and checked on validation_locked/latest_stress",
|
||||
}
|
||||
write_json(root / "diagnostics" / "entry_feature_screen_result.json", result)
|
||||
@@ -59,6 +62,14 @@ def screen_entry_features(args: Any) -> None:
|
||||
)
|
||||
|
||||
|
||||
def _screen_edge_column(dataset: pd.DataFrame, side: str) -> str:
    """Return the name of the net-edge column to screen against for ``side``.

    The realised-plan column ``<prefix>_actual_plan_net_edge_bps`` is
    preferred whenever it is present in ``dataset.columns``; otherwise the
    name ``<prefix>_expected_net_edge_bps`` is returned.

    Args:
        dataset: Frame whose column labels are inspected. Only the labels are
            read; no cell values are touched.
        side: ``"LONG"`` selects the ``long`` prefix. Any other value is
            treated as the short side.

    Returns:
        The chosen column name. The fallback name is returned without
        checking that it exists in ``dataset``.
    """
    prefix = "long" if side == "LONG" else "short"
    actual_col = f"{prefix}_actual_plan_net_edge_bps"
    if actual_col in dataset.columns:
        return actual_col
    # No realised-plan column available: fall back to the expected-edge name.
    return f"{prefix}_expected_net_edge_bps"
|
||||
|
||||
|
||||
def _split_baselines(dataset: pd.DataFrame, target_col: str, edge_col: str) -> dict[str, dict[str, float]]:
|
||||
baselines: dict[str, dict[str, float]] = {}
|
||||
for split_id in ALL_SPLITS:
|
||||
@@ -225,6 +236,7 @@ def _markdown_report(result: dict[str, Any], candidates: pd.DataFrame) -> str:
|
||||
"",
|
||||
"这份报告只回答一个问题:历史数据里,单个特征的某些区间有没有稳定变好。",
|
||||
"",
|
||||
"- 如果数据里有真实出场净收益,本报告用真实出场净收益;没有时才退回训练收益标签。",
|
||||
"- `tune_inner` 用来挑候选区间。",
|
||||
"- `validation_locked` 和 `latest_stress` 用来检查这个区间是不是出了训练样本也还能站住。",
|
||||
"- `stable_positive_edge=true` 代表这个区间在三个检查集里的平均净收益都大于 0。",
|
||||
@@ -237,6 +249,8 @@ def _markdown_report(result: dict[str, Any], candidates: pd.DataFrame) -> str:
|
||||
f"- 分桶明细数: `{result['bucket_metric_count']}`",
|
||||
f"- 候选数: `{result['candidate_count']}`",
|
||||
f"- 最小分桶行数: `{result['min_bucket_rows']}`",
|
||||
f"- 做多收益来源: `{result['edge_source_by_side'].get('LONG')}`",
|
||||
f"- 做空收益来源: `{result['edge_source_by_side'].get('SHORT')}`",
|
||||
"",
|
||||
]
|
||||
if candidates.empty:
|
||||
|
||||
Reference in New Issue
Block a user