Improve Trader entry quality training diagnostics
This commit is contained in:
@@ -6,8 +6,8 @@ from typing import Any
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from trader_training.io_utils import read_parquet, run_root, write_json, write_text
|
||||
from trader_training.pm import _pm_frame, _simulate_open_trades, _threshold_candidates, _trade_metrics
|
||||
from trader_training.io_utils import read_json, read_parquet, run_root, write_json, write_text
|
||||
from trader_training.pm import _pm_frame, _price_plan_context, _simulate_open_trades, _threshold_candidates, _thresholds_from_config, _trade_metrics, default_pm_config
|
||||
from trader_training.schemas import FIT_SPLIT, LATEST_STRESS_SPLIT, TUNE_SPLIT, VALIDATION_LOCKED_SPLIT
|
||||
|
||||
|
||||
@@ -66,12 +66,19 @@ def _label_summary(root) -> dict[str, Any]:
|
||||
|
||||
def _pm_summary(root) -> dict[str, Any]:
|
||||
summary: dict[str, Any] = {}
|
||||
config_path = root / "pm-search" / "position_manager_config.json"
|
||||
config = read_json(config_path)["config"] if config_path.is_file() else default_pm_config()
|
||||
thresholds = _thresholds_from_config(config)
|
||||
price_plan = _price_plan_context(root)
|
||||
for split_id in PM_EVAL_SPLITS:
|
||||
frame = _pm_frame(root, split_id)
|
||||
selected_trades = _simulate_open_trades(frame, thresholds, config, price_plan)
|
||||
item = {
|
||||
"rows": len(frame),
|
||||
"score_distribution": _score_distribution(frame),
|
||||
"gate_funnel": _gate_funnel(frame),
|
||||
"active_thresholds": thresholds,
|
||||
"gate_funnel": _gate_funnel(frame, thresholds),
|
||||
"selected_trade_metrics": _trade_metrics(selected_trades),
|
||||
"relaxed_variants": _relaxed_variants(frame),
|
||||
"top_bucket_edge": _top_bucket_edge(frame),
|
||||
"grid_search_any_trade": _grid_trade_summary(frame),
|
||||
@@ -89,34 +96,29 @@ def _score_distribution(frame: pd.DataFrame) -> dict[str, dict[str, float]]:
|
||||
"market_risk_prob",
|
||||
"pred_long_expected_net_edge_bps",
|
||||
"pred_short_expected_net_edge_bps",
|
||||
"model_pred_long_expected_net_edge_bps",
|
||||
"model_pred_short_expected_net_edge_bps",
|
||||
"actual_long_expected_net_edge_bps",
|
||||
"actual_short_expected_net_edge_bps",
|
||||
]
|
||||
return {column: _quantiles(frame[column], (0.0, 0.05, 0.5, 0.95, 1.0)) for column in columns}
|
||||
return {column: _quantiles(frame[column], (0.0, 0.05, 0.5, 0.95, 1.0)) for column in columns if column in frame.columns}
|
||||
|
||||
|
||||
def _gate_funnel(frame: pd.DataFrame) -> dict[str, Any]:
|
||||
thresholds = {
|
||||
"long_open_prob": 0.54,
|
||||
"short_open_prob": 0.54,
|
||||
"min_entry_prob": 0.50,
|
||||
"max_market_risk_prob": 0.55,
|
||||
"min_expected_edge_bps": 1.0,
|
||||
"min_direction_margin": 0.02,
|
||||
}
|
||||
def _gate_funnel(frame: pd.DataFrame, thresholds: dict[str, float]) -> dict[str, Any]:
|
||||
direction_margin = (frame["long_prob"] - frame["short_prob"]).abs()
|
||||
long_steps = {
|
||||
"long_prob >= 0.54": frame["long_prob"] >= thresholds["long_open_prob"],
|
||||
"long_prob - short_prob >= 0.02": (frame["long_prob"] - frame["short_prob"]) >= thresholds["min_direction_margin"],
|
||||
"long_entry_prob >= 0.50": frame["long_entry_prob"] >= thresholds["min_entry_prob"],
|
||||
"market_risk_prob <= 0.55": frame["market_risk_prob"] <= thresholds["max_market_risk_prob"],
|
||||
"pred_long_expected_net_edge_bps >= 1.0": frame["pred_long_expected_net_edge_bps"] >= thresholds["min_expected_edge_bps"],
|
||||
f"long_prob > {thresholds['long_open_prob']}": frame["long_prob"] > thresholds["long_open_prob"],
|
||||
f"abs(long_prob - short_prob) > {thresholds['min_direction_margin']}": direction_margin > thresholds["min_direction_margin"],
|
||||
f"long_entry_prob > {thresholds['min_entry_prob']}": frame["long_entry_prob"] > thresholds["min_entry_prob"],
|
||||
f"market_risk_prob < {thresholds['max_market_risk_prob']}": frame["market_risk_prob"] < thresholds["max_market_risk_prob"],
|
||||
f"pred_long_expected_net_edge_bps > {thresholds['min_expected_edge_bps']}": frame["pred_long_expected_net_edge_bps"] > thresholds["min_expected_edge_bps"],
|
||||
}
|
||||
short_steps = {
|
||||
"short_prob >= 0.54": frame["short_prob"] >= thresholds["short_open_prob"],
|
||||
"short_prob - long_prob >= 0.02": (frame["short_prob"] - frame["long_prob"]) >= thresholds["min_direction_margin"],
|
||||
"short_entry_prob >= 0.50": frame["short_entry_prob"] >= thresholds["min_entry_prob"],
|
||||
"market_risk_prob <= 0.55": frame["market_risk_prob"] <= thresholds["max_market_risk_prob"],
|
||||
"pred_short_expected_net_edge_bps >= 1.0": frame["pred_short_expected_net_edge_bps"] >= thresholds["min_expected_edge_bps"],
|
||||
f"short_prob > {thresholds['short_open_prob']}": frame["short_prob"] > thresholds["short_open_prob"],
|
||||
f"abs(long_prob - short_prob) > {thresholds['min_direction_margin']}": direction_margin > thresholds["min_direction_margin"],
|
||||
f"short_entry_prob > {thresholds['min_entry_prob']}": frame["short_entry_prob"] > thresholds["min_entry_prob"],
|
||||
f"market_risk_prob < {thresholds['max_market_risk_prob']}": frame["market_risk_prob"] < thresholds["max_market_risk_prob"],
|
||||
f"pred_short_expected_net_edge_bps > {thresholds['min_expected_edge_bps']}": frame["pred_short_expected_net_edge_bps"] > thresholds["min_expected_edge_bps"],
|
||||
}
|
||||
return {
|
||||
"thresholds": thresholds,
|
||||
@@ -140,6 +142,7 @@ def _cumulative_gate_counts(steps: dict[str, pd.Series], total_rows: int) -> dic
|
||||
def _relaxed_variants(frame: pd.DataFrame) -> dict[str, Any]:
|
||||
variants = {
|
||||
"no_risk_no_edge": {"prob": 0.54, "entry": 0.50, "margin": 0.02, "risk": 1.0, "edge": -99.0},
|
||||
"rare_entry_low_prob": {"prob": 0.50, "entry": 0.03, "margin": 0.02, "risk": 0.98, "edge": 0.0},
|
||||
"entry_only_55": {"prob": 0.0, "entry": 0.55, "margin": -99.0, "risk": 1.0, "edge": -99.0},
|
||||
"direction_only_54": {"prob": 0.54, "entry": 0.0, "margin": 0.02, "risk": 1.0, "edge": -99.0},
|
||||
"very_loose": {"prob": 0.50, "entry": 0.45, "margin": 0.0, "risk": 1.0, "edge": -99.0},
|
||||
@@ -168,10 +171,10 @@ def _variant_trades(frame: pd.DataFrame, thresholds: dict[str, float]) -> pd.Dat
|
||||
)
|
||||
long = frame.loc[long_mask].copy()
|
||||
long["side"] = "LONG"
|
||||
long["actual_edge_bps"] = long["actual_long_expected_net_edge_bps"]
|
||||
long["actual_edge_bps"] = long["long_trade_net_edge_bps"]
|
||||
short = frame.loc[short_mask].copy()
|
||||
short["side"] = "SHORT"
|
||||
short["actual_edge_bps"] = short["actual_short_expected_net_edge_bps"]
|
||||
short["actual_edge_bps"] = short["short_trade_net_edge_bps"]
|
||||
return pd.concat([long, short], ignore_index=True)
|
||||
|
||||
|
||||
@@ -189,7 +192,7 @@ def _plain_trade_metrics(trades: pd.DataFrame) -> dict[str, Any]:
|
||||
def _top_bucket_edge(frame: pd.DataFrame) -> dict[str, Any]:
|
||||
side = np.where(frame["long_prob"] >= frame["short_prob"], "LONG", "SHORT")
|
||||
side_prob = np.where(side == "LONG", frame["long_prob"], frame["short_prob"])
|
||||
side_edge = np.where(side == "LONG", frame["actual_long_expected_net_edge_bps"], frame["actual_short_expected_net_edge_bps"])
|
||||
side_edge = np.where(side == "LONG", frame["long_trade_net_edge_bps"], frame["short_trade_net_edge_bps"])
|
||||
direction_frame = pd.DataFrame({"score": side_prob, "actual_edge_bps": side_edge, "side": side})
|
||||
direction_top = {}
|
||||
for fraction in (0.01, 0.02, 0.05, 0.10):
|
||||
@@ -243,27 +246,19 @@ def _grid_trade_summary(frame: pd.DataFrame) -> dict[str, Any]:
|
||||
|
||||
|
||||
def _diagnostic_conclusion(pm_summary: dict[str, Any]) -> dict[str, Any]:
|
||||
tune = pm_summary.get(TUNE_SPLIT, {})
|
||||
gate = tune.get("gate_funnel", {})
|
||||
long_single = gate.get("long", {}).get("single_gate_pass", {})
|
||||
short_single = gate.get("short", {}).get("single_gate_pass", {})
|
||||
pred_edge_blocked = (
|
||||
long_single.get("pred_long_expected_net_edge_bps >= 1.0", 0) == 0
|
||||
and short_single.get("pred_short_expected_net_edge_bps >= 1.0", 0) == 0
|
||||
)
|
||||
relaxed = tune.get("relaxed_variants", {})
|
||||
any_relaxed_positive = any(item.get("avg_actual_edge_bps", 0.0) > 0 for item in relaxed.values())
|
||||
if pred_edge_blocked and not any_relaxed_positive:
|
||||
validation = pm_summary.get(VALIDATION_LOCKED_SPLIT, {}).get("selected_trade_metrics", {})
|
||||
stress = pm_summary.get(LATEST_STRESS_SPLIT, {}).get("selected_trade_metrics", {})
|
||||
if validation.get("trade_count", 0) == 0:
|
||||
return {
|
||||
"status": "MODEL_SIGNAL_NOT_TRADABLE",
|
||||
"plain_reason": "Entry 预测的净收益基本都是负数;即使放松风险和收益门槛,选出来的样本平均仍亏。",
|
||||
"next_action": "优先重查 Entry 标签和价格计划,再考虑更强模型;不要直接放松 PM 阈值上线。",
|
||||
"status": "NO_VALIDATION_TRADE",
|
||||
"plain_reason": "当前 PM 阈值在验证集没有选出交易,主要要看挡单漏斗。",
|
||||
"next_action": "先看 Direction、Risk、Entry 哪个门槛挡住,再做阈值实验。",
|
||||
}
|
||||
if pred_edge_blocked:
|
||||
if validation.get("avg_weighted_edge_bps", 0.0) <= 0 and stress.get("avg_weighted_edge_bps", 0.0) <= 0:
|
||||
return {
|
||||
"status": "ENTRY_EDGE_GATE_BLOCKED",
|
||||
"plain_reason": "PM 没有交易主要是 Entry 预测净收益过低。",
|
||||
"next_action": "重训 Entry 或调整价格计划后再搜索 PM 阈值。",
|
||||
"status": "PRICE_PLAN_OR_ENTRY_NOT_TRADABLE",
|
||||
"plain_reason": "按固定止盈止损真实收益算,验证集和压力集选出来的交易平均都不赚钱。",
|
||||
"next_action": "优先重新搜索价格计划,再重建 Entry 标签和模型;不要只放松 PM 阈值。",
|
||||
}
|
||||
return {
|
||||
"status": "NEEDS_MANUAL_REVIEW",
|
||||
@@ -312,6 +307,8 @@ def _markdown_report(payload: dict[str, Any]) -> str:
|
||||
lines.append(f"### {split_id}")
|
||||
lines.append("")
|
||||
lines.append(f"- 样本数: {item['rows']}")
|
||||
lines.append(f"- 当前阈值: `{item['active_thresholds']}`")
|
||||
lines.append(f"- 当前阈值选中交易: `{item['selected_trade_metrics']}`")
|
||||
lines.append(f"- 网格里有交易的候选数: {item['grid_search_any_trade']['candidates_with_trade']} / {item['grid_search_any_trade']['candidate_count']}")
|
||||
lines.append("")
|
||||
for side in ("long", "short"):
|
||||
|
||||
Reference in New Issue
Block a user