Improve Trader entry quality training diagnostics

2026-06-28 00:50:37 +08:00
parent 87849a66a7
commit 340d1dd91b
11 changed files with 1895 additions and 110 deletions
@@ -24,7 +24,21 @@ from trader_training.schemas import LABEL_VERSION

 DEFAULT_LABEL_CONFIG = {
    "direction": {"horizon_minutes": 45, "long_threshold_bps": 5.0, "short_threshold_bps": -5.0},
-    "entry": {"max_hold_minutes": 45, "target_bps": 12.0, "stop_bps": 8.0, "min_expected_net_edge_bps": 3.0},
+    "entry": {
+        "max_hold_minutes": 45,
+        "target_bps": 12.0,
+        "stop_bps": 8.0,
+        "min_expected_net_edge_bps": 3.0,
+        "plan_method": "FIXED_TARGET_STOP_V1",
+        "target_method": "PRICE_PLAN_OUTCOME_V1",
+        "min_plan_net_edge_bps": 0.0,
+        "max_entry_mae_bps": 12.0,
+        "partial_take_1_ratio": 0.50,
+        "partial_take_2_ratio": 0.25,
+        "second_target_bps": 24.0,
+        "trailing_stop_bps": 10.0,
+        "breakeven_after_first_target": True,
+    },
    "continue": {"horizon_minutes": 45, "min_expected_continue_edge_bps": 5.0},
    "exit": {"horizon_minutes": 45, "adverse_move_bps": 20.0, "stagnation_abs_return_bps": 5.0},
    "risk": {
@@ -92,6 +106,7 @@ PATH_STAT_COLUMNS = [
    "ambiguous_hit",
    "time_to_target_ms",
    "time_to_stop_ms",
+    "time_to_exit_ms",
    "gross_edge_bps",
    "future_return_bps",
    "mfe_bps",
@@ -112,7 +127,14 @@ def _first_hit_index(hit_window: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    return hit_any, first_idx


-def _path_stats_for_group(group: pd.DataFrame, side: str, horizon: int, target_bps: float, stop_bps: float) -> pd.DataFrame:
+def _path_stats_for_group(
+    group: pd.DataFrame,
+    side: str,
+    horizon: int,
+    target_bps: float,
+    stop_bps: float,
+    plan_config: dict[str, Any] | None = None,
+) -> pd.DataFrame:
    if len(group) <= horizon:
        return _empty_path_stats_frame()

@@ -151,6 +173,29 @@ def _path_stats_for_group(group: pd.DataFrame, side: str, horizon: int, target_b
    else:
        realized_vol_bps = np.full(len(entry), np.nan)

+    method = str((plan_config or {}).get("plan_method", "FIXED_TARGET_STOP_V1"))
+    if method == "DYNAMIC_TRAILING_V1":
+        return _dynamic_path_stats_for_group(
+            grouped,
+            side,
+            horizon,
+            target_bps,
+            stop_bps,
+            close,
+            high,
+            low,
+            spread,
+            open_ms,
+            valid,
+            future_high,
+            future_low,
+            future_spread,
+            future_realized_vol_bps=realized_vol_bps,
+            plan_config=plan_config or {},
+        )
+    if method != "FIXED_TARGET_STOP_V1":
+        raise ValueError(f"unsupported entry plan_method: {method}")
+
    if side == "LONG":
        target_price = entry * (1.0 + target_bps / 10000.0)
        stop_price = entry * (1.0 - stop_bps / 10000.0)
@@ -175,6 +220,11 @@ def _path_stats_for_group(group: pd.DataFrame, side: str, horizon: int, target_b
    stop_hit = stop_any & (first_stop_idx <= first_target_idx)
    timeout_hit = ~(target_hit | stop_hit)
    gross_edge_bps = np.where(target_hit, target_bps, np.where(stop_hit, -stop_bps, future_return_bps))
+    time_to_exit_ms = np.where(
+        target_hit,
+        (first_target_idx + 1) * 60_000,
+        np.where(stop_hit, (first_stop_idx + 1) * 60_000, horizon * 60_000),
+    )

    out = pd.DataFrame(
        {
@@ -187,6 +237,7 @@ def _path_stats_for_group(group: pd.DataFrame, side: str, horizon: int, target_b
            "ambiguous_hit": ambiguous_hit.astype("int8"),
            "time_to_target_ms": np.where(target_hit, (first_target_idx + 1) * 60_000, -1).astype("int64"),
            "time_to_stop_ms": np.where(stop_hit, (first_stop_idx + 1) * 60_000, -1).astype("int64"),
+            "time_to_exit_ms": time_to_exit_ms.astype("int64"),
            "gross_edge_bps": gross_edge_bps.astype("float64"),
            "future_return_bps": future_return_bps.astype("float64"),
            "mfe_bps": mfe_bps.astype("float64"),
@@ -198,27 +249,184 @@ def _path_stats_for_group(group: pd.DataFrame, side: str, horizon: int, target_b
    return out.loc[valid, PATH_STAT_COLUMNS].reset_index(drop=True)


-def _build_path_stats(replay: pd.DataFrame, horizon: int, target_bps: float, stop_bps: float) -> pd.DataFrame:
+def _dynamic_path_stats_for_group(
+    grouped: pd.DataFrame,
+    side: str,
+    horizon: int,
+    target_bps: float,
+    stop_bps: float,
+    close: np.ndarray,
+    high: np.ndarray,
+    low: np.ndarray,
+    spread: np.ndarray,
+    open_ms: np.ndarray,
+    valid: np.ndarray,
+    future_high: np.ndarray,
+    future_low: np.ndarray,
+    future_spread: np.ndarray,
+    future_realized_vol_bps: np.ndarray,
+    plan_config: dict[str, Any],
+) -> pd.DataFrame:
+    entry = close[:-horizon]
+    exit_price = close[horizon:]
+    current_open_ms = open_ms[:-horizon]
+    future_close = sliding_window_view(close[1:], horizon)
+    with np.errstate(all="ignore"):
+        high_max = np.nanmax(future_high, axis=1)
+        low_min = np.nanmin(future_low, axis=1)
+        spread_p80 = np.nanquantile(future_spread, 0.8, axis=1)
+
+    take1_ratio = float(plan_config.get("partial_take_1_ratio", 0.50))
+    take2_ratio = float(plan_config.get("partial_take_2_ratio", 0.25))
+    take1_ratio = float(np.clip(take1_ratio, 0.0, 1.0))
+    take2_ratio = float(np.clip(take2_ratio, 0.0, max(0.0, 1.0 - take1_ratio)))
+    target2_bps = float(plan_config.get("second_target_bps", target_bps * 2.0))
+    trailing_stop_bps = float(plan_config.get("trailing_stop_bps", stop_bps))
+    breakeven_after_first = bool(plan_config.get("breakeven_after_first_target", True))
+
+    n = len(entry)
+    active = np.ones(n, dtype=bool)
+    first_target_done = np.zeros(n, dtype=bool)
+    second_target_done = np.zeros(n, dtype=bool)
+    bad_stop_done = np.zeros(n, dtype=bool)
+    trailing_exit_done = np.zeros(n, dtype=bool)
+    remaining = np.ones(n, dtype="float64")
+    gross = np.zeros(n, dtype="float64")
+    first_target_idx = np.full(n, horizon + 1, dtype="int64")
+    stop_idx = np.full(n, horizon + 1, dtype="int64")
+    exit_idx = np.full(n, horizon, dtype="int64")
+
+    if side == "LONG":
+        high_water = entry.copy()
+        for step in range(horizon):
+            h = future_high[:, step]
+            l = future_low[:, step]
+            prior_high_water = high_water.copy()
+            trailing_stop_price = prior_high_water * (1.0 - trailing_stop_bps / 10000.0)
+            if breakeven_after_first:
+                trailing_stop_price = np.maximum(trailing_stop_price, entry)
+            stop_price = np.where(first_target_done, trailing_stop_price, entry * (1.0 - stop_bps / 10000.0))
+            stop_now = active & (l <= stop_price)
+            stop_gross = (stop_price / entry - 1.0) * 10000.0
+            gross = np.where(stop_now, gross + remaining * stop_gross, gross)
+            trailing_exit_done |= stop_now & first_target_done
+            bad_stop_done |= stop_now & (~first_target_done)
+            stop_idx = np.where(stop_now, step, stop_idx)
+            exit_idx = np.where(stop_now, step, exit_idx)
+            remaining = np.where(stop_now, 0.0, remaining)
+            active &= ~stop_now
+
+            first_now = active & (~first_target_done) & (h >= entry * (1.0 + target_bps / 10000.0))
+            gross = np.where(first_now, gross + take1_ratio * target_bps, gross)
+            remaining = np.where(first_now, remaining - take1_ratio, remaining)
+            first_target_done |= first_now
+            first_target_idx = np.where(first_now, step, first_target_idx)
+
+            second_now = active & first_target_done & (~second_target_done) & (h >= entry * (1.0 + target2_bps / 10000.0))
+            gross = np.where(second_now, gross + take2_ratio * target2_bps, gross)
+            remaining = np.where(second_now, remaining - take2_ratio, remaining)
+            second_target_done |= second_now
+
+            high_water = np.maximum(high_water, h)
+        timeout_return = (exit_price / entry - 1.0) * 10000.0
+        future_return_bps = timeout_return
+        mfe_bps = np.maximum((high_max / entry - 1.0) * 10000.0, 0.0)
+        mae_bps = np.maximum((entry / low_min - 1.0) * 10000.0, 0.0)
+    else:
+        low_water = entry.copy()
+        for step in range(horizon):
+            h = future_high[:, step]
+            l = future_low[:, step]
+            prior_low_water = low_water.copy()
+            trailing_stop_price = prior_low_water * (1.0 + trailing_stop_bps / 10000.0)
+            if breakeven_after_first:
+                trailing_stop_price = np.minimum(trailing_stop_price, entry)
+            stop_price = np.where(first_target_done, trailing_stop_price, entry * (1.0 + stop_bps / 10000.0))
+            stop_now = active & (h >= stop_price)
+            stop_gross = (entry / stop_price - 1.0) * 10000.0
+            gross = np.where(stop_now, gross + remaining * stop_gross, gross)
+            trailing_exit_done |= stop_now & first_target_done
+            bad_stop_done |= stop_now & (~first_target_done)
+            stop_idx = np.where(stop_now, step, stop_idx)
+            exit_idx = np.where(stop_now, step, exit_idx)
+            remaining = np.where(stop_now, 0.0, remaining)
+            active &= ~stop_now
+
+            first_now = active & (~first_target_done) & (l <= entry * (1.0 - target_bps / 10000.0))
+            gross = np.where(first_now, gross + take1_ratio * target_bps, gross)
+            remaining = np.where(first_now, remaining - take1_ratio, remaining)
+            first_target_done |= first_now
+            first_target_idx = np.where(first_now, step, first_target_idx)
+
+            second_now = active & first_target_done & (~second_target_done) & (l <= entry * (1.0 - target2_bps / 10000.0))
+            gross = np.where(second_now, gross + take2_ratio * target2_bps, gross)
+            remaining = np.where(second_now, remaining - take2_ratio, remaining)
+            second_target_done |= second_now
+
+            low_water = np.minimum(low_water, l)
+        timeout_return = (entry / exit_price - 1.0) * 10000.0
+        future_return_bps = timeout_return
+        mfe_bps = np.maximum((entry / low_min - 1.0) * 10000.0, 0.0)
+        mae_bps = np.maximum((high_max / entry - 1.0) * 10000.0, 0.0)
+
+    timeout_now = active
+    gross = np.where(timeout_now, gross + remaining * timeout_return, gross)
+    exit_idx = np.where(timeout_now, horizon - 1, exit_idx)
+    target_hit = first_target_done
+    stop_hit = bad_stop_done
+    timeout_hit = timeout_now
+    ambiguous_hit = np.zeros(n, dtype=bool)
+    out = pd.DataFrame(
+        {
+            "symbol": grouped["symbol"].iloc[0],
+            "open_time_ms": current_open_ms,
+            "side": side,
+            "target_hit": target_hit.astype("int8"),
+            "stop_hit": stop_hit.astype("int8"),
+            "timeout_hit": timeout_hit.astype("int8"),
+            "ambiguous_hit": ambiguous_hit.astype("int8"),
+            "time_to_target_ms": np.where(target_hit, (first_target_idx + 1) * 60_000, -1).astype("int64"),
+            "time_to_stop_ms": np.where(stop_hit | trailing_exit_done, (stop_idx + 1) * 60_000, -1).astype("int64"),
+            "time_to_exit_ms": ((exit_idx + 1) * 60_000).astype("int64"),
+            "gross_edge_bps": gross.astype("float64"),
+            "future_return_bps": future_return_bps.astype("float64"),
+            "mfe_bps": mfe_bps.astype("float64"),
+            "mae_bps": mae_bps.astype("float64"),
+            "future_spread_p80": spread_p80.astype("float64"),
+            "future_realized_vol_bps": future_realized_vol_bps.astype("float64"),
+        }
+    )
+    return out.loc[valid, PATH_STAT_COLUMNS].reset_index(drop=True)
+
+
+def _build_path_stats(replay: pd.DataFrame, horizon: int, target_bps: float, stop_bps: float, plan_config: dict[str, Any] | None = None) -> pd.DataFrame:
    frames: list[pd.DataFrame] = []
    for symbol, group in replay.groupby("symbol", sort=False, observed=False):
        logging.info(
-            "trader.training.path_stats_group_start symbol=%s horizonMinutes=%s rowCount=%s",
+            "trader.training.path_stats_group_start symbol=%s horizonMinutes=%s planMethod=%s rowCount=%s",
            symbol,
            horizon,
+            (plan_config or {}).get("plan_method", "FIXED_TARGET_STOP_V1"),
            len(group),
        )
        for side in ("LONG", "SHORT"):
-            stats = _path_stats_for_group(group, side, horizon, target_bps, stop_bps)
+            stats = _path_stats_for_group(group, side, horizon, target_bps, stop_bps, plan_config=plan_config)
            frames.append(stats)
            logging.info(
-                "trader.training.path_stats_side_done symbol=%s side=%s horizonMinutes=%s rowCount=%s",
+                "trader.training.path_stats_side_done symbol=%s side=%s horizonMinutes=%s planMethod=%s rowCount=%s",
                symbol,
                side,
                horizon,
+                (plan_config or {}).get("plan_method", "FIXED_TARGET_STOP_V1"),
                len(stats),
            )
    out = pd.concat(frames, ignore_index=True) if frames else _empty_path_stats_frame()
-    logging.info("trader.training.path_stats_built horizonMinutes=%s rowCount=%s", horizon, len(out))
+    logging.info(
+        "trader.training.path_stats_built horizonMinutes=%s planMethod=%s rowCount=%s",
+        horizon,
+        (plan_config or {}).get("plan_method", "FIXED_TARGET_STOP_V1"),
+        len(out),
+    )
    return out


@@ -235,8 +443,17 @@ def write_price_plan_context(args: Any) -> None:
        "targetDistanceBps": float(entry["target_bps"]),
        "maxHoldMinutes": int(entry["max_hold_minutes"]),
        "minExpectedNetEdgeBps": float(entry["min_expected_net_edge_bps"]),
+        "minPlanNetEdgeBps": float(entry.get("min_plan_net_edge_bps", 0.0)),
+        "maxEntryMaeBps": float(entry.get("max_entry_mae_bps", entry["stop_bps"])),
        "costBps": cost_bps,
        "entryLabelMethod": ENTRY_LABEL_METHOD,
+        "entryTargetMethod": str(entry.get("target_method", ENTRY_LABEL_METHOD)),
+        "entryPlanMethod": str(entry.get("plan_method", "FIXED_TARGET_STOP_V1")),
+        "partialTake1Ratio": float(entry.get("partial_take_1_ratio", 0.50)),
+        "partialTake2Ratio": float(entry.get("partial_take_2_ratio", 0.25)),
+        "secondTargetBps": float(entry.get("second_target_bps", float(entry["target_bps"]) * 2.0)),
+        "trailingStopBps": float(entry.get("trailing_stop_bps", float(entry["stop_bps"]))),
+        "breakevenAfterFirstTarget": bool(entry.get("breakeven_after_first_target", True)),
    }
    path = root / "label" / "price_plan_context.json"
    write_json(path, context)
@@ -247,8 +464,17 @@ def write_price_plan_context(args: Any) -> None:
        "stop_bps": context["stopDistanceBps"],
        "max_hold_minutes": context["maxHoldMinutes"],
        "min_expected_net_edge_bps": context["minExpectedNetEdgeBps"],
+        "min_plan_net_edge_bps": context["minPlanNetEdgeBps"],
+        "max_entry_mae_bps": context["maxEntryMaeBps"],
        "cost_bps": context["costBps"],
        "entry_label_method": context["entryLabelMethod"],
+        "entry_target_method": context["entryTargetMethod"],
+        "entry_plan_method": context["entryPlanMethod"],
+        "partial_take_1_ratio": context["partialTake1Ratio"],
+        "partial_take_2_ratio": context["partialTake2Ratio"],
+        "second_target_bps": context["secondTargetBps"],
+        "trailing_stop_bps": context["trailingStopBps"],
+        "breakeven_after_first_target": context["breakevenAfterFirstTarget"],
    }])
    write_parquet(root / "label" / "price_plan_context.parquet", frame)
    logging.info("trader.training.price_plan_written runId=%s path=%s", args.run_id, path)
@@ -308,6 +534,7 @@ def build_entry_labels(args: Any) -> None:
        int(entry_conf["max_hold_minutes"]),
        float(entry_conf["target_bps"]),
        float(entry_conf["stop_bps"]),
+        plan_config=entry_conf,
    )
    feature_columns = [
        "sample_id",
@@ -321,14 +548,28 @@ def build_entry_labels(args: Any) -> None:
        "realized_vol_15m_bps",
    ]
    merged = features[feature_columns].merge(stats, on=["symbol", "open_time_ms"], how="inner")
+    merged["actual_plan_net_edge_bps"] = merged["gross_edge_bps"] - cost_bps
    merged["max_achievable_gross_edge_bps"] = merged["mfe_bps"]
    merged["max_achievable_net_edge_bps"] = merged["max_achievable_gross_edge_bps"] - cost_bps
-    merged["expected_net_edge_bps"] = merged["gross_edge_bps"] - cost_bps
-    merged["entry_target"] = (merged["expected_net_edge_bps"] >= float(entry_conf["min_expected_net_edge_bps"])).astype("int8")
+    target_method = str(entry_conf.get("target_method", ENTRY_LABEL_METHOD))
+    if target_method == "PRICE_PLAN_OUTCOME_V1":
+        merged["expected_net_edge_bps"] = merged["actual_plan_net_edge_bps"]
+    elif target_method in {"OPPORTUNITY_MFE_V1", "OPPORTUNITY_QUALITY_V1"}:
+        merged["expected_net_edge_bps"] = merged["max_achievable_net_edge_bps"]
+    else:
+        raise ValueError(f"unsupported entry target_method: {target_method}")
+    opportunity = merged["expected_net_edge_bps"] >= float(entry_conf["min_expected_net_edge_bps"])
+    if target_method == "OPPORTUNITY_QUALITY_V1":
+        # MFE 只说明价格曾经给过机会；真实开仓还要确认这笔机会按计划能拿走，
+        # 并且过程中没有先承受过大的反向波动。
+        min_plan_net_edge_bps = float(entry_conf.get("min_plan_net_edge_bps", 0.0))
+        max_entry_mae_bps = float(entry_conf.get("max_entry_mae_bps", entry_conf["stop_bps"]))
+        opportunity = opportunity & (merged["actual_plan_net_edge_bps"] >= min_plan_net_edge_bps) & (merged["mae_bps"] <= max_entry_mae_bps)
+    merged["entry_target"] = opportunity.astype("int8")
    merged["price_plan_id"] = plan["pricePlanId"]
    merged["price_plan_hash"] = plan["pricePlanConfigHash"]
    merged["cost_bps"] = cost_bps
-    merged["label_method"] = ENTRY_LABEL_METHOD
+    merged["label_method"] = target_method
    merged["label_version"] = LABEL_VERSION
    out = merged[
        [
@@ -344,10 +585,12 @@ def build_entry_labels(args: Any) -> None:
            "ambiguous_hit",
            "time_to_target_ms",
            "time_to_stop_ms",
+            "time_to_exit_ms",
            "gross_edge_bps",
            "future_return_bps",
            "mfe_bps",
            "mae_bps",
+            "actual_plan_net_edge_bps",
            "max_achievable_gross_edge_bps",
            "max_achievable_net_edge_bps",
            "cost_bps",
@@ -393,7 +636,18 @@ def build_continue_exit_risk_labels(args: Any) -> None:
    horizon = int(labels["continue"]["horizon_minutes"])
    target_bps = float(plan["targetDistanceBps"])
    stop_bps = float(plan["stopDistanceBps"])
-    stats = _build_path_stats(replay, horizon, target_bps, stop_bps)
+    plan_config = {
+        "plan_method": plan.get("entryPlanMethod", labels["entry"].get("plan_method", "FIXED_TARGET_STOP_V1")),
+        "partial_take_1_ratio": plan.get("partialTake1Ratio", labels["entry"].get("partial_take_1_ratio", 0.50)),
+        "partial_take_2_ratio": plan.get("partialTake2Ratio", labels["entry"].get("partial_take_2_ratio", 0.25)),
+        "second_target_bps": plan.get("secondTargetBps", labels["entry"].get("second_target_bps", target_bps * 2.0)),
+        "trailing_stop_bps": plan.get("trailingStopBps", labels["entry"].get("trailing_stop_bps", stop_bps)),
+        "breakeven_after_first_target": plan.get(
+            "breakevenAfterFirstTarget",
+            labels["entry"].get("breakeven_after_first_target", True),
+        ),
+    }
+    stats = _build_path_stats(replay, horizon, target_bps, stop_bps, plan_config=plan_config)
    long_stats = stats[stats["side"] == "LONG"].drop(columns=["side"]).add_prefix("long_")
    short_stats = stats[stats["side"] == "SHORT"].drop(columns=["side"]).add_prefix("short_")
    long_stats = long_stats.rename(columns={"long_symbol": "symbol", "long_open_time_ms": "open_time_ms"})
@@ -432,6 +686,7 @@ def build_continue_exit_risk_labels(args: Any) -> None:

    long_edge = merged["long_gross_edge_bps"] - cost_bps
    short_edge = merged["short_gross_edge_bps"] - cost_bps
+    dynamic_plan = str(plan_config.get("plan_method")) == "DYNAMIC_TRAILING_V1"
    path_risk = np.maximum(merged["long_mae_bps"], merged["short_mae_bps"])
    max_path_move = np.maximum.reduce([merged["long_mfe_bps"], merged["short_mfe_bps"], path_risk])
    if "ret_15m_bps" in merged.columns:
@@ -449,8 +704,8 @@ def build_continue_exit_risk_labels(args: Any) -> None:
            "sample_id": merged["sample_id"],
            "symbol": merged["symbol"],
            "event_time": merged["event_time"],
-            "long_continue_target": ((long_edge >= min_continue) & (merged["long_stop_hit"] == 0)).astype("int8"),
-            "short_continue_target": ((short_edge >= min_continue) & (merged["short_stop_hit"] == 0)).astype("int8"),
+            "long_continue_target": ((long_edge >= min_continue) & ((merged["long_stop_hit"] == 0) | dynamic_plan)).astype("int8"),
+            "short_continue_target": ((short_edge >= min_continue) & ((merged["short_stop_hit"] == 0) | dynamic_plan)).astype("int8"),
            "long_expected_continue_edge_bps": long_edge,
            "short_expected_continue_edge_bps": short_edge,
            "split_id": merged["split_id"],