"""Versioned feature, split, and model-output definitions.

This module only declares data: version tags, split names, the ordered
feature catalogue (``FEATURES``), the output schema, and the mapping from
each model's named outputs to positions in its prediction vector.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

# --- Version tags -----------------------------------------------------------
FEATURE_VERSION = "feature-v4-p0"
LABEL_VERSION = "label-v4-p0"
SPLIT_VERSION = "split-v4-p0"
MODEL_BUNDLE_VERSION = "trader-v4-btc-p0"
CALIBRATION_BUNDLE_VERSION = "cal-v4-btc-p0"
PM_CONFIG_VERSION = "pm-v4-btc-p0"
OUTPUT_SCHEMA_VERSION = "output-schema-v4-btc-p0"

# --- Data split names -------------------------------------------------------
FIT_SPLIT = "fit_inner"
TUNE_SPLIT = "tune_inner"
VALIDATION_LOCKED_SPLIT = "validation_locked"
LATEST_STRESS_SPLIT = "latest_stress"
TRAINING_SPLITS = (
    FIT_SPLIT,
    TUNE_SPLIT,
    VALIDATION_LOCKED_SPLIT,
    LATEST_STRESS_SPLIT,
)


@dataclass(frozen=True)
class FeatureDef:
    """Immutable catalogue entry describing a single model input feature.

    All fields are descriptive metadata; nothing here computes the feature.
    """

    order: int  # position of the feature in the catalogue (1-based in FEATURES)
    name: str  # machine-readable feature identifier
    cn_name: str  # Chinese display name
    meaning: str  # short English description
    source_tables: tuple[str, ...]  # names of the inputs the feature is built from
    formula: str  # human-readable formula text
    lookback_window: str  # window label such as "15m" or "as-of"
    unit: str  # unit label such as "bps", "ratio", "pct"
    dtype: str  # storage dtype label
    null_rule: str  # free-text rule for missing values
    live_available: bool  # flag; presumably "computable in live trading" - confirm
    leakage_check: str  # free-text note on which timestamps the feature reads
    owner_models: tuple[str, ...]  # names of the models that list this feature

    def as_json(self) -> dict[str, Any]:
        """Return the entry as a plain dict, with tuple fields converted to lists.

        Keys appear in field-declaration order.
        """
        return dict(
            order=self.order,
            name=self.name,
            cn_name=self.cn_name,
            meaning=self.meaning,
            source_tables=list(self.source_tables),
            formula=self.formula,
            lookback_window=self.lookback_window,
            unit=self.unit,
            dtype=self.dtype,
            null_rule=self.null_rule,
            live_available=self.live_available,
            leakage_check=self.leakage_check,
            owner_models=list(self.owner_models),
        )


ALL_MODELS = ("Direction", "Entry", "Continue", "Exit", "Risk")

# Ordered feature catalogue. FEATURE_ORDER below is derived from this tuple,
# so the sequence of entries here defines the feature ordering.
FEATURES: tuple[FeatureDef, ...] = (
    FeatureDef(
        order=1,
        name="ret_1m_bps",
        cn_name="最近1分钟收益",
        meaning="Latest short return.",
        source_tables=("replay_1m",),
        formula="close_t / close_t-1m - 1",
        lookback_window="1m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t close only",
        owner_models=ALL_MODELS,
    ),
    FeatureDef(
        order=2,
        name="ret_5m_bps",
        cn_name="最近5分钟收益",
        meaning="Short trend.",
        source_tables=("replay_1m",),
        formula="close_t / close_t-5m - 1",
        lookback_window="5m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t close only",
        owner_models=("Direction", "Entry", "Continue", "Exit"),
    ),
    FeatureDef(
        order=3,
        name="ret_15m_bps",
        cn_name="最近15分钟收益",
        meaning="Near trend.",
        source_tables=("replay_1m",),
        formula="close_t / close_t-15m - 1",
        lookback_window="15m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t close only",
        owner_models=("Direction", "Entry", "Continue", "Exit"),
    ),
    FeatureDef(
        order=4,
        name="ret_60m_bps",
        cn_name="最近60分钟收益",
        meaning="Baseline trend.",
        source_tables=("replay_1m",),
        formula="close_t / close_t-60m - 1",
        lookback_window="60m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t close only",
        owner_models=("Direction", "Continue", "Exit", "Risk"),
    ),
    FeatureDef(
        order=5,
        name="ret_240m_bps",
        cn_name="最近240分钟收益",
        meaning="Four-hour trend.",
        source_tables=("replay_1m",),
        formula="close_t / close_t-240m - 1",
        lookback_window="240m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t close only",
        owner_models=("Direction", "Continue", "Risk"),
    ),
    FeatureDef(
        order=6,
        name="realized_vol_15m_bps",
        cn_name="15分钟波动",
        meaning="Near realized volatility.",
        source_tables=("replay_1m",),
        formula="std(log_return_1m, 15) * 10000",
        lookback_window="15m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t returns only",
        owner_models=("Direction", "Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=7,
        name="realized_vol_60m_bps",
        cn_name="60分钟波动",
        meaning="Baseline realized volatility.",
        source_tables=("replay_1m",),
        formula="std(log_return_1m, 60) * 10000",
        lookback_window="60m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t returns only",
        owner_models=("Direction", "Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=8,
        name="vol_ratio_15m_60m",
        cn_name="近端波动放大",
        meaning="Near volatility versus baseline.",
        source_tables=("feature",),
        formula="realized_vol_15m_bps / max(realized_vol_60m_bps, 1)",
        lookback_window="15m/60m",
        unit="ratio",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="derived from <= t features",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=9,
        name="range_15m_bps",
        cn_name="15分钟振幅",
        meaning="Near high-low range.",
        source_tables=("replay_1m",),
        formula="max(high_15m) / min(low_15m) - 1",
        lookback_window="15m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t high/low only",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=10,
        name="range_60m_bps",
        cn_name="60分钟振幅",
        meaning="Baseline high-low range.",
        source_tables=("replay_1m",),
        formula="max(high_60m) / min(low_60m) - 1",
        lookback_window="60m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t high/low only",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=11,
        name="volume_zscore_60m",
        cn_name="60分钟成交量异常",
        meaning="Current volume abnormality.",
        source_tables=("replay_1m",),
        formula="(volume_t - mean(volume_60m)) / std(volume_60m)",
        lookback_window="60m",
        unit="zscore",
        dtype="float32",
        null_rule="std=0 -> 0",
        live_available=True,
        leakage_check="uses <= t volume only",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=12,
        name="trend_consistency_15m",
        cn_name="15分钟方向连续性",
        meaning="Signed return consistency.",
        source_tables=("replay_1m",),
        formula="mean(sign(ret_1m), 15)",
        lookback_window="15m",
        unit="ratio",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t returns only",
        owner_models=("Direction", "Continue", "Exit"),
    ),
    FeatureDef(
        order=13,
        name="channel_position_60m_pct",
        cn_name="60分钟通道位置",
        meaning="Close position in recent channel.",
        source_tables=("replay_1m",),
        formula="(close_t - low_60m) / max(high_60m - low_60m, tick)",
        lookback_window="60m",
        unit="pct",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t high/low/close only",
        owner_models=("Direction", "Entry", "Continue"),
    ),
    FeatureDef(
        order=14,
        name="upper_breakout_60m_bps",
        cn_name="向上突破距离",
        meaning="Upper breakout distance.",
        source_tables=("replay_1m",),
        formula="max(0, close_t / prev_high_60m_excl_t - 1) * 10000",
        lookback_window="60m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="current close versus prior window only",
        owner_models=("Direction", "Entry", "Continue"),
    ),
    FeatureDef(
        order=15,
        name="lower_breakout_60m_bps",
        cn_name="向下跌破距离",
        meaning="Lower breakdown distance.",
        source_tables=("replay_1m",),
        formula="max(0, prev_low_60m_excl_t / close_t - 1) * 10000",
        lookback_window="60m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="current close versus prior window only",
        owner_models=("Direction", "Entry", "Continue"),
    ),
    FeatureDef(
        order=16,
        name="upper_failed_break_reclaim_15m_bps",
        cn_name="上破失败回落",
        meaning="Failed upper breakout reclaim.",
        source_tables=("replay_1m",),
        formula="if high_15m broke prior high then max(0, prev_high_60m - close_t) / close_t * 10000",
        lookback_window="15m/60m",
        unit="bps",
        dtype="float32",
        null_rule="no event -> 0",
        live_available=True,
        leakage_check="prior high excludes t",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=17,
        name="lower_failed_break_reclaim_15m_bps",
        cn_name="下破失败收回",
        meaning="Failed lower breakdown reclaim.",
        source_tables=("replay_1m",),
        formula="if low_15m broke prior low then max(0, close_t - prev_low_60m) / close_t * 10000",
        lookback_window="15m/60m",
        unit="bps",
        dtype="float32",
        null_rule="no event -> 0",
        live_available=True,
        leakage_check="prior low excludes t",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=18,
        name="sweep_up_15m_bps",
        cn_name="上影扫高",
        meaning="Upper sweep size.",
        source_tables=("replay_1m",),
        formula="max(0, max(high_15m) / close_t - 1) * 10000",
        lookback_window="15m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t high/close only",
        owner_models=("Exit", "Risk"),
    ),
    FeatureDef(
        order=19,
        name="sweep_down_15m_bps",
        cn_name="下影扫低",
        meaning="Lower sweep size.",
        source_tables=("replay_1m",),
        formula="max(0, close_t / min(low_15m) - 1) * 10000",
        lookback_window="15m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t low/close only",
        owner_models=("Exit", "Risk"),
    ),
    FeatureDef(
        order=20,
        name="compression_score_4h_pct",
        cn_name="4小时压缩分位",
        meaning="Higher means recent range is compressed.",
        source_tables=("feature",),
        formula="1 - percentile_rank(range_15m_bps over 240m)",
        lookback_window="240m",
        unit="pct",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="rolling rank uses <= t",
        owner_models=("Direction", "Entry"),
    ),
    FeatureDef(
        order=21,
        name="compression_release_15m_bps",
        cn_name="压缩释放幅度",
        meaning="Range release versus 4h median.",
        source_tables=("feature",),
        formula="max(0, range_15m_bps - median(range_15m_bps over 240m))",
        lookback_window="15m/240m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="rolling median uses <= t",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=22,
        name="taker_imbalance_1m",
        cn_name="1分钟主动买卖差",
        meaning="Taker buy/sell imbalance.",
        source_tables=("trades", "replay_1m"),
        formula="(buy_1m - sell_1m) / max(total_1m, eps)",
        lookback_window="1m",
        unit="ratio",
        dtype="float32",
        null_rule="volume=0 -> 0",
        live_available=True,
        leakage_check="uses current closed minute trades only",
        owner_models=("Direction", "Entry", "Continue"),
    ),
    FeatureDef(
        order=23,
        name="taker_imbalance_5m",
        cn_name="5分钟主动买卖差",
        meaning="Short taker imbalance.",
        source_tables=("trades", "replay_1m"),
        formula="(buy_5m - sell_5m) / max(total_5m, eps)",
        lookback_window="5m",
        unit="ratio",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t trades only",
        owner_models=("Direction", "Entry", "Continue"),
    ),
    FeatureDef(
        order=24,
        name="taker_imbalance_15m",
        cn_name="15分钟主动买卖差",
        meaning="Near taker imbalance.",
        source_tables=("trades", "replay_1m"),
        formula="(buy_15m - sell_15m) / max(total_15m, eps)",
        lookback_window="15m",
        unit="ratio",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t trades only",
        owner_models=("Direction", "Continue", "Exit"),
    ),
    FeatureDef(
        order=25,
        name="level1_ofi_1m",
        cn_name="1分钟盘口订单流",
        meaning="Best bid/ask order-flow imbalance.",
        source_tables=("level_1", "replay_1m"),
        formula="sum(OFI changes in minute) / mean(level1 depth)",
        lookback_window="1m",
        unit="ratio",
        dtype="float32",
        null_rule="missing -> fail",
        live_available=True,
        leakage_check="uses current closed minute L1 only",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=26,
        name="spread_bps",
        cn_name="买卖价差",
        meaning="Best bid/ask spread.",
        source_tables=("level_1", "replay_1m"),
        formula="(best_ask - best_bid) / mid * 10000",
        lookback_window="1m",
        unit="bps",
        dtype="float32",
        null_rule="missing -> fail",
        live_available=True,
        leakage_check="uses current closed minute L1 only",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=27,
        name="spread_rank_24h_pct",
        cn_name="24小时价差分位",
        meaning="Spread congestion rank.",
        source_tables=("feature",),
        formula="percentile_rank(spread_bps over 24h)",
        lookback_window="24h",
        unit="pct",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="rolling rank uses <= t",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=28,
        name="oi_delta_15m_bps",
        cn_name="15分钟持仓变化",
        meaning="Open-interest short change.",
        source_tables=("open_interest", "replay_1m"),
        formula="open_interest_t / open_interest_t-15m - 1",
        lookback_window="15m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t OI only",
        owner_models=("Direction", "Continue", "Risk"),
    ),
    FeatureDef(
        order=29,
        name="oi_delta_60m_bps",
        cn_name="60分钟持仓变化",
        meaning="Open-interest baseline change.",
        source_tables=("open_interest", "replay_1m"),
        formula="open_interest_t / open_interest_t-60m - 1",
        lookback_window="60m",
        unit="bps",
        dtype="float32",
        null_rule="WARMUP",
        live_available=True,
        leakage_check="uses <= t OI only",
        owner_models=("Direction", "Continue", "Risk"),
    ),
    FeatureDef(
        order=30,
        name="funding_bps",
        cn_name="资金费率",
        meaning="Current funding rate.",
        source_tables=("funding", "replay_1m"),
        formula="rate * 10000",
        lookback_window="as-of",
        unit="bps",
        dtype="float32",
        null_rule="as-of > 12h -> fail",
        live_available=True,
        leakage_check="backward as-of only",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=31,
        name="mark_index_basis_bps",
        cn_name="标记价指数价偏离",
        meaning="Mark-index basis.",
        source_tables=("funding", "replay_1m"),
        formula="mark_price / index_price - 1",
        lookback_window="as-of",
        unit="bps",
        dtype="float32",
        null_rule="as-of > 12h -> fail",
        live_available=True,
        leakage_check="backward as-of only",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=32,
        name="liquidation_buy_notional_1m",
        cn_name="1分钟买向爆仓金额",
        meaning="Buy-side liquidation notional.",
        source_tables=("liquidations", "replay_1m"),
        formula="sum(quantity * price for BUY)",
        lookback_window="1m",
        unit="quote",
        dtype="float32",
        null_rule="missing partition -> 0 with flag",
        live_available=True,
        leakage_check="uses current closed minute liquidations only",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=33,
        name="liquidation_sell_notional_1m",
        cn_name="1分钟卖向爆仓金额",
        meaning="Sell-side liquidation notional.",
        source_tables=("liquidations", "replay_1m"),
        formula="sum(quantity * price for SELL)",
        lookback_window="1m",
        unit="quote",
        dtype="float32",
        null_rule="missing partition -> 0 with flag",
        live_available=True,
        leakage_check="uses current closed minute liquidations only",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=34,
        name="liquidation_imbalance_15m",
        cn_name="15分钟爆仓方向差",
        meaning="Liquidation imbalance.",
        source_tables=("liquidations", "replay_1m"),
        formula="(buy_15m - sell_15m) / max(total_15m, eps)",
        lookback_window="15m",
        unit="ratio",
        dtype="float32",
        null_rule="missing partition -> 0 with flag",
        live_available=True,
        leakage_check="uses <= t liquidations only",
        owner_models=("Direction", "Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=35,
        name="liquidation_notional_zscore_15m",
        cn_name="爆仓金额异常",
        meaning="Liquidation notional zscore.",
        source_tables=("liquidations", "replay_1m"),
        formula="(liq_15m - mean_24h) / std_24h",
        lookback_window="15m/24h",
        unit="zscore",
        dtype="float32",
        null_rule="missing partition -> 0 with flag",
        live_available=True,
        leakage_check="rolling window uses <= t",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=36,
        name="liquidation_available",
        cn_name="爆仓数据可用",
        meaning="Whether liquidation data exists.",
        source_tables=("liquidations", "replay_1m"),
        formula="day partition exists",
        lookback_window="day",
        unit="0/1",
        dtype="float32",
        null_rule="never null",
        live_available=True,
        leakage_check="partition availability known by event day",
        owner_models=("Entry", "Exit", "Risk"),
    ),
    FeatureDef(
        order=37,
        name="minute_of_day_sin",
        cn_name="日内时间正弦",
        meaning="Time of day cyclic feature.",
        source_tables=("event_time",),
        formula="sin(2*pi*minute_of_day/1440)",
        lookback_window="event_time",
        unit="ratio",
        dtype="float32",
        null_rule="never null",
        live_available=True,
        leakage_check="event timestamp only",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=38,
        name="minute_of_day_cos",
        cn_name="日内时间余弦",
        meaning="Time of day cyclic feature.",
        source_tables=("event_time",),
        formula="cos(2*pi*minute_of_day/1440)",
        lookback_window="event_time",
        unit="ratio",
        dtype="float32",
        null_rule="never null",
        live_available=True,
        leakage_check="event timestamp only",
        owner_models=("Direction", "Entry", "Risk"),
    ),
    FeatureDef(
        order=39,
        name="minutes_to_next_funding",
        cn_name="距离下次资金费分钟",
        meaning="Minutes to next funding settlement.",
        source_tables=("funding", "replay_1m"),
        formula="clip((next_funding_time - event_time) / 60000, 0, 480)",
        lookback_window="as-of",
        unit="minute",
        dtype="float32",
        null_rule="as-of > 12h -> fail",
        live_available=True,
        leakage_check="backward as-of only",
        owner_models=("Entry", "Continue", "Risk"),
    ),
)

# Feature names in catalogue order.
FEATURE_ORDER = [definition.name for definition in FEATURES]


def _decimal(low: float, high: float) -> dict[str, Any]:
    """Return a new ``decimal`` field spec whose range is ``[low, high]``.

    A fresh dict and list are built on every call so that no two schema
    entries share mutable state.
    """
    return {"type": "decimal", "range": [low, high]}


def _probability() -> dict[str, Any]:
    """Return a new ``decimal`` field spec with range ``[0.0, 1.0]``."""
    return _decimal(0.0, 1.0)


# Output schema: one section per model family, each leaf giving the field
# type and its allowed numeric range.
OUTPUT_SCHEMA: dict[str, Any] = {
    "output_schema_version": OUTPUT_SCHEMA_VERSION,
    "direction": {
        "longProb": _probability(),
        "shortProb": _probability(),
        "neutralProb": _probability(),
        "sum_rule": "longProb + shortProb + neutralProb must equal 1.0 within 0.000001",
    },
    "entry": {
        "longEntryProb": _probability(),
        "shortEntryProb": _probability(),
        "longExpectedNetEdgeBps": _decimal(-500.0, 500.0),
        "shortExpectedNetEdgeBps": _decimal(-500.0, 500.0),
    },
    "continuation": {
        "longContinueProb": _probability(),
        "shortContinueProb": _probability(),
        "longExpectedContinueEdgeBps": _decimal(-500.0, 500.0),
        "shortExpectedContinueEdgeBps": _decimal(-500.0, 500.0),
    },
    "exit": {
        "longExitProb": _probability(),
        "shortExitProb": _probability(),
        "longAdverseMoveBps": _decimal(0.0, 500.0),
        "shortAdverseMoveBps": _decimal(0.0, 500.0),
        "exitReasonScores": {
            "adverse_move_prob": _probability(),
            "reversal_prob": _probability(),
            "stop_hit_prob": _probability(),
            "stagnation_prob": _probability(),
        },
    },
    "risk": {
        "marketRiskProb": _probability(),
        "longPositionRiskProb": _probability(),
        "shortPositionRiskProb": _probability(),
        "marketPathRiskBps": _decimal(0.0, 1000.0),
        "longPositionPathRiskBps": _decimal(0.0, 1000.0),
        "shortPositionPathRiskBps": _decimal(0.0, 1000.0),
        "riskReasonScores": {
            "market_drawdown_prob": _probability(),
            "volatility_expansion_prob": _probability(),
            "spike_prob": _probability(),
            "liquidity_deterioration_prob": _probability(),
            "position_drawdown_prob": _probability(),
        },
    },
}

# Named outputs of each model, in the order used to index its prediction
# vector (see OUTPUT_MAPPING).
MODEL_OUTPUTS: dict[str, list[str]] = {
    "DIRECTION": [
        "long_prob",
        "short_prob",
        "neutral_prob",
    ],
    "ENTRY": [
        "long_entry_prob",
        "short_entry_prob",
        "long_expected_net_edge_bps",
        "short_expected_net_edge_bps",
    ],
    "CONTINUE": [
        "long_continue_prob",
        "short_continue_prob",
        "long_expected_continue_edge_bps",
        "short_expected_continue_edge_bps",
    ],
    "EXIT": [
        "long_exit_prob",
        "short_exit_prob",
        "long_adverse_move_bps",
        "short_adverse_move_bps",
        "adverse_move_prob",
        "reversal_prob",
        "stop_hit_prob",
        "stagnation_prob",
    ],
    "RISK": [
        "market_risk_prob",
        "long_position_risk_prob",
        "short_position_risk_prob",
        "market_path_risk_bps",
        "long_position_path_risk_bps",
        "short_position_path_risk_bps",
        "market_drawdown_prob",
        "volatility_expansion_prob",
        "spike_prob",
        "liquidity_deterioration_prob",
        "position_drawdown_prob",
    ],
}

# Probability-typed targets per model, spelled with the OUTPUT_SCHEMA keys.
PROBABILITY_TARGET_NAMES: dict[str, list[str]] = {
    "DIRECTION": [
        "longProb",
        "shortProb",
        "neutralProb",
    ],
    "ENTRY": [
        "longEntryProb",
        "shortEntryProb",
    ],
    "CONTINUE": [
        "longContinueProb",
        "shortContinueProb",
    ],
    "EXIT": [
        "longExitProb",
        "shortExitProb",
        "adverse_move_prob",
        "reversal_prob",
        "stop_hit_prob",
        "stagnation_prob",
    ],
    "RISK": [
        "marketRiskProb",
        "longPositionRiskProb",
        "shortPositionRiskProb",
        "market_drawdown_prob",
        "volatility_expansion_prob",
        "spike_prob",
        "liquidity_deterioration_prob",
        "position_drawdown_prob",
    ],
}


def _prediction_slots(output_names: list[str]) -> dict[str, str]:
    """Map each output name to the ``prediction[i]`` label of its list position."""
    return {
        output_name: f"prediction[{position}]"
        for position, output_name in enumerate(output_names)
    }


# Per-model lookup from output name to its "prediction[<index>]" label.
OUTPUT_MAPPING: dict[str, dict[str, str]] = {
    model_key: _prediction_slots(output_names)
    for model_key, output_names in MODEL_OUTPUTS.items()
}