Improve Trader entry quality training diagnostics
This commit is contained in:
@@ -14,8 +14,10 @@ if str(TRAINING_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(TRAINING_ROOT))
|
||||
|
||||
from trader_training.onnx_export import LinearHead, export_heads
|
||||
from trader_training.entry_feature_screen import _screen_edge_column
|
||||
from trader_training.io_utils import read_json, write_json
|
||||
from trader_training.labels import ENTRY_LABEL_METHOD, _path_stats_for_group, build_entry_labels
|
||||
from trader_training.ofi_feature_experiment import l1_snapshot_diff_ofi_quote
|
||||
from trader_training.promote import promote_artifact_bundle
|
||||
from trader_training.replay import build_splits
|
||||
from trader_training.schemas import FEATURE_ORDER, LATEST_STRESS_SPLIT, MODEL_OUTPUTS, OUTPUT_MAPPING, TRAINING_SPLITS, VALIDATION_LOCKED_SPLIT
|
||||
@@ -33,6 +35,19 @@ class TrainingContractTest(unittest.TestCase):
|
||||
self.assertEqual(set(fields), set(OUTPUT_MAPPING[model_name]))
|
||||
self.assertEqual([f"prediction[{idx}]" for idx in range(len(fields))], [OUTPUT_MAPPING[model_name][field] for field in fields])
|
||||
|
||||
def test_entry_feature_screen_prefers_actual_plan_edge(self) -> None:
    """Check the edge column chosen by the screen when both edge columns exist.

    The frame carries an ``*_expected_net_edge_bps`` and an
    ``*_actual_plan_net_edge_bps`` column for each side; the test expects
    ``_screen_edge_column`` to return the actual-plan column name for
    both ``LONG`` and ``SHORT``.
    """
    frame = pd.DataFrame(
        {
            "long_expected_net_edge_bps": [20.0],
            "short_expected_net_edge_bps": [15.0],
            "long_actual_plan_net_edge_bps": [-3.0],
            "short_actual_plan_net_edge_bps": [4.0],
        }
    )

    # (side, column name the screen is expected to select)
    expectations = (
        ("LONG", "long_actual_plan_net_edge_bps"),
        ("SHORT", "short_actual_plan_net_edge_bps"),
    )
    for side, expected_column in expectations:
        self.assertEqual(expected_column, _screen_edge_column(frame, side))
|
||||
|
||||
def test_split_builder_uses_locked_validation_contract(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
data_root = Path(tmp)
|
||||
@@ -90,7 +105,7 @@ class TrainingContractTest(unittest.TestCase):
|
||||
self.assertEqual(120_000, first["time_to_stop_ms"])
|
||||
self.assertAlmostEqual(-8.0, first["gross_edge_bps"])
|
||||
|
||||
def test_entry_label_uses_max_future_edge_not_fixed_target_hit(self) -> None:
|
||||
def test_entry_label_uses_price_plan_outcome_not_max_future_edge(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
data_root = Path(tmp)
|
||||
run_root = data_root / "trader-v4" / "runs" / "unit-entry"
|
||||
@@ -167,11 +182,200 @@ class TrainingContractTest(unittest.TestCase):
|
||||
labels = pd.read_parquet(run_root / "label" / "entry_labels.parquet")
|
||||
row = labels[labels["sample_id"].eq("s0") & labels["side"].eq("LONG")].iloc[0]
|
||||
self.assertEqual(0, row["target_hit"])
|
||||
self.assertEqual(1, row["entry_target"])
|
||||
self.assertEqual(0, row["entry_target"])
|
||||
self.assertEqual(ENTRY_LABEL_METHOD, row["label_method"])
|
||||
self.assertAlmostEqual(13.5, row["expected_net_edge_bps"], places=6)
|
||||
self.assertAlmostEqual(-6.5, row["expected_net_edge_bps"], places=6)
|
||||
self.assertAlmostEqual(row["gross_edge_bps"] - row["cost_bps"], row["expected_net_edge_bps"], places=6)
|
||||
self.assertAlmostEqual(row["mfe_bps"] - row["cost_bps"], row["max_achievable_net_edge_bps"], places=6)
|
||||
|
||||
def test_entry_opportunity_label_keeps_plan_outcome_for_pm(self) -> None:
    """Build entry labels with the ``OPPORTUNITY_MFE_V1`` target method.

    One feature sample (``s0``) and five one-minute replay bars are written
    to a temporary run directory, then ``build_entry_labels`` is invoked.
    For the LONG row the test expects: the plan target is not hit, the
    entry target is still 1, the label method is ``OPPORTUNITY_MFE_V1``,
    the expected net edge equals ``mfe_bps - cost_bps``, and
    ``gross_edge_bps - cost_bps`` is -6.5.
    """
    run_id = "unit-entry-opportunity"
    target_method = "OPPORTUNITY_MFE_V1"

    with tempfile.TemporaryDirectory() as tmp_dir:
        data_root = Path(tmp_dir)
        run_root = data_root / "trader-v4" / "runs" / run_id
        feature_path = run_root / "feature" / "feature_frame.parquet"
        replay_path = run_root / "replay" / "replay_1m.parquet"
        plan_path = run_root / "label" / "price_plan_context.json"
        config_path = data_root / "label_config.json"
        for parquet_path in (feature_path, replay_path):
            parquet_path.parent.mkdir(parents=True)

        bar_times = pd.date_range("2026-01-01", periods=5, freq="min", tz="UTC")

        # Single feature row anchored on the first replay bar.
        feature_frame = pd.DataFrame(
            {
                "sample_id": ["s0"],
                "symbol": "BTC-USDT-PERP",
                "event_time": [bar_times[0]],
                "open_time_ms": [0],
                "split_id": "fit_inner",
                "walk_forward_fold": 0,
                "data_quality_flag": "OK",
                "spread_bps": 1.0,
                "spread_rank_24h_pct": 0.1,
                "realized_vol_15m_bps": 2.0,
            }
        )
        feature_frame.to_parquet(feature_path, index=False)

        # Flat open/close bars; highs peak at 100.20 and lows dip to 99.97.
        replay_frame = pd.DataFrame(
            {
                "event_time": bar_times,
                "open_time_ms": np.arange(5, dtype=np.int64) * 60_000,
                "symbol": "BTC-USDT-PERP",
                "open": [100.0] * 5,
                "high": [100.0, 100.05, 100.19, 100.20, 100.0],
                "low": [100.0, 99.99, 99.98, 99.97, 100.0],
                "close": [100.0] * 5,
                "spread_bps": 1.0,
            }
        )
        replay_frame.to_parquet(replay_path, index=False)

        label_config = {
            "entry": {
                "max_hold_minutes": 3,
                "target_bps": 50.0,
                "stop_bps": 50.0,
                "min_expected_net_edge_bps": 3.0,
                "target_method": target_method,
            }
        }
        write_json(config_path, label_config)

        price_plan_context = {
            "pricePlanId": "unit-plan",
            "pricePlanConfigHash": "unit-hash",
            "targetDistanceBps": 50.0,
            "stopDistanceBps": 50.0,
            "maxHoldMinutes": 3,
            "costBps": 6.5,
            "entryLabelMethod": ENTRY_LABEL_METHOD,
            "entryTargetMethod": target_method,
        }
        write_json(plan_path, price_plan_context)

        label_args = Namespace(
            data_root=data_root,
            run_id=run_id,
            feature_path=feature_path,
            replay_path=replay_path,
            label_config_path=config_path,
            cost_config_path=None,
            price_plan_context_path=plan_path,
        )
        build_entry_labels(label_args)

        label_frame = pd.read_parquet(run_root / "label" / "entry_labels.parquet")
        is_long_s0 = (label_frame["sample_id"] == "s0") & (label_frame["side"] == "LONG")
        long_row = label_frame.loc[is_long_s0].iloc[0]

        self.assertEqual(0, long_row["target_hit"])
        self.assertEqual(1, long_row["entry_target"])
        self.assertEqual("OPPORTUNITY_MFE_V1", long_row["label_method"])
        self.assertAlmostEqual(
            long_row["mfe_bps"] - long_row["cost_bps"],
            long_row["expected_net_edge_bps"],
            places=6,
        )
        self.assertAlmostEqual(
            -6.5,
            long_row["gross_edge_bps"] - long_row["cost_bps"],
            places=6,
        )
|
||||
|
||||
def test_entry_quality_label_rejects_untradable_opportunity(self) -> None:
    """Build entry labels with the ``OPPORTUNITY_QUALITY_V1`` target method.

    One feature sample (``s0``) and five one-minute replay bars are written
    to a temporary run directory, then ``build_entry_labels`` is invoked
    with ``min_plan_net_edge_bps`` and ``max_entry_mae_bps`` configured.
    For the LONG row the test expects: the label method is
    ``OPPORTUNITY_QUALITY_V1``, the expected net edge exceeds 3.0, the
    actual plan net edge is negative, and the entry target is 0.
    """
    run_id = "unit-entry-quality"
    target_method = "OPPORTUNITY_QUALITY_V1"

    with tempfile.TemporaryDirectory() as tmp_dir:
        data_root = Path(tmp_dir)
        run_root = data_root / "trader-v4" / "runs" / run_id
        feature_path = run_root / "feature" / "feature_frame.parquet"
        replay_path = run_root / "replay" / "replay_1m.parquet"
        plan_path = run_root / "label" / "price_plan_context.json"
        config_path = data_root / "label_config.json"
        for parquet_path in (feature_path, replay_path):
            parquet_path.parent.mkdir(parents=True)

        bar_times = pd.date_range("2026-01-01", periods=5, freq="min", tz="UTC")

        # Single feature row anchored on the first replay bar.
        feature_frame = pd.DataFrame(
            {
                "sample_id": ["s0"],
                "symbol": "BTC-USDT-PERP",
                "event_time": [bar_times[0]],
                "open_time_ms": [0],
                "split_id": "fit_inner",
                "walk_forward_fold": 0,
                "data_quality_flag": "OK",
                "spread_bps": 1.0,
                "spread_rank_24h_pct": 0.1,
                "realized_vol_15m_bps": 2.0,
            }
        )
        feature_frame.to_parquet(feature_path, index=False)

        # Flat open/close bars; highs peak at 100.20 and lows dip to 99.97.
        replay_frame = pd.DataFrame(
            {
                "event_time": bar_times,
                "open_time_ms": np.arange(5, dtype=np.int64) * 60_000,
                "symbol": "BTC-USDT-PERP",
                "open": [100.0] * 5,
                "high": [100.0, 100.05, 100.19, 100.20, 100.0],
                "low": [100.0, 99.99, 99.98, 99.97, 100.0],
                "close": [100.0] * 5,
                "spread_bps": 1.0,
            }
        )
        replay_frame.to_parquet(replay_path, index=False)

        label_config = {
            "entry": {
                "max_hold_minutes": 3,
                "target_bps": 50.0,
                "stop_bps": 50.0,
                "min_expected_net_edge_bps": 3.0,
                "min_plan_net_edge_bps": 0.0,
                "max_entry_mae_bps": 12.0,
                "target_method": target_method,
            }
        }
        write_json(config_path, label_config)

        price_plan_context = {
            "pricePlanId": "unit-plan",
            "pricePlanConfigHash": "unit-hash",
            "targetDistanceBps": 50.0,
            "stopDistanceBps": 50.0,
            "maxHoldMinutes": 3,
            "costBps": 6.5,
            "entryLabelMethod": ENTRY_LABEL_METHOD,
            "entryTargetMethod": target_method,
        }
        write_json(plan_path, price_plan_context)

        label_args = Namespace(
            data_root=data_root,
            run_id=run_id,
            feature_path=feature_path,
            replay_path=replay_path,
            label_config_path=config_path,
            cost_config_path=None,
            price_plan_context_path=plan_path,
        )
        build_entry_labels(label_args)

        label_frame = pd.read_parquet(run_root / "label" / "entry_labels.parquet")
        is_long_s0 = (label_frame["sample_id"] == "s0") & (label_frame["side"] == "LONG")
        long_row = label_frame.loc[is_long_s0].iloc[0]

        self.assertEqual("OPPORTUNITY_QUALITY_V1", long_row["label_method"])
        # The opportunity edge clears the 3.0 bps minimum ...
        self.assertGreater(long_row["expected_net_edge_bps"], 3.0)
        # ... but the plan outcome is negative, so the target must be 0.
        self.assertLess(long_row["actual_plan_net_edge_bps"], 0.0)
        self.assertEqual(0, long_row["entry_target"])
|
||||
|
||||
def test_l1_snapshot_diff_ofi_uses_quote_notional_and_signed_ask_side(self) -> None:
    """Pin the bid and ask parts returned by ``l1_snapshot_diff_ofi_quote``.

    Eight three-element series are passed positionally and the two returned
    series are compared element by element against fixed expected values.
    """
    # NOTE(review): the expected values are consistent with the argument
    # order being current bid px/qty, current ask px/qty, then previous
    # bid px/qty, previous ask px/qty -- confirm against the signature.
    series_args = [
        pd.Series(values)
        for values in (
            [101.0, 101.0, 100.5],
            [2.0, 3.0, 4.0],
            [102.0, 101.5, 102.5],
            [5.0, 6.0, 7.0],
            [100.0, 101.0, 101.0],
            [1.5, 2.0, 3.0],
            [102.0, 102.0, 101.5],
            [4.0, 5.0, 6.0],
        )
    ]
    bid_part, ask_part = l1_snapshot_diff_ofi_quote(*series_args)

    # (expected bid part, expected ask part) per row position.
    expected_pairs = (
        (202.0, -102.0),
        (101.0, -609.0),
        (-303.0, 609.0),
    )
    for position, (expected_bid, expected_ask) in enumerate(expected_pairs):
        self.assertAlmostEqual(expected_bid, bid_part.iloc[position])
        self.assertAlmostEqual(expected_ask, ask_part.iloc[position])
|
||||
|
||||
def test_exported_onnx_accepts_java_feature_shape(self) -> None:
|
||||
import onnxruntime as ort
|
||||
|
||||
|
||||
Reference in New Issue
Block a user