Add Direction label and PM probe diagnostics

This commit is contained in:
Codex
2026-06-28 09:00:15 +08:00
parent 5ad77ffe90
commit e8420f76fe
8 changed files with 634 additions and 16 deletions
+63
View File
@@ -15,17 +15,20 @@ if str(TRAINING_ROOT) not in sys.path:
from trader_training.onnx_export import LinearHead, export_heads
from trader_training.conditional_entry_probe import probe_conditional_entry_training
from trader_training.direction_opportunity_dataset import _opportunity_labels
from trader_training.dynamic_exit_search import search_dynamic_exit_plans
from trader_training.entry_condition_pair_screen import screen_entry_condition_pairs
from trader_training.entry_feature_screen import _bucket_edges, _screen_edge_column
from trader_training.entry_mae_label_diagnostic import diagnose_entry_mae_labels
from trader_training.io_utils import read_json, write_json
from trader_training.labels import ENTRY_LABEL_METHOD, _path_stats_for_group, build_entry_labels
from trader_training.nonlinear_pm_probe import _expanded_threshold_candidates
from trader_training.ofi_feature_experiment import _load_entry_dataset, l1_snapshot_diff_ofi_quote
from trader_training.promote import promote_artifact_bundle
from trader_training.replay import build_splits
from trader_training.schemas import FEATURE_ORDER, LATEST_STRESS_SPLIT, MODEL_OUTPUTS, OUTPUT_MAPPING, TRAINING_SPLITS, VALIDATION_LOCKED_SPLIT
from trader_training.training import TARGETS, _head_train_mask
from trader_training.diagnostics import _label_summary
class TrainingContractTest(unittest.TestCase):
@@ -40,6 +43,21 @@ class TrainingContractTest(unittest.TestCase):
self.assertEqual(set(fields), set(OUTPUT_MAPPING[model_name]))
self.assertEqual([f"prediction[{idx}]" for idx in range(len(fields))], [OUTPUT_MAPPING[model_name][field] for field in fields])
def test_nonlinear_pm_probe_expands_low_probability_thresholds(self) -> None:
    # The threshold combination that must be present among the candidates
    # returned by `_expanded_threshold_candidates()`. Membership is checked
    # by equality, so every key/value below has to match exactly.
    required_candidate = {
        "long_open_prob": 0.2,
        "short_open_prob": 0.2,
        "min_entry_prob": 0.05,
        "max_market_risk_prob": 0.45,
        "min_expected_edge_bps": -5.0,
        "min_direction_margin": 0.0,
    }
    threshold_grid = _expanded_threshold_candidates()
    self.assertIn(required_candidate, threshold_grid)
def test_entry_feature_screen_prefers_actual_plan_edge(self) -> None:
dataset = pd.DataFrame(
{
@@ -79,6 +97,51 @@ class TrainingContractTest(unittest.TestCase):
self.assertEqual("ALL_FIT_ROWS", default_filter)
self.assertEqual([True, True, True, True], default_mask.tolist())
def test_direction_opportunity_labels_choose_clear_path_opportunity(self) -> None:
    # Five samples passed positionally to `_opportunity_labels`. Judging by
    # the expectations below, the first array drives the long label and the
    # second the short label -- NOTE(review): confirm against the helper.
    # The last sample has NaN in the first array and is expected to be
    # labelled short.
    long_side_values = np.array([45.0, 10.0, 45.0, 42.0, np.nan])
    short_side_values = np.array([20.0, 50.0, 43.0, 48.0, 50.0])
    labels = _opportunity_labels(
        long_side_values,
        short_side_values,
        opportunity_bps=40.0,
        min_advantage_bps=5.0,
    )
    # Expected one-hot columns, checked in the same order as before:
    # long, then short, then neutral.
    expected_by_column = {
        "long_target": [1, 0, 0, 0, 0],
        "short_target": [0, 1, 0, 1, 1],
        "neutral_target": [0, 0, 1, 0, 0],
    }
    for column, expected in expected_by_column.items():
        self.assertEqual(expected, labels[column].tolist())
def test_diagnostics_reads_actual_training_dataset_labels(self) -> None:
    # Two-row direction fixture: one LONG sample and one NEUTRAL sample,
    # both in the "fit_inner" split.
    direction_frame = pd.DataFrame(
        {
            "sample_id": ["s1", "s2"],
            "split_id": ["fit_inner", "fit_inner"],
            "long_target": [1, 0],
            "short_target": [0, 0],
            "neutral_target": [0, 1],
            "future_return_bps": [5.0, -1.0],
        }
    )
    # Two-row entry fixture for the same samples and split.
    entry_frame = pd.DataFrame(
        {
            "sample_id": ["s1", "s2"],
            "split_id": ["fit_inner", "fit_inner"],
            "long_entry_target": [1, 0],
            "short_entry_target": [0, 1],
            "long_actual_plan_net_edge_bps": [8.0, -6.0],
            "short_actual_plan_net_edge_bps": [-5.0, 7.0],
        }
    )
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        dataset_dir = root / "dataset"
        dataset_dir.mkdir(parents=True)
        # Write both parquet files under <root>/dataset before asking for
        # the summary; everything stays inside the `with` so the files
        # still exist while `_label_summary` runs.
        direction_frame.to_parquet(dataset_dir / "direction_train.parquet", index=False)
        entry_frame.to_parquet(dataset_dir / "entry_train.parquet", index=False)
        summary = _label_summary(root)
        direction_summary = summary["fit_inner"]["direction"]
        self.assertEqual("dataset/direction_train.parquet", direction_summary["source"])
        self.assertEqual({"LONG": 0.5, "SHORT": 0.0, "NEUTRAL": 0.5}, direction_summary["label_ratio"])
        entry_summary = summary["fit_inner"]["entry"]
        self.assertEqual("dataset/entry_train.parquet", entry_summary["source"])
        self.assertEqual(0.5, entry_summary["target_rate_by_side"]["LONG"])
def test_entry_feature_screen_keeps_zero_inflated_event_features(self) -> None:
values = np.concatenate((np.zeros(5000), np.linspace(1.0, 100.0, 500)))
edges = _bucket_edges(values)