Add Direction label and PM probe diagnostics
This commit is contained in:
@@ -15,17 +15,20 @@ if str(TRAINING_ROOT) not in sys.path:
|
||||
|
||||
from trader_training.onnx_export import LinearHead, export_heads
|
||||
from trader_training.conditional_entry_probe import probe_conditional_entry_training
|
||||
from trader_training.direction_opportunity_dataset import _opportunity_labels
|
||||
from trader_training.dynamic_exit_search import search_dynamic_exit_plans
|
||||
from trader_training.entry_condition_pair_screen import screen_entry_condition_pairs
|
||||
from trader_training.entry_feature_screen import _bucket_edges, _screen_edge_column
|
||||
from trader_training.entry_mae_label_diagnostic import diagnose_entry_mae_labels
|
||||
from trader_training.io_utils import read_json, write_json
|
||||
from trader_training.labels import ENTRY_LABEL_METHOD, _path_stats_for_group, build_entry_labels
|
||||
from trader_training.nonlinear_pm_probe import _expanded_threshold_candidates
|
||||
from trader_training.ofi_feature_experiment import _load_entry_dataset, l1_snapshot_diff_ofi_quote
|
||||
from trader_training.promote import promote_artifact_bundle
|
||||
from trader_training.replay import build_splits
|
||||
from trader_training.schemas import FEATURE_ORDER, LATEST_STRESS_SPLIT, MODEL_OUTPUTS, OUTPUT_MAPPING, TRAINING_SPLITS, VALIDATION_LOCKED_SPLIT
|
||||
from trader_training.training import TARGETS, _head_train_mask
|
||||
from trader_training.diagnostics import _label_summary
|
||||
|
||||
|
||||
class TrainingContractTest(unittest.TestCase):
|
||||
@@ -40,6 +43,21 @@ class TrainingContractTest(unittest.TestCase):
|
||||
self.assertEqual(set(fields), set(OUTPUT_MAPPING[model_name]))
|
||||
self.assertEqual([f"prediction[{idx}]" for idx in range(len(fields))], [OUTPUT_MAPPING[model_name][field] for field in fields])
|
||||
|
||||
def test_nonlinear_pm_probe_expands_low_probability_thresholds(self) -> None:
    """Check that the expanded threshold candidates include one specific entry.

    The assertion passes only when ``_expanded_threshold_candidates()``
    returns a container holding a dict equal to ``wanted_candidate``.
    """
    # The exact candidate that has to be present in the returned container.
    wanted_candidate = {
        "long_open_prob": 0.2,
        "short_open_prob": 0.2,
        "min_entry_prob": 0.05,
        "max_market_risk_prob": 0.45,
        "min_expected_edge_bps": -5.0,
        "min_direction_margin": 0.0,
    }

    self.assertIn(wanted_candidate, _expanded_threshold_candidates())
|
||||
|
||||
def test_entry_feature_screen_prefers_actual_plan_edge(self) -> None:
|
||||
dataset = pd.DataFrame(
|
||||
{
|
||||
@@ -79,6 +97,51 @@ class TrainingContractTest(unittest.TestCase):
|
||||
self.assertEqual("ALL_FIT_ROWS", default_filter)
|
||||
self.assertEqual([True, True, True, True], default_mask.tolist())
|
||||
|
||||
def test_direction_opportunity_labels_choose_clear_path_opportunity(self) -> None:
    """Check the long/short/neutral targets produced by ``_opportunity_labels``.

    Five rows are labelled with ``opportunity_bps=40.0`` and
    ``min_advantage_bps=5.0``; each target column is then compared against
    its expected 0/1 vector.
    """
    # NOTE(review): the two positional arrays look like per-row long-side and
    # short-side path values (in that order) -- confirm against the helper.
    first_series = np.array([45.0, 10.0, 45.0, 42.0, np.nan])
    second_series = np.array([20.0, 50.0, 43.0, 48.0, 50.0])

    labels = _opportunity_labels(
        first_series,
        second_series,
        opportunity_bps=40.0,
        min_advantage_bps=5.0,
    )

    # Checked in the same order as before: long, then short, then neutral.
    expected_by_column = (
        ("long_target", [1, 0, 0, 0, 0]),
        ("short_target", [0, 1, 0, 1, 1]),
        ("neutral_target", [0, 0, 1, 0, 0]),
    )
    for column, expected in expected_by_column:
        self.assertEqual(expected, labels[column].tolist())
|
||||
|
||||
def test_diagnostics_reads_actual_training_dataset_labels(self) -> None:
    """Check that ``_label_summary`` reports labels from the dataset parquet files.

    Two small parquet files are written under ``<tmp>/dataset`` and the
    summary returned for that root is checked for its ``fit_inner`` split:
    the reported source paths, the direction label ratio, and the LONG
    entry target rate.
    """
    direction_columns = {
        "sample_id": ["s1", "s2"],
        "split_id": ["fit_inner", "fit_inner"],
        "long_target": [1, 0],
        "short_target": [0, 0],
        "neutral_target": [0, 1],
        "future_return_bps": [5.0, -1.0],
    }
    entry_columns = {
        "sample_id": ["s1", "s2"],
        "split_id": ["fit_inner", "fit_inner"],
        "long_entry_target": [1, 0],
        "short_entry_target": [0, 1],
        "long_actual_plan_net_edge_bps": [8.0, -6.0],
        "short_actual_plan_net_edge_bps": [-5.0, 7.0],
    }

    with tempfile.TemporaryDirectory() as tmp_name:
        root = Path(tmp_name)
        dataset_dir = root / "dataset"
        dataset_dir.mkdir(parents=True)

        # Direction file first, entry file second -- same order as before.
        fixtures = (
            ("direction_train.parquet", direction_columns),
            ("entry_train.parquet", entry_columns),
        )
        for file_name, columns in fixtures:
            frame = pd.DataFrame(columns)
            frame.to_parquet(dataset_dir / file_name, index=False)

        summary = _label_summary(root)

        direction = summary["fit_inner"]["direction"]
        self.assertEqual("dataset/direction_train.parquet", direction["source"])
        self.assertEqual(
            {"LONG": 0.5, "SHORT": 0.0, "NEUTRAL": 0.5},
            direction["label_ratio"],
        )

        entry = summary["fit_inner"]["entry"]
        self.assertEqual("dataset/entry_train.parquet", entry["source"])
        self.assertEqual(0.5, entry["target_rate_by_side"]["LONG"])
|
||||
|
||||
def test_entry_feature_screen_keeps_zero_inflated_event_features(self) -> None:
|
||||
values = np.concatenate((np.zeros(5000), np.linspace(1.0, 100.0, 500)))
|
||||
edges = _bucket_edges(values)
|
||||
|
||||
Reference in New Issue
Block a user