From 38a728c00bdf3fdb509ed0c73be86f5e98683ac5 Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 27 Jun 2026 23:39:40 +0800 Subject: [PATCH] Expose state Continue Huber tuning --- .../22_train_state_continue_experiment.py | 2 ++ .../tests/test_state_continue_experiment.py | 16 ++++++++++ .../state_continue_experiment.py | 32 +++++++++++++++++-- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/training/scripts/22_train_state_continue_experiment.py b/training/scripts/22_train_state_continue_experiment.py index 1f5028b..002ba87 100644 --- a/training/scripts/22_train_state_continue_experiment.py +++ b/training/scripts/22_train_state_continue_experiment.py @@ -15,6 +15,8 @@ def main() -> None: parser.add_argument("--max-rows-per-split", type=int, default=0) parser.add_argument("--regressor-kind", choices=["huber", "ridge"], default="huber") parser.add_argument("--ridge-alpha", type=float, default=10.0) + parser.add_argument("--huber-alpha", type=float, default=0.001) + parser.add_argument("--huber-epsilon", type=float, default=1.35) parser.add_argument("--huber-max-iter", type=int, default=1000) parser.add_argument("--regression-target-clip-bps", type=float, default=0.0) args = parser.parse_args() diff --git a/training/tests/test_state_continue_experiment.py b/training/tests/test_state_continue_experiment.py index bfa5ef6..765bd7b 100644 --- a/training/tests/test_state_continue_experiment.py +++ b/training/tests/test_state_continue_experiment.py @@ -184,6 +184,22 @@ class StateContinueExperimentTest(unittest.TestCase): self.assertEqual(8, len(predictions)) self.assertIn("time_in_position_minutes", predictions.columns) + huber_metrics, _ = _train_side_models( + frame, + "LONG", + [*FEATURE_ORDER, *STATE_FEATURES], + regressor_kind="huber", + huber_alpha=0.002, + huber_epsilon=1.10, + huber_max_iter=100, + regression_target_clip_bps=4.0, + ) + + self.assertEqual("huber", huber_metrics["regressor_kind"]) + self.assertEqual(0.002, huber_metrics["huber_alpha"]) + self.assertEqual(1.10, huber_metrics["huber_epsilon"]) + self.assertEqual(4.0, huber_metrics["regression_target_clip_bps"]) + if __name__ == "__main__": unittest.main() diff --git a/training/trader_training/state_continue_experiment.py b/training/trader_training/state_continue_experiment.py index 2c27397..b0ca781 100644 --- a/training/trader_training/state_continue_experiment.py +++ b/training/trader_training/state_continue_experiment.py @@ -52,15 +52,19 @@ def run_state_continue_experiment(args: Any) -> None: ages = _parse_ages(args.ages_minutes) regressor_kind = getattr(args, "regressor_kind", "huber") ridge_alpha = float(getattr(args, "ridge_alpha", 10.0)) + huber_alpha = float(getattr(args, "huber_alpha", 0.001)) + huber_epsilon = float(getattr(args, "huber_epsilon", 1.35)) huber_max_iter = int(getattr(args, "huber_max_iter", 1000)) regression_target_clip_bps = float(getattr(args, "regression_target_clip_bps", 0.0)) logging.info( - "trader.training.state_continue_experiment_started runId=%s baselineRunId=%s ages=%s regressorKind=%s ridgeAlpha=%s huberMaxIter=%s regressionTargetClipBps=%s", + "trader.training.state_continue_experiment_started runId=%s baselineRunId=%s ages=%s regressorKind=%s ridgeAlpha=%s huberAlpha=%s huberEpsilon=%s huberMaxIter=%s regressionTargetClipBps=%s", args.run_id, args.baseline_run_id, ages, regressor_kind, ridge_alpha, + huber_alpha, + huber_epsilon, huber_max_iter, regression_target_clip_bps, ) @@ -101,6 +105,8 @@ def run_state_continue_experiment(args: Any) -> None: dataset_hash, regressor_kind, ridge_alpha, + huber_alpha, + huber_epsilon, huber_max_iter, regression_target_clip_bps, ) @@ -133,7 +139,17 @@ def run_state_continue_experiment(args: Any) -> None: side_frame = state_frame[state_frame["position_side"].eq(side)].copy() for feature_set_name, feature_columns in feature_sets.items(): key = f"{side.lower()}_{feature_set_name}" - result, predictions = _train_side_models(side_frame, side, feature_columns, regressor_kind, ridge_alpha, huber_max_iter, regression_target_clip_bps) + result, predictions = _train_side_models( + side_frame, + side, + feature_columns, + regressor_kind, + ridge_alpha, + huber_alpha, + huber_epsilon, + huber_max_iter, + regression_target_clip_bps, + ) results[key] = result predictions["side"] = side predictions["feature_set"] = feature_set_name @@ -433,6 +449,8 @@ def _train_side_models( feature_columns: list[str], regressor_kind: str = "huber", ridge_alpha: float = 10.0, + huber_alpha: float = 0.001, + huber_epsilon: float = 1.35, huber_max_iter: int = 1000, regression_target_clip_bps: float = 0.0, ) -> tuple[dict[str, Any], pd.DataFrame]: @@ -451,7 +469,7 @@ def _train_side_models( clf.fit(x_train, y_train_cls) reg_max_iter = huber_max_iter if regressor_kind == "huber": - reg = HuberRegressor(alpha=0.001, epsilon=1.35, max_iter=reg_max_iter) + reg = HuberRegressor(alpha=huber_alpha, epsilon=huber_epsilon, max_iter=reg_max_iter) elif regressor_kind == "ridge": reg = Ridge(alpha=ridge_alpha) else: @@ -498,6 +516,8 @@ def _train_side_models( n_iter = getattr(reg, "n_iter_", None) metrics["regressor_kind"] = regressor_kind metrics["ridge_alpha"] = ridge_alpha if regressor_kind == "ridge" else None + metrics["huber_alpha"] = huber_alpha if regressor_kind == "huber" else None + metrics["huber_epsilon"] = huber_epsilon if regressor_kind == "huber" else None metrics["regressor_iterations"] = int(n_iter) if n_iter is not None else 0 metrics["regressor_max_iter"] = reg_max_iter metrics["regressor_converged"] = True if n_iter is None else 0 <= metrics["regressor_iterations"] < reg_max_iter @@ -545,6 +565,8 @@ def _source_manifest( dataset_hash: str, regressor_kind: str, ridge_alpha: float, + huber_alpha: float, + huber_epsilon: float, huber_max_iter: int, regression_target_clip_bps: float, ) -> dict[str, Any]: @@ -561,6 +583,8 @@ def _source_manifest( "min_continue_edge_bps": min_continue_edge_bps, "regressor_kind": regressor_kind, "ridge_alpha": ridge_alpha if regressor_kind == "ridge" else None, + "huber_alpha": huber_alpha if regressor_kind == "huber" else None, + "huber_epsilon": huber_epsilon if regressor_kind == "huber" else None, "huber_max_iter": huber_max_iter if regressor_kind == "huber" else None, "regression_target_clip_bps": regression_target_clip_bps if regression_target_clip_bps > 0 else None, "dataset_hash_sha256": dataset_hash, @@ -666,6 +690,8 @@ def _report(args: Any, baseline_root: Path, manifest: dict[str, Any], results: d f"- row_count: `{manifest['row_count']}`", f"- ages_minutes: `{manifest['ages_minutes']}`", f"- regressor_kind: `{manifest['regressor_kind']}`", + f"- huber_alpha: `{manifest['huber_alpha']}`", + f"- huber_epsilon: `{manifest['huber_epsilon']}`", f"- huber_max_iter: `{manifest['huber_max_iter']}`", f"- regression_target_clip_bps: `{manifest['regression_target_clip_bps']}`", f"- continue_horizon_minutes: `{manifest['continue_horizon_minutes']}`",