Expose Huber alpha/epsilon tuning for the state-continue experiment

This commit is contained in:
Codex
2026-06-27 23:39:40 +08:00
parent c463be1741
commit 38a728c00b
3 changed files with 47 additions and 3 deletions
@@ -15,6 +15,8 @@ def main() -> None:
parser.add_argument("--max-rows-per-split", type=int, default=0)
parser.add_argument("--regressor-kind", choices=["huber", "ridge"], default="huber")
parser.add_argument("--ridge-alpha", type=float, default=10.0)
parser.add_argument("--huber-alpha", type=float, default=0.001)
parser.add_argument("--huber-epsilon", type=float, default=1.35)
parser.add_argument("--huber-max-iter", type=int, default=1000)
parser.add_argument("--regression-target-clip-bps", type=float, default=0.0)
args = parser.parse_args()
@@ -184,6 +184,22 @@ class StateContinueExperimentTest(unittest.TestCase):
self.assertEqual(8, len(predictions))
self.assertIn("time_in_position_minutes", predictions.columns)
huber_metrics, _ = _train_side_models(
frame,
"LONG",
[*FEATURE_ORDER, *STATE_FEATURES],
regressor_kind="huber",
huber_alpha=0.002,
huber_epsilon=1.10,
huber_max_iter=100,
regression_target_clip_bps=4.0,
)
self.assertEqual("huber", huber_metrics["regressor_kind"])
self.assertEqual(0.002, huber_metrics["huber_alpha"])
self.assertEqual(1.10, huber_metrics["huber_epsilon"])
self.assertEqual(4.0, huber_metrics["regression_target_clip_bps"])
if __name__ == "__main__":
unittest.main()
@@ -52,15 +52,19 @@ def run_state_continue_experiment(args: Any) -> None:
ages = _parse_ages(args.ages_minutes)
regressor_kind = getattr(args, "regressor_kind", "huber")
ridge_alpha = float(getattr(args, "ridge_alpha", 10.0))
huber_alpha = float(getattr(args, "huber_alpha", 0.001))
huber_epsilon = float(getattr(args, "huber_epsilon", 1.35))
huber_max_iter = int(getattr(args, "huber_max_iter", 1000))
regression_target_clip_bps = float(getattr(args, "regression_target_clip_bps", 0.0))
logging.info(
"trader.training.state_continue_experiment_started runId=%s baselineRunId=%s ages=%s regressorKind=%s ridgeAlpha=%s huberMaxIter=%s regressionTargetClipBps=%s",
"trader.training.state_continue_experiment_started runId=%s baselineRunId=%s ages=%s regressorKind=%s ridgeAlpha=%s huberAlpha=%s huberEpsilon=%s huberMaxIter=%s regressionTargetClipBps=%s",
args.run_id,
args.baseline_run_id,
ages,
regressor_kind,
ridge_alpha,
huber_alpha,
huber_epsilon,
huber_max_iter,
regression_target_clip_bps,
)
@@ -101,6 +105,8 @@ def run_state_continue_experiment(args: Any) -> None:
dataset_hash,
regressor_kind,
ridge_alpha,
huber_alpha,
huber_epsilon,
huber_max_iter,
regression_target_clip_bps,
)
@@ -133,7 +139,17 @@ def run_state_continue_experiment(args: Any) -> None:
side_frame = state_frame[state_frame["position_side"].eq(side)].copy()
for feature_set_name, feature_columns in feature_sets.items():
key = f"{side.lower()}_{feature_set_name}"
result, predictions = _train_side_models(side_frame, side, feature_columns, regressor_kind, ridge_alpha, huber_max_iter, regression_target_clip_bps)
result, predictions = _train_side_models(
side_frame,
side,
feature_columns,
regressor_kind,
ridge_alpha,
huber_alpha,
huber_epsilon,
huber_max_iter,
regression_target_clip_bps,
)
results[key] = result
predictions["side"] = side
predictions["feature_set"] = feature_set_name
@@ -433,6 +449,8 @@ def _train_side_models(
feature_columns: list[str],
regressor_kind: str = "huber",
ridge_alpha: float = 10.0,
huber_alpha: float = 0.001,
huber_epsilon: float = 1.35,
huber_max_iter: int = 1000,
regression_target_clip_bps: float = 0.0,
) -> tuple[dict[str, Any], pd.DataFrame]:
@@ -451,7 +469,7 @@ def _train_side_models(
clf.fit(x_train, y_train_cls)
reg_max_iter = huber_max_iter
if regressor_kind == "huber":
reg = HuberRegressor(alpha=0.001, epsilon=1.35, max_iter=reg_max_iter)
reg = HuberRegressor(alpha=huber_alpha, epsilon=huber_epsilon, max_iter=reg_max_iter)
elif regressor_kind == "ridge":
reg = Ridge(alpha=ridge_alpha)
else:
@@ -498,6 +516,8 @@ def _train_side_models(
n_iter = getattr(reg, "n_iter_", None)
metrics["regressor_kind"] = regressor_kind
metrics["ridge_alpha"] = ridge_alpha if regressor_kind == "ridge" else None
metrics["huber_alpha"] = huber_alpha if regressor_kind == "huber" else None
metrics["huber_epsilon"] = huber_epsilon if regressor_kind == "huber" else None
metrics["regressor_iterations"] = int(n_iter) if n_iter is not None else 0
metrics["regressor_max_iter"] = reg_max_iter
metrics["regressor_converged"] = True if n_iter is None else 0 <= metrics["regressor_iterations"] < reg_max_iter
@@ -545,6 +565,8 @@ def _source_manifest(
dataset_hash: str,
regressor_kind: str,
ridge_alpha: float,
huber_alpha: float,
huber_epsilon: float,
huber_max_iter: int,
regression_target_clip_bps: float,
) -> dict[str, Any]:
@@ -561,6 +583,8 @@ def _source_manifest(
"min_continue_edge_bps": min_continue_edge_bps,
"regressor_kind": regressor_kind,
"ridge_alpha": ridge_alpha if regressor_kind == "ridge" else None,
"huber_alpha": huber_alpha if regressor_kind == "huber" else None,
"huber_epsilon": huber_epsilon if regressor_kind == "huber" else None,
"huber_max_iter": huber_max_iter if regressor_kind == "huber" else None,
"regression_target_clip_bps": regression_target_clip_bps if regression_target_clip_bps > 0 else None,
"dataset_hash_sha256": dataset_hash,
@@ -666,6 +690,8 @@ def _report(args: Any, baseline_root: Path, manifest: dict[str, Any], results: d
f"- row_count: `{manifest['row_count']}`",
f"- ages_minutes: `{manifest['ages_minutes']}`",
f"- regressor_kind: `{manifest['regressor_kind']}`",
f"- huber_alpha: `{manifest['huber_alpha']}`",
f"- huber_epsilon: `{manifest['huber_epsilon']}`",
f"- huber_max_iter: `{manifest['huber_max_iter']}`",
f"- regression_target_clip_bps: `{manifest['regression_target_clip_bps']}`",
f"- continue_horizon_minutes: `{manifest['continue_horizon_minutes']}`",