Improve Trader V4 training pipeline

Align entry labels with max future edge, tune direction labeling, and harden regression evaluation.

Add training diagnostics, price-plan search, feature screening, and nonlinear benchmark scripts.
This commit is contained in:
Codex
2026-06-27 19:57:29 +08:00
parent e58e4a5572
commit 9acb3460a1
27 changed files with 2059 additions and 341 deletions
+15 -5
View File
@@ -8,7 +8,7 @@ from typing import Any
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.linear_model import HuberRegressor, LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, mean_absolute_error, roc_auc_score
from sklearn.preprocessing import StandardScaler
@@ -217,7 +217,7 @@ def _fit_head(item, x_train, x_tune, train: pd.DataFrame, tune: pd.DataFrame, sc
if kind == "regression":
y_train = pd.to_numeric(train[target], errors="coerce").fillna(0.0).to_numpy()
y_val = pd.to_numeric(tune[target], errors="coerce").fillna(0.0).to_numpy()
model = Ridge(alpha=1.0)
model = HuberRegressor(alpha=0.001, epsilon=1.35, max_iter=500)
model.fit(x_train, y_train)
pred = model.predict(x_tune)
weight, bias = _fold_scaler(model.coef_.reshape(1, -1).T, np.array([model.intercept_]), scaler)
@@ -297,10 +297,15 @@ def _binary_metrics(y_train: np.ndarray, y_val: np.ndarray, proba: np.ndarray) -
def _regression_metrics(y_train: np.ndarray, y_val: np.ndarray, pred: np.ndarray) -> dict[str, Any]:
    """Summarise validation error for a regression head.

    The model's mean absolute error on ``y_val`` is reported next to the
    error of a constant baseline that always predicts the median of
    ``y_train``, together with the standard deviation of ``y_train`` and
    the ratios of the model error to each of those two references.

    Args:
        y_train: Training targets; supplies the median and standard deviation.
        y_val: Validation targets that ``pred`` is scored against.
        pred: Model predictions for the validation rows.

    Returns:
        Whatever ``_with_quality`` returns for the assembled metrics dict.
    """
    # NOTE(review): unlike the baseline below, this call is not guarded
    # against an empty ``y_val``.
    model_error = float(mean_absolute_error(y_val, pred))
    target_spread = float(np.std(y_train))

    # An empty training target falls back to a median of 0.0.
    if len(y_train):
        target_center = float(np.median(y_train))
    else:
        target_center = 0.0

    # Baseline: predict the training median for every validation row.
    if len(y_val):
        baseline_pred = np.full(len(y_val), target_center)
        baseline_error = float(mean_absolute_error(y_val, baseline_pred))
    else:
        baseline_error = 0.0

    # Ratios are only defined against a strictly positive reference value.
    spread_ratio = None
    if target_spread > 0:
        spread_ratio = float(model_error / target_spread)

    baseline_ratio = None
    if baseline_error > 0:
        baseline_ratio = float(model_error / baseline_error)

    # Keys are inserted in the same order as before so serialised output
    # keeps its layout.
    report: dict[str, Any] = {}
    report["mae"] = model_error
    report["constant_mae"] = baseline_error
    report["train_target_median"] = target_center
    report["train_target_std"] = target_spread
    report["mae_vs_train_std_ratio"] = spread_ratio
    report["mae_vs_constant_ratio"] = baseline_ratio
    return _with_quality(report)
@@ -314,8 +319,8 @@ def _with_quality(metrics: dict[str, Any]) -> dict[str, Any]:
reasons.append("brier_not_better_than_constant")
if "brier_multiclass" in metrics and metrics["brier_multiclass"] >= metrics["constant_brier_multiclass"]:
reasons.append("brier_not_better_than_constant")
if "mae" in metrics and metrics.get("train_target_std") is not None and metrics["train_target_std"] > 0 and metrics["mae"] > metrics["train_target_std"]:
reasons.append("mae_above_train_target_std")
if "mae" in metrics and metrics.get("constant_mae") is not None and metrics["constant_mae"] > 0 and metrics["mae"] >= metrics["constant_mae"]:
reasons.append("mae_not_better_than_constant")
if "top10_hit_rate" in metrics and "all_hit_rate" in metrics and metrics["top10_hit_rate"] <= metrics["all_hit_rate"]:
reasons.append("top10_not_better_than_all")
metrics["quality_status"] = "REJECTED" if reasons else "PASS"
@@ -360,7 +365,7 @@ def _predict_frame(frame: pd.DataFrame, results: list[HeadResult], include_label
for idx, field in enumerate(MODEL_OUTPUTS["DIRECTION"]):
out[field] = values[:, idx]
elif result.kind == "sigmoid":
out[result.field] = (1.0 / (1.0 + np.exp(-values))).reshape(-1)
out[result.field] = _sigmoid(values).reshape(-1)
else:
out[result.field] = values.reshape(-1)
if include_labels and result.kind != "softmax" and result.target_name and result.target_name in frame.columns:
@@ -374,6 +379,11 @@ def _softmax(values: np.ndarray) -> np.ndarray:
return exp / exp.sum(axis=1, keepdims=True)
def _sigmoid(values: np.ndarray) -> np.ndarray:
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    Inputs are limited to the range [-50, 50] before exponentiation, so
    ``np.exp`` is never evaluated on a larger-magnitude argument.

    Args:
        values: Raw scores to squash.

    Returns:
        Values of the logistic function, same shape as ``values``.
    """
    negated = -np.clip(values, -50.0, 50.0)
    denominator = 1.0 + np.exp(negated)
    return 1.0 / denominator
def _write_training_report(path: Path, model_name: str, metrics: dict[str, Any], quality_status: str, quality_reasons: list[str]) -> None:
lines = [
"# Trader Model Training Report",