Improve Trader V4 training pipeline
Align entry labels with max future edge, tune direction labeling, and harden regression evaluation. Add training diagnostics, price-plan search, feature screening, and nonlinear benchmark scripts.
This commit is contained in:
@@ -8,7 +8,7 @@ from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LogisticRegression, Ridge
|
||||
from sklearn.linear_model import HuberRegressor, LogisticRegression
|
||||
from sklearn.metrics import accuracy_score, log_loss, mean_absolute_error, roc_auc_score
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
@@ -217,7 +217,7 @@ def _fit_head(item, x_train, x_tune, train: pd.DataFrame, tune: pd.DataFrame, sc
|
||||
if kind == "regression":
|
||||
y_train = pd.to_numeric(train[target], errors="coerce").fillna(0.0).to_numpy()
|
||||
y_val = pd.to_numeric(tune[target], errors="coerce").fillna(0.0).to_numpy()
|
||||
model = Ridge(alpha=1.0)
|
||||
model = HuberRegressor(alpha=0.001, epsilon=1.35, max_iter=500)
|
||||
model.fit(x_train, y_train)
|
||||
pred = model.predict(x_tune)
|
||||
weight, bias = _fold_scaler(model.coef_.reshape(1, -1).T, np.array([model.intercept_]), scaler)
|
||||
@@ -297,10 +297,15 @@ def _binary_metrics(y_train: np.ndarray, y_val: np.ndarray, proba: np.ndarray) -
|
||||
def _regression_metrics(y_train: np.ndarray, y_val: np.ndarray, pred: np.ndarray) -> dict[str, Any]:
    """Summarise validation error for a regression head against a constant baseline.

    The baseline predicts the training-target median for every validation row
    (0.0 when ``y_train`` is empty). Each ratio entry is ``None`` whenever its
    denominator is not strictly positive.

    Args:
        y_train: Training targets; supplies the median and standard deviation.
        y_val: Validation targets the predictions are scored against.
        pred: Model predictions for ``y_val``.

    Returns:
        The metrics dictionary after it has been passed through ``_with_quality``.
    """
    model_mae = float(mean_absolute_error(y_val, pred))
    target_std = float(np.std(y_train))

    # Median of the training targets is the constant predictor being compared against.
    target_median = 0.0
    if len(y_train):
        target_median = float(np.median(y_train))

    # Score the constant predictor only when there is something to score.
    baseline_mae = 0.0
    if len(y_val):
        baseline_pred = np.full(len(y_val), target_median)
        baseline_mae = float(mean_absolute_error(y_val, baseline_pred))

    if target_std > 0:
        std_ratio = float(model_mae / target_std)
    else:
        std_ratio = None

    if baseline_mae > 0:
        baseline_ratio = float(model_mae / baseline_mae)
    else:
        baseline_ratio = None

    summary: dict[str, Any] = {}
    summary["mae"] = model_mae
    summary["constant_mae"] = baseline_mae
    summary["train_target_median"] = target_median
    summary["train_target_std"] = target_std
    summary["mae_vs_train_std_ratio"] = std_ratio
    summary["mae_vs_constant_ratio"] = baseline_ratio
    return _with_quality(summary)
|
||||
|
||||
@@ -314,8 +319,8 @@ def _with_quality(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
reasons.append("brier_not_better_than_constant")
|
||||
if "brier_multiclass" in metrics and metrics["brier_multiclass"] >= metrics["constant_brier_multiclass"]:
|
||||
reasons.append("brier_not_better_than_constant")
|
||||
if "mae" in metrics and metrics.get("train_target_std") is not None and metrics["train_target_std"] > 0 and metrics["mae"] > metrics["train_target_std"]:
|
||||
reasons.append("mae_above_train_target_std")
|
||||
if "mae" in metrics and metrics.get("constant_mae") is not None and metrics["constant_mae"] > 0 and metrics["mae"] >= metrics["constant_mae"]:
|
||||
reasons.append("mae_not_better_than_constant")
|
||||
if "top10_hit_rate" in metrics and "all_hit_rate" in metrics and metrics["top10_hit_rate"] <= metrics["all_hit_rate"]:
|
||||
reasons.append("top10_not_better_than_all")
|
||||
metrics["quality_status"] = "REJECTED" if reasons else "PASS"
|
||||
@@ -360,7 +365,7 @@ def _predict_frame(frame: pd.DataFrame, results: list[HeadResult], include_label
|
||||
for idx, field in enumerate(MODEL_OUTPUTS["DIRECTION"]):
|
||||
out[field] = values[:, idx]
|
||||
elif result.kind == "sigmoid":
|
||||
out[result.field] = (1.0 / (1.0 + np.exp(-values))).reshape(-1)
|
||||
out[result.field] = _sigmoid(values).reshape(-1)
|
||||
else:
|
||||
out[result.field] = values.reshape(-1)
|
||||
if include_labels and result.kind != "softmax" and result.target_name and result.target_name in frame.columns:
|
||||
@@ -374,6 +379,11 @@ def _softmax(values: np.ndarray) -> np.ndarray:
|
||||
return exp / exp.sum(axis=1, keepdims=True)
|
||||
|
||||
|
||||
def _sigmoid(values: np.ndarray) -> np.ndarray:
    """Return the element-wise logistic function of ``values``.

    Inputs are clamped to the interval [-50, 50] before exponentiation so that
    ``np.exp`` never overflows on extreme logits.

    Args:
        values: Raw scores (logits).

    Returns:
        ``1 / (1 + exp(-x))`` evaluated on the clamped scores, same shape as the input.
    """
    bounded = np.clip(values, -50.0, 50.0)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator
|
||||
|
||||
|
||||
def _write_training_report(path: Path, model_name: str, metrics: dict[str, Any], quality_status: str, quality_reasons: list[str]) -> None:
|
||||
lines = [
|
||||
"# Trader Model Training Report",
|
||||
|
||||
Reference in New Issue
Block a user