Add Entry opportunity training diagnostics
This commit is contained in:
@@ -89,7 +89,7 @@ def train_small_models(args: Any) -> None:
|
||||
model_manifest: dict[str, Any] = {}
|
||||
for model_name, spec in TARGETS.items():
|
||||
dataset = read_parquet(root / "dataset" / spec["dataset"])
|
||||
if model_name == "ENTRY" and _conditional_entry_enabled(args):
|
||||
if model_name == "ENTRY" and _conditional_entry_source(args) == "direction_label":
|
||||
dataset = _attach_direction_fit_labels(root, dataset)
|
||||
if args.max_rows and len(dataset) > args.max_rows:
|
||||
dataset = dataset.sort_values("event_time").tail(args.max_rows).copy()
|
||||
@@ -189,7 +189,17 @@ def train_small_models(args: Any) -> None:
|
||||
|
||||
|
||||
def _conditional_entry_enabled(args: Any) -> bool:
    """Return True when ``args`` selects any conditional Entry training source.

    Delegates to ``_conditional_entry_source`` so the legacy
    ``conditional_entry_direction_labels`` flag and the
    ``conditional_entry_source`` option are resolved in a single place.

    Raises:
        ValueError: Propagated from ``_conditional_entry_source`` when the
            configured source is not supported.
    """
    # A single return: the former flag-only return shadowed this one, which
    # meant a non-"none" source without the legacy flag was reported disabled.
    return _conditional_entry_source(args) != "none"
|
||||
|
||||
|
||||
def _conditional_entry_source(args: Any) -> str:
|
||||
source = str(getattr(args, "conditional_entry_source", "none") or "none").strip().lower()
|
||||
if bool(getattr(args, "conditional_entry_direction_labels", False)):
|
||||
source = "direction_label"
|
||||
allowed = {"none", "direction_label", "side_opportunity"}
|
||||
if source not in allowed:
|
||||
raise ValueError(f"unsupported conditional Entry source: {source}")
|
||||
return source
|
||||
|
||||
|
||||
def _attach_direction_fit_labels(root: Path, entry_dataset: pd.DataFrame) -> pd.DataFrame:
|
||||
@@ -213,19 +223,29 @@ def _attach_direction_fit_labels(root: Path, entry_dataset: pd.DataFrame) -> pd.
|
||||
|
||||
|
||||
def _head_train_mask(model_name: str, head_name: str, train: pd.DataFrame, args: Any) -> tuple[np.ndarray, str]:
    """Select the training rows used to fit one model head.

    Only the ``"ENTRY"`` model with a conditional source other than ``"none"``
    is filtered, and only for heads whose name starts with ``long_`` or
    ``short_``. Every other combination keeps all rows.

    Args:
        model_name: Name of the model being trained.
        head_name: Name of the head; its ``long_`` / ``short_`` prefix picks
            the side-specific columns.
        train: Training frame the mask is computed over.
        args: Options object read via ``_conditional_entry_source`` and, for
            the ``side_opportunity`` source, ``conditional_entry_opportunity_bps``.

    Returns:
        A ``(mask, filter_name)`` pair: a boolean array with one entry per row
        of ``train`` and a label describing the filter that produced it.

    Raises:
        ValueError: If the column required by the active source is missing
            from ``train``, or if the configured source is unsupported.
    """
    source = _conditional_entry_source(args)
    if model_name != "ENTRY" or source == "none":
        return np.ones(len(train), dtype=bool), "ALL_FIT_ROWS"

    if head_name.startswith("long_"):
        side = "LONG"
        direction_label_column = "long_target"
        opportunity_column = "long_max_achievable_net_edge_bps"
    elif head_name.startswith("short_"):
        side = "SHORT"
        direction_label_column = "short_target"
        opportunity_column = "short_max_achievable_net_edge_bps"
    else:
        # Heads without a side prefix are never conditioned.
        return np.ones(len(train), dtype=bool), "ALL_FIT_ROWS"

    if source == "direction_label":
        # The direction label column is required only for this source; the
        # side_opportunity path must not fail when labels are absent.
        if direction_label_column not in train.columns:
            raise ValueError(f"conditional Entry training requires {direction_label_column} for head {head_name}")
        labels = pd.to_numeric(train[direction_label_column], errors="coerce")
        # Unparseable or missing labels count as 0, i.e. the row is excluded.
        mask = labels.fillna(0).astype(int).eq(1).to_numpy()
        return mask, f"DIRECTION_LABEL_{side}_FIT_ROWS"

    # Remaining source is "side_opportunity". Fall back to 40 bps only when
    # the option is absent or None, so an explicit threshold of 0 is honored
    # (``value or 40.0`` would silently turn 0 into 40).
    configured = getattr(args, "conditional_entry_opportunity_bps", None)
    threshold = 40.0 if configured is None else float(configured)
    if opportunity_column not in train.columns:
        raise ValueError(f"side opportunity Entry training requires {opportunity_column} for head {head_name}")
    opportunity = pd.to_numeric(train[opportunity_column], errors="coerce")
    # Keep rows whose opportunity value is at least the threshold.
    mask = opportunity.ge(threshold).fillna(False).to_numpy()
    filter_name = f"SIDE_OPPORTUNITY_{side}_GE_{threshold:g}_BPS_FIT_ROWS"
    return mask, filter_name
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user