Handle sparse event buckets in entry screening

This commit is contained in:
Codex
2026-06-28 00:53:54 +08:00
parent 340d1dd91b
commit 1fd46ff3c9
2 changed files with 21 additions and 2 deletions
@@ -146,7 +146,18 @@ def _bucket_edges(values: np.ndarray) -> np.ndarray:
edges = np.quantile(clean, quantiles)
edges = np.unique(edges)
if edges.size < 3:
return np.array([], dtype="float64")
non_zero = clean[clean != 0.0]
if non_zero.size < 300:
return np.array([], dtype="float64")
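# Breakout/sweep-style features are very often overwhelmingly 0, so ordinary deciles all collapse onto 0.
# Keep separate buckets here for "no event" and for "event strength", so rare but potentially useful signals are not missed.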
event_edges = np.unique(np.quantile(non_zero, np.linspace(0.0, 1.0, 6)))
if event_edges.size < 2:
return np.array([-np.inf, 0.0, np.inf], dtype="float64")
edges = np.unique(np.concatenate(([-np.inf, 0.0], event_edges[1:-1], [np.inf]))).astype("float64")
if edges.size < 3:
return np.array([], dtype="float64")
return edges
edges[0] = -np.inf
edges[-1] = np.inf
return edges