@@ -24,8 +24,8 @@ class MinimizationMethod(Enum):
2424
2525
2626class LossGradHess :
27- def __init__ (self , X , y , alpha , intercept ):
28- # type: (np.ndarray, np.ndarray, float, bool) -> None
27+ def __init__ (self , X , y , alpha , intercept , sample_weight = None ):
28+ # type: (np.ndarray, np.ndarray, float, bool, Optional[np.ndarray] ) -> None
2929 """
3030 In the generation of design matrix, if intercept option is True, the first column of design matrix is of 1's,
3131 which means that the first coefficient corresponds to the intercept term. This setup is a little different
@@ -37,13 +37,17 @@ def __init__(self, X, y, alpha, intercept):
3737 self .X = X
3838 self .alpha = alpha
3939 self .intercept = intercept
40+ if sample_weight is None :
41+ self .sample_weight = np .ones (len (y ))
42+ else :
43+ self .sample_weight = np .asarray (sample_weight )
44+ self .weight_sum = np .sum (self .sample_weight )
4045
4146 def loss (self , coefs ):
4247 # type: (np.ndarray) -> np.ndarray
4348 yz = self .y * np .dot (self .X , coefs )
4449 # P(label= 1 or -1 |X) = 1 / (1+exp(-yz))
45- # Log Likelihood = Sum over log ( 1/(1 + exp(-yz)) )
46- loss_val = - np .sum (log_expit (yz ))
50+ loss_val = - np .sum (self .sample_weight * log_expit (yz ))
4751 if self .intercept :
4852 loss_val += 0.5 * self .alpha * np .dot (coefs [1 :], coefs [1 :])
4953 else :
@@ -58,8 +62,7 @@ def grad(self, coefs):
5862
5963 # if y = 1, we want z to be close to 1; if y = -1, we want z to be close to 0.
6064 z0 = (z - 1 ) * self .y
61-
62- grad = np .dot (self .X .T , z0 )
65+ grad = np .dot (self .X .T , self .sample_weight * z0 )
6366
6467 if self .intercept :
6568 grad [1 :] += self .alpha * coefs [1 :]
@@ -74,19 +77,14 @@ def hess(self, coefs):
7477 Compute the Hessian of the logistic loss.
7578
7679 The Hessian of logistic regression is: H = X.T @ diag(w) @ X + alpha * I
77- where w = p * (1 - p) and p = sigmoid(X @ coefs).
80+ where w = sample_weight * p * (1 - p) and p = sigmoid(X @ coefs).
7881
7982 This is used by trust-constr for faster convergence (Newton-like steps).
8083 """
8184 z = np .dot (self .X , coefs )
8285 p = expit (z )
83- # Weights for the Hessian: p * (1 - p)
84- # This is always positive, making the Hessian positive semi-definite
85- weights = p * (1 - p )
86-
87- # H = X.T @ diag(weights) @ X
88- # Efficient computation: (X.T * weights) @ X
89- H = np .dot (self .X .T * weights , self .X )
86+ hess_weights = self .sample_weight * p * (1 - p )
87+ H = np .dot (self .X .T * hess_weights , self .X )
9088
9189 # Add regularization term
9290 if self .intercept :
@@ -259,8 +257,8 @@ def __init__(
259257 self .random_state = random_state
260258 self .verbose = verbose
261259
262- def _fit (self , X , y , initial_guess = None ):
263- # type: (pd.DataFrame, pd.Series, Optional[np.ndarray], bool ) -> None
260+ def _fit (self , X , y , sample_weight = None , initial_guess = None ):
261+ # type: (pd.DataFrame, pd.Series, Optional[np.ndarray], Optional[np.ndarray] ) -> None
264262 constraint = [] # type: Union[Dict[str, Any], List, LinearConstraint]
265263 if self .monotonicity != Monotonicity .none .value :
266264 # This function returns G and h such that G * beta <= 0 is the constraint we want:
@@ -298,7 +296,7 @@ def _fit(self, X, y, initial_guess=None):
298296 else :
299297 x0 = initial_guess
300298
301- lgh = LossGradHess (design_X , y , 1 / self .C , self .intercept )
299+ lgh = LossGradHess (design_X , y , 1 / self .C , self .intercept , sample_weight )
302300
303301 # Determine whether to use Hessian
304302 # - 'auto': use Hessian only for trust-constr with no monotonicity constraints (3-4x faster)
@@ -344,8 +342,8 @@ def get_additional_columns(self, X):
344342 additional_columns = np .delete (X , self .input_score_column_index , axis = 1 )
345343 return additional_columns
346344
347- def _stratified_subsample (self , X , y , n_samples , n_strata = 10 ):
348- # type: (np.ndarray, np.ndarray, int, int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]
345+ def _stratified_subsample (self , X , y , n_samples , sample_weight = None , n_strata = 10 ):
346+ # type: (np.ndarray, np.ndarray, int, Optional[np.ndarray], int) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray] , np.ndarray]
349347 """
350348 Create a stratified subsample based on quantiles of the input scores.
351349
@@ -358,13 +356,15 @@ def _stratified_subsample(self, X, y, n_samples, n_strata=10):
358356 y : array-like of shape (n_samples,)
359357 n_samples : int
360358 Target number of samples in the subsample
359+ sample_weight : array-like of shape (n_samples,), optional
361360 n_strata : int
362361 Number of strata (quantile bins) to use
363362
364363 Returns
365364 -------
366365 X_sub : array-like
367- y_sub : array-like
366+ y_sub : array-like
367+ weight_sub : array-like or None
368368 indices : array-like
369369 Indices of selected samples
370370 """
@@ -412,11 +412,12 @@ def _stratified_subsample(self, X, y, n_samples, n_strata=10):
412412 else :
413413 X_sub = X [selected_indices , :]
414414 y_sub = y [selected_indices ]
415+ weight_sub = sample_weight [selected_indices ] if sample_weight is not None else None
415416
416- return X_sub , y_sub , selected_indices
417+ return X_sub , y_sub , weight_sub , selected_indices
417418
418- def _random_subsample (self , X , y , n_samples ):
419- # type: (np.ndarray, np.ndarray, int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]
419+ def _random_subsample (self , X , y , n_samples , sample_weight = None ):
420+ # type: (np.ndarray, np.ndarray, int, Optional[np.ndarray] ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray] , np.ndarray]
420421 """Simple random subsampling (original behavior)."""
421422 indices = self .random_state_ .choice (
422423 np .arange (len (X )), n_samples , replace = False
@@ -425,7 +426,8 @@ def _random_subsample(self, X, y, n_samples):
425426 X_sub , y_sub = X .iloc [indices ], y [indices ]
426427 else :
427428 X_sub , y_sub = X [indices , :], y [indices ]
428- return X_sub , y_sub , indices
429+ weight_sub = sample_weight [indices ] if sample_weight is not None else None
430+ return X_sub , y_sub , weight_sub , indices
429431
430432 def _check_convergence (self , coefs_old , coefs_new ):
431433 # type: (np.ndarray, np.ndarray) -> bool
@@ -443,11 +445,28 @@ def _check_convergence(self, coefs_old, coefs_new):
443445
444446 return change < self .early_stopping_tol
445447
446- def fit (self , X , y ):
448+ def fit (self , X , y , sample_weight = None ):
447449 # type: (pd.DataFrame, Union[np.ndarray, pd.Series], Optional[np.ndarray]) -> None
448450 """
449451 Fit the linear spline logistic regression model.
450452
453+ Parameters
454+ ----------
455+ X : array-like of shape (n_samples, n_features)
456+ Training data.
457+ y : array-like of shape (n_samples,)
458+ Target values (binary: 0 or 1).
459+ sample_weight : array-like of shape (n_samples,), optional
460+ Individual weights for each sample. If not provided, all samples
461+ are given equal weight.
462+
463+ Returns
464+ -------
465+ self : object
466+ Fitted estimator.
467+
468+ Notes
469+ -----
451470 Supports three fitting modes:
452471 1. Direct fitting (default): Fit on full data
453472 2. Two-stage fitting (legacy): Uses `two_stage_fitting_initial_size`
@@ -478,10 +497,21 @@ def fit(self, X, y):
478497 )
479498 y = y [:, 0 ]
480499
500+ # Validate sample_weight
501+ if sample_weight is not None :
502+ sample_weight = np .asarray (sample_weight )
503+ if sample_weight .shape [0 ] != X .shape [0 ]:
504+ raise ValueError (
505+ f"sample_weight has { sample_weight .shape [0 ]} samples, "
506+ f"but X has { X .shape [0 ]} samples"
507+ )
508+ if np .any (sample_weight < 0 ):
509+ raise ValueError ("sample_weight must be non-negative" )
510+
481511 if self .n_knots and self .knots is None :
482512 # only n_knots given so we create knots
483513 self .n_knots_ = min ([self .n_knots , X .shape [0 ]])
484- self .knots_ = _fit_knots (self .get_input_scores (X ), self .n_knots_ )
514+ self .knots_ = _fit_knots (self .get_input_scores (X ), self .n_knots_ , sample_weight )
485515 elif self .knots is not None and self .n_knots is None :
486516 # knots are given so we just take them
487517 self .knots_ = np .array (self .knots )
@@ -496,13 +526,13 @@ def fit(self, X, y):
496526 # Determine fitting mode
497527 if self .progressive_fitting_fractions is not None :
498528 # Mode 3: Progressive fitting (recommended)
499- self ._progressive_fit (X , y )
529+ self ._progressive_fit (X , y , sample_weight )
500530 elif self .two_stage_fitting_initial_size is not None :
501531 # Mode 2: Legacy two-stage fitting
502- self ._two_stage_fit (X , y )
532+ self ._two_stage_fit (X , y , sample_weight )
503533 else :
504534 # Mode 1: Direct fitting on full data
505- self ._fit (X , y , initial_guess = None )
535+ self ._fit (X , y , sample_weight = sample_weight , initial_guess = None )
506536 self .fitting_history_ .append ({
507537 'stage' : 0 ,
508538 'n_samples' : n_samples ,
@@ -513,8 +543,8 @@ def fit(self, X, y):
513543
514544 return self
515545
516- def _two_stage_fit (self , X , y ):
517- # type: (np.ndarray, np.ndarray) -> None
546+ def _two_stage_fit (self , X , y , sample_weight = None ):
547+ # type: (np.ndarray, np.ndarray, Optional[np.ndarray] ) -> None
518548 """Legacy two-stage fitting for backward compatibility."""
519549 n_samples = X .shape [0 ]
520550
@@ -532,15 +562,15 @@ def _two_stage_fit(self, X, y):
532562
533563 # Stage 1: Fit on subsample
534564 if self .stratified_sampling :
535- X_sub , y_sub , _ = self ._stratified_subsample (
536- X , y , self .two_stage_fitting_initial_size
565+ X_sub , y_sub , weight_sub , _ = self ._stratified_subsample (
566+ X , y , self .two_stage_fitting_initial_size , sample_weight
537567 )
538568 else :
539- X_sub , y_sub , _ = self ._random_subsample (
540- X , y , self .two_stage_fitting_initial_size
569+ X_sub , y_sub , weight_sub , _ = self ._random_subsample (
570+ X , y , self .two_stage_fitting_initial_size , sample_weight
541571 )
542572
543- self ._fit (X_sub , y_sub , initial_guess = None )
573+ self ._fit (X_sub , y_sub , sample_weight = weight_sub , initial_guess = None )
544574 self .fitting_history_ .append ({
545575 'stage' : 0 ,
546576 'n_samples' : self .two_stage_fitting_initial_size ,
@@ -555,7 +585,7 @@ def _two_stage_fit(self, X, y):
555585
556586 # Stage 2: Fit on full data with warm start
557587 coefs_stage1 = self .coefficients_ .copy ()
558- self ._fit (X , y , initial_guess = coefs_stage1 )
588+ self ._fit (X , y , sample_weight = sample_weight , initial_guess = coefs_stage1 )
559589 self .fitting_history_ .append ({
560590 'stage' : 1 ,
561591 'n_samples' : n_samples ,
@@ -567,8 +597,8 @@ def _two_stage_fit(self, X, y):
567597 if self .verbose :
568598 print (f"Stage 2: { n_samples } samples, { self .result_ .nit } iterations" )
569599
570- def _progressive_fit (self , X , y ):
571- # type: (np.ndarray, np.ndarray) -> None
600+ def _progressive_fit (self , X , y , sample_weight = None ):
601+ # type: (np.ndarray, np.ndarray, Optional[np.ndarray] ) -> None
572602 """
573603 Progressive fitting with gradual sample increase.
574604
@@ -590,12 +620,12 @@ def _progressive_fit(self, X, y):
590620
591621 for stage , frac in enumerate (fractions ):
592622 n_stage_samples = int (n_samples * frac )
593- n_stage_samples = max (n_stage_samples , self .knots_ .shape [0 ] + 10 ) # Ensure enough samples
623+ n_stage_samples = max (n_stage_samples , self .knots_ .shape [0 ] + 10 )
594624 n_stage_samples = min (n_stage_samples , n_samples )
595625
596626 if frac >= 1.0 :
597627 # Use full data for final stage
598- X_stage , y_stage = X , y
628+ X_stage , y_stage , weight_stage = X , y , sample_weight
599629 else :
600630 # Warn if subsample too small for knots
601631 samples_per_knot = n_stage_samples / (self .knots_ .shape [0 ] + 1 )
@@ -604,15 +634,15 @@ def _progressive_fit(self, X, y):
604634
605635 # Subsample for intermediate stages
606636 if self .stratified_sampling :
607- X_stage , y_stage , _ = self ._stratified_subsample (
608- X , y , n_stage_samples
637+ X_stage , y_stage , weight_stage , _ = self ._stratified_subsample (
638+ X , y , n_stage_samples , sample_weight
609639 )
610640 else :
611- X_stage , y_stage , _ = self ._random_subsample (
612- X , y , n_stage_samples
641+ X_stage , y_stage , weight_stage , _ = self ._random_subsample (
642+ X , y , n_stage_samples , sample_weight
613643 )
614644
615- self ._fit (X_stage , y_stage , initial_guess = prev_coefs )
645+ self ._fit (X_stage , y_stage , sample_weight = weight_stage , initial_guess = prev_coefs )
616646
617647 self .fitting_history_ .append ({
618648 'stage' : stage ,
@@ -631,9 +661,9 @@ def _progressive_fit(self, X, y):
631661 if frac < 1.0 and self ._check_convergence (prev_coefs , self .coefficients_ ):
632662 if self .verbose :
633663 print (f"Early stopping: coefficients converged at stage { stage + 1 } " )
634- # Still fit on full data for final refinement, but with fewer iterations expected
664+ # Still fit on full data for final refinement
635665 prev_coefs = self .coefficients_ .copy ()
636- self ._fit (X , y , initial_guess = prev_coefs )
666+ self ._fit (X , y , sample_weight = sample_weight , initial_guess = prev_coefs )
637667 self .fitting_history_ .append ({
638668 'stage' : stage + 1 ,
639669 'n_samples' : n_samples ,
0 commit comments