|
15 | 15 | import pandas as pd |
16 | 16 | import matplotlib.pyplot as plt |
17 | 17 | from sklearn.preprocessing import LabelEncoder |
18 | | -from sklearn.cross_validation import train_test_split |
19 | 18 | from sklearn.preprocessing import StandardScaler |
20 | 19 | from sklearn.decomposition import PCA |
21 | 20 | from sklearn.linear_model import LogisticRegression |
22 | 21 | from sklearn.pipeline import Pipeline |
23 | | -from sklearn.cross_validation import StratifiedKFold |
24 | | -from sklearn.cross_validation import cross_val_score |
25 | | -from sklearn.learning_curve import learning_curve |
26 | | -from sklearn.learning_curve import validation_curve |
27 | | -from sklearn.grid_search import GridSearchCV |
28 | 22 | from sklearn.tree import DecisionTreeClassifier |
29 | 23 | from sklearn.svm import SVC |
30 | 24 | from sklearn.metrics import confusion_matrix |
|
38 | 32 | from sklearn.metrics import accuracy_score |
39 | 33 | from scipy import interp |
40 | 34 |
|
| 35 | +# for sklearn 0.18's alternative syntax |
| 36 | +from distutils.version import LooseVersion as Version |
| 37 | +from sklearn import __version__ as sklearn_version |
| 38 | +if Version(sklearn_version) < '0.18': |
| 39 | +    from sklearn.cross_validation import train_test_split |
| 40 | + from sklearn.cross_validation import StratifiedKFold |
| 41 | + from sklearn.cross_validation import cross_val_score |
| 42 | + from sklearn.learning_curve import learning_curve |
| 43 | + from sklearn.learning_curve import validation_curve |
| 44 | + from sklearn.grid_search import GridSearchCV |
| 45 | +else: |
| 46 | + from sklearn.model_selection import train_test_split |
| 47 | + from sklearn.model_selection import StratifiedKFold |
| 48 | + from sklearn.model_selection import cross_val_score |
| 49 | + from sklearn.model_selection import learning_curve |
| 50 | + from sklearn.model_selection import validation_curve |
| 51 | + from sklearn.model_selection import GridSearchCV |
| 52 | + |
41 | 53 | ############################################################################# |
42 | 54 | print(50 * '=') |
43 | 55 | print('Section: Loading the Breast Cancer Wisconsin dataset') |
|
83 | 95 | print('Section: K-fold cross-validation') |
84 | 96 | print(50 * '-') |
85 | 97 |
|
86 | | -kfold = StratifiedKFold(y=y_train, |
87 | | - n_folds=10, |
88 | | - random_state=1) |
| 98 | +if Version(sklearn_version) < '0.18': |
| 99 | + kfold = StratifiedKFold(y=y_train, |
| 100 | + n_folds=10, |
| 101 | + random_state=1) |
| 102 | +else: |
| 103 | + kfold = StratifiedKFold(n_splits=10, |
| 104 | + random_state=1).split(X_train, y_train) |
89 | 105 |
|
90 | 106 | scores = [] |
91 | 107 | for k, (train, test) in enumerate(kfold): |
92 | 108 | pipe_lr.fit(X_train[train], y_train[train]) |
93 | 109 | score = pipe_lr.score(X_train[test], y_train[test]) |
94 | 110 | scores.append(score) |
95 | | - print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, |
| 111 | + print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k + 1, |
96 | 112 | np.bincount(y_train[train]), score)) |
97 | 113 |
|
98 | 114 | print('\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores))) |
99 | 115 |
|
100 | 116 | print('Using StratifiedKFold') |
101 | | -kfold = StratifiedKFold(y=y_train, |
102 | | - n_folds=10, |
103 | | - random_state=1) |
| 117 | +if Version(sklearn_version) < '0.18': |
| 118 | + kfold = StratifiedKFold(y=y_train, |
| 119 | + n_folds=10, |
| 120 | + random_state=1) |
| 121 | +else: |
| 122 | + kfold = StratifiedKFold(n_splits=10, |
| 123 | + random_state=1).split(X_train, y_train) |
104 | 124 |
|
105 | 125 | scores = [] |
106 | 126 | for k, (train, test) in enumerate(kfold): |
107 | 127 | pipe_lr.fit(X_train[train], y_train[train]) |
108 | 128 | score = pipe_lr.score(X_train[test], y_train[test]) |
109 | 129 | scores.append(score) |
110 | | - print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, |
| 130 | + print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k + 1, |
111 | 131 | np.bincount(y_train[train]), score)) |
112 | 132 |
|
113 | 133 | print('\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores))) |
|
134 | 154 | ('clf', LogisticRegression(penalty='l2', random_state=0))]) |
135 | 155 |
|
136 | 156 | train_sizes, train_scores, test_scores =\ |
137 | | - learning_curve(estimator=pipe_lr, |
138 | | - X=X_train, |
139 | | - y=y_train, |
140 | | - train_sizes=np.linspace(0.1, 1.0, 10), |
141 | | - cv=10, |
142 | | - n_jobs=1) |
| 157 | + learning_curve(estimator=pipe_lr, |
| 158 | + X=X_train, |
| 159 | + y=y_train, |
| 160 | + train_sizes=np.linspace(0.1, 1.0, 10), |
| 161 | + cv=10, |
| 162 | + n_jobs=1) |
143 | 163 |
|
144 | 164 | train_mean = np.mean(train_scores, axis=1) |
145 | 165 | train_std = np.std(train_scores, axis=1) |
|
182 | 202 |
|
183 | 203 | param_range = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0] |
184 | 204 | train_scores, test_scores = validation_curve( |
185 | | - estimator=pipe_lr, |
186 | | - X=X_train, |
187 | | - y=y_train, |
188 | | - param_name='clf__C', |
189 | | - param_range=param_range, |
190 | | - cv=10) |
| 205 | + estimator=pipe_lr, |
| 206 | + X=X_train, |
| 207 | + y=y_train, |
| 208 | + param_name='clf__C', |
| 209 | + param_range=param_range, |
| 210 | + cv=10) |
191 | 211 |
|
192 | 212 | train_mean = np.mean(train_scores, axis=1) |
193 | 213 | train_std = np.std(train_scores, axis=1) |
|
345 | 365 |
|
346 | 366 | X_train2 = X_train[:, [4, 14]] |
347 | 367 |
|
348 | | -cv = StratifiedKFold(y_train, n_folds=3, random_state=1) |
| 368 | +if Version(sklearn_version) < '0.18': |
| 369 | + cv = StratifiedKFold(y_train, |
| 370 | + n_folds=3, |
| 371 | + random_state=1) |
| 372 | + |
| 373 | +else: |
| 374 | + cv = list(StratifiedKFold(n_splits=3, |
| 375 | + random_state=1).split(X_train, y_train)) |
349 | 376 |
|
350 | 377 | fig = plt.figure(figsize=(7, 5)) |
351 | 378 |
|
|
367 | 394 | tpr, |
368 | 395 | lw=1, |
369 | 396 | label='ROC fold %d (area = %0.2f)' |
370 | | - % (i+1, roc_auc)) |
| 397 | + % (i + 1, roc_auc)) |
371 | 398 |
|
372 | 399 | plt.plot([0, 1], |
373 | 400 | [0, 1], |
|
0 commit comments