Skip to content

Commit 7153bec

Browse files
author
Yam Peleg
committed
hunga bunga
1 parent 1d80350 commit 7153bec

7 files changed

Lines changed: 74 additions & 158 deletions

File tree

example.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,30 @@
11

22

3+
from hunga_bunga import HungaBungaClassifier, HungaBungaRegressor
4+
from hunga_bunga.regression import gen_reg_data
35
from sklearn import datasets
4-
iris = datasets.load_iris()
5-
x, y = iris.data, iris.target
66

77

8+
# ---------- Getting The Data ----------
89

10+
iris = datasets.load_iris()
11+
X_c, y_c = iris.data, iris.target
12+
X_r, y_r = gen_reg_data(10, 3, 100, 3, sum, 0.3)
913

10-
from hunga_bunga import HungaBungaClassifier, HungaBungaRegressor
14+
15+
16+
# ---------- Classification ----------
1117

1218
clf = HungaBungaClassifier()
13-
clf.fit(x, y)
14-
clf.predict(x)
19+
clf.fit(X_c, y_c)
20+
print(clf.predict(X_c))
21+
1522

1623

24+
# ---------- Regression ----------
1725

26+
mdl = HungaBungaRegressor()
27+
mdl.fit(X_r, y_r)
28+
print(mdl.predict(X_r))
1829

1930

hunga_bunga/classification.py

Lines changed: 23 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1+
12
import warnings
23
warnings.filterwarnings('ignore')
4+
35
import numpy as np
46
from sklearn import datasets
5-
from sklearn.linear_model import SGDClassifier, LogisticRegression, \
6-
Perceptron, PassiveAggressiveClassifier
7-
7+
from sklearn.linear_model import SGDClassifier, LogisticRegression, Perceptron, PassiveAggressiveClassifier
88
from sklearn.preprocessing import StandardScaler
99
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
1010
from sklearn.svm import SVC, LinearSVC, NuSVC
@@ -158,7 +158,6 @@
158158
'criterion': ['gini', 'entropy']})
159159
]
160160

161-
162161
tree_models_n_params_small = [
163162

164163
(RandomForestClassifier,
@@ -175,60 +174,36 @@
175174
]
176175

177176

178-
179-
def run_linear_models(x, y, small = True, normalize_x = True):
180-
return big_loop(linear_models_n_params_small if small else linear_models_n_params,
181-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)
182-
183-
def run_svm_models(x, y, small = True, normalize_x = True):
184-
return big_loop(svm_models_n_params_small if small else svm_models_n_params,
185-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)
186-
187-
def run_neighbor_models(x, y, normalize_x = True):
188-
return big_loop(neighbor_models_n_params,
189-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)
190-
191-
def run_gaussian_models(x, y, normalize_x = True):
192-
return big_loop(gaussianprocess_models_n_params,
193-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)
194-
195-
def run_nn_models(x, y, small = True, normalize_x = True):
196-
return big_loop(nn_models_n_params_small if small else nn_models_n_params,
197-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)
198-
199-
def run_tree_models(x, y, small = True, normalize_x = True):
200-
return big_loop(tree_models_n_params_small if small else tree_models_n_params,
201-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)
202-
203-
def run_all(x, y, small = False, normalize_x = True, n_jobs=cpu_count()-1, brain=False):
204-
205-
all_params = (linear_models_n_params_small if small else linear_models_n_params) + \
206-
(nn_models_n_params_small if small else nn_models_n_params) + \
207-
([] if small else gaussianprocess_models_n_params) + \
208-
neighbor_models_n_params + \
209-
(svm_models_n_params_small if small else svm_models_n_params) + \
210-
(tree_models_n_params_small if small else tree_models_n_params)
211-
212-
return big_loop(all_params,
213-
StandardScaler().fit_transform(x) if normalize_x else x, y,
214-
isClassification=True, n_jobs=n_jobs, verbose=False, brain=brain)
177+
def run_all_classifiers(x, y, small = True, normalize_x = True, n_jobs=cpu_count()-1, brain=False, test_size=0.2, n_splits=5, upsample=True, scoring=None, verbose=False):
178+
all_params = (linear_models_n_params_small if small else linear_models_n_params) + (nn_models_n_params_small if small else nn_models_n_params) + ([] if small else gaussianprocess_models_n_params) + neighbor_models_n_params + (svm_models_n_params_small if small else svm_models_n_params) + (tree_models_n_params_small if small else tree_models_n_params)
179+
return main_loop(all_params, StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True, n_jobs=n_jobs, verbose=False, brain=brain)
215180

216181

217182
class HungaBungaClassifier(ClassifierMixin):
218-
def __init__(self, brain=False):
183+
def __init__(self, brain=False, test_size = 0.2, n_splits = 5, random_state=None, upsample=True, scoring=None, verbose=True, normalize_x = True, n_jobs =cpu_count() - 1):
219184
self.model = None
220185
self.brain = brain
186+
self.test_size = test_size
187+
self.n_splits = n_splits
188+
self.random_state = random_state
189+
self.upsample = upsample
190+
self.scoring = scoring
191+
self.verbose = verbose
192+
self.n_jobs = n_jobs
193+
self.normalize_x = normalize_x
194+
super(HungaBungaClassifier, self).__init__()
195+
221196
def fit(self, x, y):
222-
self.model = run_all(x, y, normalize_x=True, brain=self.brain)[0]
197+
self.model = run_all_classifiers(x, y, normalize_x=self.normalize_x, test_size=self.test_size, n_splits=self.n_splits, upsample=self.upsample, scoring=self.scoring, verbose=self.verbose, brain=self.brain, n_jobs=self.n_jobs)[0]
198+
223199
def predict(self, x):
224200
return self.model.predict(x)
225201

226202

227203
if __name__ == '__main__':
228204
iris = datasets.load_iris()
229-
x, y = iris.data, iris.target
230-
run_all(x, y, n_jobs=1)
231-
a = HungaBungaClassifier()
232-
a.fit(x, y)
233-
a.predict(x)
205+
X, y = iris.data, iris.target
206+
clf = HungaBungaClassifier()
207+
clf.fit(X, y)
208+
print(clf.predict(X).shape)
234209

hunga_bunga/classification.pyc

-1.63 KB
Binary file not shown.

hunga_bunga/regression.py

Lines changed: 23 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,28 @@
1+
12
import warnings
23
warnings.filterwarnings('ignore')
34
from multiprocessing import cpu_count
45

5-
# linear models: http://scikit-learn.org/stable/modules/linear_model.html#stochastic-gradient-descent-sgd
6-
from sklearn.linear_model import \
7-
LinearRegression, Ridge, Lasso, ElasticNet, \
8-
Lars, LassoLars, \
9-
OrthogonalMatchingPursuit, \
10-
BayesianRidge, ARDRegression, \
11-
SGDRegressor, \
12-
PassiveAggressiveRegressor, \
13-
RANSACRegressor, HuberRegressor
14-
6+
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, Lars, LassoLars, OrthogonalMatchingPursuit, BayesianRidge, ARDRegression, SGDRegressor, PassiveAggressiveRegressor, RANSACRegressor, HuberRegressor
157
from sklearn.kernel_ridge import KernelRidge
168
from sklearn.preprocessing import StandardScaler
17-
18-
# svm models: http://scikit-learn.org/stable/modules/svm.html
199
from sklearn.svm import SVR, NuSVR, LinearSVR
20-
21-
# neighbor models: http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.RadiusNeighborsRegressor.html#sklearn.neighbors.RadiusNeighborsRegressor
2210
from sklearn.neighbors import RadiusNeighborsRegressor, KNeighborsRegressor
23-
2411
from sklearn.gaussian_process import GaussianProcessRegressor
2512
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, DotProduct, WhiteKernel
2613
from sklearn.neural_network import MLPRegressor
27-
2814
from sklearn.ensemble import AdaBoostRegressor, ExtraTreesRegressor, RandomForestRegressor
2915
from sklearn.tree import DecisionTreeRegressor
3016
from sklearn.base import BaseEstimator
3117
from sklearn.base import ClassifierMixin
3218
from sklearn.base import RegressorMixin
3319
from sklearn.base import is_classifier
3420

21+
3522
from utilities import *
3623
from universal_params import *
3724

3825

39-
4026
linear_models_n_params = [
4127
(LinearRegression, normalize),
4228

@@ -297,63 +283,40 @@
297283
'criterion': ['mse', 'mae']})
298284
]
299285

300-
def run_linear_models(x, y, small = True, normalize_x = True):
301-
return big_loop(linear_models_n_params_small if small else linear_models_n_params,
302-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=False)
303-
304-
def run_svm_models(x, y, small = True, normalize_x = True):
305-
return big_loop(svm_models_n_params_small if small else svm_models_n_params,
306-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=False)
307-
308-
def run_neighbor_models(x, y, normalize_x = True):
309-
return big_loop(neighbor_models_n_params,
310-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=False)
311-
312-
def run_gaussian_models(x, y, normalize_x = True):
313-
return big_loop(gaussianprocess_models_n_params,
314-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=False)
315-
316-
def run_nn_models(x, y, small = True, normalize_x = True):
317-
return big_loop(nn_models_n_params_small if small else nn_models_n_params,
318-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=False)
319-
320-
def run_tree_models(x, y, small = True, normalize_x = True):
321-
return big_loop(tree_models_n_params_small if small else tree_models_n_params,
322-
StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=False)
323-
324-
def run_all(x, y, small = True, normalize_x = True, n_jobs=cpu_count()-1, brain=False):
325-
326-
all_params = (linear_models_n_params_small if small else linear_models_n_params) + \
327-
(nn_models_n_params_small if small else nn_models_n_params) + \
328-
([] if small else gaussianprocess_models_n_params) + \
329-
neighbor_models_n_params + \
330-
(svm_models_n_params_small if small else svm_models_n_params) + \
331-
(tree_models_n_params_small if small else tree_models_n_params)
332-
333-
return big_loop(all_params,
334-
StandardScaler().fit_transform(x) if normalize_x else x, y,
335-
isClassification=False, n_jobs=n_jobs, brain=brain)
336-
337286

338287
def gen_reg_data(x_mu=10., x_sigma=1., num_samples=100, num_features=3, y_formula=sum, y_sigma=1.):
339288
x = np.random.normal(x_mu, x_sigma, (num_samples, num_features))
340289
y = np.apply_along_axis(y_formula, 1, x) + np.random.normal(0, y_sigma, (num_samples,))
341290
return x, y
342291

292+
def run_all_regressors(x, y, small = True, normalize_x = True, n_jobs=cpu_count()-1, brain=False, test_size=0.2, n_splits=5, upsample=True, scoring=None, verbose=False):
293+
all_params = (linear_models_n_params_small if small else linear_models_n_params) + (nn_models_n_params_small if small else nn_models_n_params) + ([] if small else gaussianprocess_models_n_params) + neighbor_models_n_params + (svm_models_n_params_small if small else svm_models_n_params) + (tree_models_n_params_small if small else tree_models_n_params)
294+
return main_loop(all_params, StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=False, n_jobs=n_jobs, brain=brain)
295+
343296

344297
class HungaBungaRegressor(RegressorMixin):
345-
def __init__(self, brain=False):
298+
def __init__(self, brain=False, test_size = 0.2, n_splits = 5, random_state=None, upsample=True, scoring=None, verbose=True, normalize_x = True, n_jobs =cpu_count() - 1):
346299
self.model = None
347300
self.brain = brain
301+
self.test_size = test_size
302+
self.n_splits = n_splits
303+
self.random_state = random_state
304+
self.upsample = upsample
305+
self.scoring = scoring
306+
self.verbose = verbose
307+
self.n_jobs = n_jobs
308+
self.normalize_x = normalize_x
309+
super(HungaBungaRegressor, self).__init__()
310+
348311
def fit(self, x, y):
349-
self.model = run_all(x, y, normalize_x=True, brain=self.brain)[0]
312+
self.model = run_all_regressors(x, y, normalize_x=self.normalize_x, test_size=self.test_size, n_splits=self.n_splits, upsample=self.upsample, scoring=self.scoring, verbose=self.verbose, brain=self.brain, n_jobs=self.n_jobs)[0]
313+
350314
def predict(self, x):
351315
return self.model.predict(x)
352316

353317

354318
if __name__ == '__main__':
355319
x, y = gen_reg_data(10, 3, 100, 3, sum, 0.3)
356-
# print run_all(x, y, small=True, normalize_x=True)
357-
a = HungaBungaRegressor()
358-
a.fit(x, y)
359-
a.predict(x)
320+
mdl = HungaBungaRegressor()
321+
mdl.fit(x, y)
322+
print(mdl.predict(x).shape)

hunga_bunga/regression.pyc

-2.09 KB
Binary file not shown.

hunga_bunga/universal_params.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
"""
2-
parameter settings used by multiple classifiers/regressors
3-
"""
41

52
import numpy as np
63

@@ -16,7 +13,6 @@
1613
alpha = [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 3, 10]
1714
alpha_small = [1e-5, 1e-3, 0.1, 1]
1815
n_iter = [5, 10, 20]
19-
2016
eta0 = [1e-4, 1e-3, 1e-2, 0.1]
2117
C = [1e-2, 0.1, 1, 5, 10]
2218
C_small = [ 0.1, 1, 5]
@@ -31,15 +27,13 @@
3127
shrinking = [True, False]
3228
nu = [1e-4, 1e-2, 0.1, 0.3, 0.5, 0.75, 0.9]
3329
nu_small = [1e-2, 0.1, 0.5, 0.9]
34-
3530
n_neighbors = [5, 7, 10, 15, 20]
3631
neighbor_algo = ['ball_tree', 'kd_tree', 'brute']
3732
neighbor_leaf_size = [1, 2, 5, 10, 20, 30, 50, 100]
3833
neighbor_metric = ['cityblock', 'euclidean', 'l1', 'l2', 'manhattan']
3934
neighbor_radius = [1e-2, 0.1, 1, 5, 10]
4035
learning_rate = ['constant', 'invscaling', 'adaptive']
4136
learning_rate_small = ['invscaling', 'adaptive']
42-
4337
n_estimators = [2, 3, 5, 10, 25, 50, 100]
4438
n_estimators_small = [2, 10, 25, 100]
4539
max_features = [3, 5, 10, 25, 50, 'auto', 'log2', None]
@@ -50,5 +44,3 @@
5044
min_impurity_split = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
5145
tree_learning_rate = [0.8, 1]
5246
min_samples_leaf = [2]
53-
54-

0 commit comments

Comments
 (0)