-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathml_utils.py
More file actions
56 lines (46 loc) · 1.79 KB
/
ml_utils.py
File metadata and controls
56 lines (46 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# !/usr/bin/python
# coding=utf-8
import pickle
import numpy as np
import pandas as pd
import train_models as M
from itertools import product
import matplotlib.pyplot as plt
# dump models into files
def modelDump(model, path, protocol=None):
    """Serialize ``model`` to the file at ``path`` using pickle.

    Args:
        model: Any picklable object (for example a trained model).
        path: Destination file path. The file is opened in binary write
            mode, so an existing file at that path is overwritten.
        protocol: Optional pickle protocol number. ``None`` (the default)
            lets pickle pick its default protocol, which is the behavior
            this function had before the parameter existed.
    """
    with open(path, 'wb') as out_file:
        pickle.dump(model, out_file, protocol)
# reload trained models
def modelReload(path):
    """Load and return the object pickled in the file at ``path``.

    Args:
        path: Path of a pickle file, opened in binary read mode.

    Returns:
        The object reconstructed by ``pickle.load``.
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file.
    # Only call this on files from a trusted source.
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)
# plot the models' decision margins, using the two chosen (most important) dimensions of the data
def plotMargin(dir, index1, index2):
    """Plot the decision regions of four classifiers over two feature columns.

    Reads ``dir + 'train.csv'``, keeps the columns at positions ``index1``
    and ``index2`` as features and the last column as the label, trains four
    models through the ``train_models`` helpers, then draws each model's
    predictions over a 0.1-step grid in a 2x2 figure and displays it.

    Args:
        dir: Path prefix concatenated directly with ``'train.csv'`` (so it
            must carry its own trailing separator). It is also forwarded
            unchanged to every ``M.train*`` call.
        index1: Positional index of the column drawn on the x axis.
        index2: Positional index of the column drawn on the y axis.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    frame = pd.read_csv(dir + 'train.csv')
    # Replace the header names with 0..n-1 so columns are addressed by position.
    frame.columns = range(len(frame.columns))
    X = frame.iloc[:, [index1, index2]]
    Y = frame.iloc[:, -1]
    # print() with a single argument behaves the same on Python 2 and 3.
    print(X.shape)

    # Four models are compared for now. The same X/Y pair is passed twice;
    # presumably the helper expects (train, test) splits - verify against
    # train_models.
    RandomForest = M.trainRF(X, X, Y, Y, dir)
    SVM = M.trainSVM(X, X, Y, Y, dir)
    GBDT = M.trainGBDT(X, X, Y, Y, dir)
    DecisionTree = M.trainDT(X, X, Y, Y, dir)

    # Evaluation grid: one unit of padding around the data, 0.1 resolution.
    x_min, x_max = X.iloc[:, 0].min() - 1, X.iloc[:, 0].max() + 1
    y_min, y_max = X.iloc[:, 1].min() - 1, X.iloc[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))
    # Formatted explicitly so the output matches the old two-item print
    # statement on both Python 2 and Python 3.
    print('%s %s' % (xx.shape, yy.shape))

    # The grid points are identical for every model, so build them once.
    grid_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()])

    f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))
    classifiers = [RandomForest, SVM, GBDT, DecisionTree]
    titles = ['RandomForest', 'SVM(RBF)', 'GBDT', 'DecisionTree']
    # product([0, 1], [0, 1]) walks the 2x2 axes grid row by row.
    for (row, col), clf, title in zip(product([0, 1], [0, 1]),
                                      classifiers, titles):
        ax = axarr[row, col]
        Z = clf.predict(grid_points).reshape(xx.shape)
        ax.contourf(xx, yy, Z, alpha=0.4)
        ax.scatter(X.iloc[:, 0], X.iloc[:, 1], c=Y, alpha=0.8)
        ax.set_title(title)
    plt.show()