加载中...
随机森林
发表于:2021-11-08 | 分类: 机器学习课程(魏)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Bagging: train 500 decision trees, each on a bootstrap sample of 100
# points, and aggregate their votes.  (Original comment misspelled "begging".)
import numpy as np
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

n_samples = 2000
# Two interleaving half-moon clusters with Gaussian noise; fixed seed so the
# split and the reported accuracy are reproducible.
X_train, y_train = make_moons(n_samples=n_samples, noise=.3, random_state=8)

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,   # number of trees in the ensemble
    max_samples=100,    # bootstrap sample size per tree
    bootstrap=True,     # sample with replacement (bagging, not pasting)
    n_jobs=-1,          # use all CPU cores
)
# First 1500 samples train the ensemble; the last 500 are held out for scoring.
bag_clf.fit(X_train[0:1500], y_train[0:1500])
y_pred = bag_clf.predict(X_train[1500:])
print(accuracy_score(y_train[1500:], y_pred))

# Visualize the raw data, colored by class label.
plt.figure()
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
plt.show()

0.9

png

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Random forest: 500 trees, each limited to 16 leaf nodes, trained in parallel.
import numpy as np
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

n_samples = 2000
# Same moons dataset and seed as the bagging cell, so results are comparable.
X_train, y_train = make_moons(n_samples=n_samples, noise=.3, random_state=8)

# NOTE(review): the variable was originally named `ada_clf` (copy-paste from
# the AdaBoost cell) although it holds a RandomForestClassifier; renamed for
# clarity.
rnd_clf = RandomForestClassifier(
    n_estimators=500,    # number of trees
    max_leaf_nodes=16,   # regularize each tree by capping its leaf count
    n_jobs=-1,           # use all CPU cores
)
# First 1500 samples for training, last 500 held out for evaluation.
rnd_clf.fit(X_train[0:1500], y_train[0:1500])
y_pred = rnd_clf.predict(X_train[1500:])
print(accuracy_score(y_train[1500:], y_pred))

# Visualize the raw data, colored by class label.
plt.figure()
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
plt.show()

0.904

png

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# AdaBoost: sequentially fit 500 decision stumps, each focusing on the
# samples its predecessors misclassified.
import numpy as np
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

n_samples = 2000
# Same moons dataset and seed as the other cells, so results are comparable.
X_train, y_train = make_moons(n_samples=n_samples, noise=.3, random_state=8)

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),  # base learner: a decision stump
    n_estimators=500,
    # NOTE(review): "SAMME.R" is deprecated in scikit-learn >= 1.4 and removed
    # in 1.6; on newer versions switch to algorithm="SAMME" (or drop the
    # argument) — confirm against the installed sklearn version.
    algorithm="SAMME.R",
    learning_rate=0.5,  # shrink each stump's contribution
)
# First 1500 samples for training, last 500 held out for evaluation.
ada_clf.fit(X_train[0:1500], y_train[0:1500])
y_pred = ada_clf.predict(X_train[1500:])
print(accuracy_score(y_train[1500:], y_pred))

# Visualize the raw data, colored by class label.
plt.figure()
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
plt.show()

0.892

png

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Stacking: three base classifiers whose predictions feed a logistic-regression
# meta-learner; each model is scored with 3-fold cross-validation.
# NOTE: the blog export lost the indentation of the for-loop body, which made
# this cell syntactically invalid; restored here.
from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier  # third-party: pip install mlxtend
import numpy as np


iris = datasets.load_iris()
# Keep only two features (columns 1 and 2: sepal width, petal length).
X, y = iris.data[:, 1:3], iris.target

clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
lr = LogisticRegression()  # meta-learner combining the base predictions
sclf = StackingClassifier(classifiers=[clf1, clf2, clf3], meta_classifier=lr)

print('3-fold cross validation:\n')

# Score each base model and the stacked ensemble on the same CV splits.
for clf, label in zip(
        [clf1, clf2, clf3, sclf],
        ['KNN', 'Random Forest', 'Naive Bayes', 'StackingClassifier']):
    scores = model_selection.cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

---------------------------------------------------------------------------

ModuleNotFoundError                       Traceback (most recent call last)

<ipython-input-4-483486e68139> in <module>
      6 from sklearn.naive_bayes import GaussianNB
      7 from sklearn.ensemble import RandomForestClassifier
----> 8 from mlxtend.classifier import StackingClassifier
      9 import numpy as np
     10 


ModuleNotFoundError: No module named 'mlxtend'
1
!pip install mlxtend
Looking in indexes: http://mirrors.aliyun.com/pypi/simple/


ERROR: Could not find a version that satisfies the requirement mlxtend (from versions: none)
ERROR: No matching distribution found for mlxtend
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Voting: combine logistic regression, an SVM, and KNN with a hard-majority
# vote, and compare each individual model's accuracy against the ensemble's.
# NOTE: the blog export lost the indentation of the for-loop body, which made
# this cell syntactically invalid; restored here.
import numpy as np
from sklearn.datasets import make_moons          # generate moon-shaped data
from sklearn.metrics import accuracy_score       # classifier accuracy metric
import matplotlib.pyplot as plt                  # plotting

from sklearn.ensemble import VotingClassifier    # ensemble voting classifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC                      # support vector classifier
from sklearn.neighbors import KNeighborsClassifier

n_samples = 2000
# Same moons dataset and seed as the other cells, so results are comparable.
X_train, y_train = make_moons(n_samples=n_samples, noise=.3, random_state=8)
#shuffle_index=np.random.permutation(2000)

log_clf = LogisticRegression()
svm_clf = SVC()
knn_clf = KNeighborsClassifier()

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('svc', svm_clf), ('knn', knn_clf)],
    voting='hard',  # majority vote on predicted labels (not probabilities)
)
#voting_clf.fit(X_train[0:1500], y_train[0:1500])

# Fit and score each standalone model, then the voting ensemble, on the same
# 1500/500 train/holdout split.
for clf in (log_clf, svm_clf, knn_clf, voting_clf):
    clf.fit(X_train[0:1500], y_train[0:1500])
    y_pred = clf.predict(X_train[1500:])
    print(clf.__class__.__name__, accuracy_score(y_train[1500:], y_pred))

# Visualize the raw data, colored by class label.
plt.figure()
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
plt.show()

LogisticRegression 0.862
SVC 0.906
KNeighborsClassifier 0.902
VotingClassifier 0.91

png

1

上一篇:
线性规划
下一篇:
非线性规划
本文目录
本文目录