主成分分析 | 千里稻花应秀色

对数据降维后使用KNN算法进行分类的案例

from scipy.io import loadmat
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import pickle
from sklearn.decomposition import PCA
# Train a KNN classifier on PCA-reduced MNIST and save it to 'clf.pickle'.

# Load the dataset; "data" is transposed so that rows can be sliced as
# samples, and "label" is reduced to its first row.
mnist = loadmat('mnist-original.mat')
x = mnist["data"].T
y = mnist["label"][0]

# Pass 1: fit PCA with every component kept, only to inspect how much
# variance each component explains.
pca = PCA()
pca.fit(x)

# Running total of the explained-variance ratios. d is the number of leading
# components needed for that total to reach 0.95 (argmax returns the first
# index where the condition holds, hence the +1). The original run reported
# d = 154, i.e. a reduction from 784 to 154 dimensions.
cumsum = np.cumsum(pca.explained_variance_ratio_)
d = np.argmax(cumsum >= 0.95) + 1

# Pass 2: refit keeping only d components and project the data.
# NOTE(review): the projection is fitted on all samples, including the ones
# evaluated later as the test split.
pca = PCA(n_components=d)
x1 = pca.fit_transform(x)

# The first 60000 samples form the training set; shuffle features and labels
# with one shared permutation so they stay aligned.
shuffle_index = np.random.permutation(60000)
x_train = x1[:60000][shuffle_index]
y_train = y[:60000][shuffle_index]

# Despite the variable name, this is a k-nearest-neighbours classifier with
# default settings.
sgd_clf = KNeighborsClassifier()
sgd_clf.fit(x_train, y_train)
print(d)

# Persist the fitted classifier for the evaluation step.
with open('clf.pickle', 'wb') as f:
    pickle.dump(sgd_clf, f)

from scipy.io import loadmat
import numpy as np
import pickle
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score

# Evaluate the pickled KNN classifier on the last 10000 MNIST samples.

# Reload the dataset the same way the training step did.
mnist = loadmat('mnist-original.mat')
x, y = mnist["data"], mnist["label"]
x = x.T
y = y[0]

# NOTE(review): the PCA projection is re-fitted here rather than loaded from
# the training step, and 154 is hard-coded to match the component count
# printed there. The classifier only sees consistent features if both fits
# agree -- consider persisting the fitted PCA next to the classifier.
pca = PCA(n_components=154)
x1 = pca.fit_transform(x)
some_digit = x1[66000]  # a single reduced sample; not used below

# Hold-out split: everything after the first 60000 samples. The features are
# taken from the reduced matrix x1, because the classifier was trained on
# PCA-reduced inputs and cannot consume the raw x.
x_test = x1[60000:]
y_test = y[60000:]

# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('clf.pickle', 'rb') as f:
    clf2 = pickle.load(f)

y_pred = clf2.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.9719

LLE（局部线性嵌入，Locally Linear Embedding）

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import LocallyLinearEmbedding

# Build a synthetic 3-D point cloud and flatten it to 2-D with LLE.

N = 2000
# Random draws stay in the original order: N/2 angles first, then N heights.
angle = np.pi * (1.5 * np.random.random(N // 2) - 1)
height = 5 * np.random.random(N)

# Two arcs of N/2 points each -- (cos, sin) and its mirror (-cos, 2 - sin) --
# joined into one set of N points; the second coordinate is the height.
x = np.array([
    np.append(np.cos(angle), -1 * np.cos(angle)),
    height,
    np.append(np.sin(angle), 2 - np.sin(angle)),
])
x = x.T  # one row per point, three coordinate columns

# 3-D view of the raw points. The axes are created through the figure:
# Axes3D(fig) adding itself to the figure is deprecated since Matplotlib 3.4
# and stops working in later releases.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(x[:, 0], x[:, 1], x[:, 2])

# Author's note: n_neighbors should be neither too small nor too large. Too
# small gives poor results, too large makes the result approach PCA; the
# original paper is said to recommend 12.
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=12)
x2d = lle.fit_transform(x)

# 2-D view of the embedded points.
plt.figure()
plt.plot(x2d[:, 0], x2d[:, 1], 'k.')
plt.show()

c:\users\administrator\appdata\local\programs\python\python37\lib\site-packages\ipykernel_launcher.py:12: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6.  This is consistent with other Axes classes.
  if sys.path[0] == '':