import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

try:
    # Standalone joblib; scikit-learn >= 0.23 no longer ships sklearn.externals.joblib.
    import joblib
except ImportError:
    # Fallback for old scikit-learn installs that only provide the vendored copy.
    from sklearn.externals import joblib

# 查看一下数据集的数据

# zero = plt.imread('./knn_num_data/0/0_1.bmp')
# plt.imshow(zero,cmap='gray')
# print(zero.shape)

# Assemble the images into a dataset that can be used for training.
# Files are addressed as ./knn_num_data/<label>/<label>_<index>.bmp, where
# <label> runs over 0-9 and <index> is 1-based (1..500).
path = './knn_num_data/%d/%d_%d.bmp'

data = []
target = []

for label in range(10):
    for index in range(1, 501):
        data.append(plt.imread(path % (label, label, index)))
        target.append(label)
data = np.array(data)

# KNN only works on 2-D input, so flatten every image into one row.
# The row count is taken from the array itself instead of a hard-coded 5000,
# so it cannot drift out of sync with the loop bounds above.
data_ = data.reshape(len(data), -1)

# print(data_.shape)

# Split the dataset, holding out 1% of the samples as the test set.
# NOTE(review): no random_state is passed, so the split (and therefore the
# scores printed later) will differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data_,
    target,
    test_size=0.01,
)

# Create a KNN classifier with default settings and fit it on the training
# split; fit() returns the estimator itself, so the result is bound directly.
knn = KNeighborsClassifier().fit(X_train, y_train)

# File the fitted model is written to.
save_path_name = 'knn_train_model.m'

# Round-trip the classifier through disk: dump it, then load it back, so the
# prediction and scoring below run against the reloaded model.
joblib.dump(value=knn, filename=save_path_name)
knn = joblib.load(filename=save_path_name)

# Predicted labels for the held-out test samples.
y_ = knn.predict(X=X_test)
print(y_)

# Score on the training split.
train_score = knn.score(X=X_train, y=y_train)
print(train_score)

# Score on the test split.
test_score = knn.score(X=X_test, y=y_test)
print(test_score)

# Dataset
#
# Link: https://pan.baidu.com/s/1ehaljfupk-_kuxk3khh3BA
# Extraction code: zl3o
#
# 12-10 08:49