简单实现KNN(处理连续型数据)

霸气de小男生 提交于 2019-12-02 21:31:12
import numpy as np
import matplotlib.pyplot as plt
import time
import math
import collections



# Toy training set: ten 2-D samples, one [feature_0, feature_1] pair per row.
raw_data_x = [
    [3.39, 2.33],
    [3.11, 1.78],
    [1.34, 3.36],
    [3.58, 4.67],
    [2.28, 2.86],
    [7.442, 4.69],
    [5.74, 3.53],
    [9.17, 2.51],
    [7.79, 3.42],
    [7.93, 0.79],
]
# Class label for each row above: the first five rows are class 0,
# the last five are class 1.
raw_data_y = [0] * 5 + [1] * 5

# Array views of the training data, used for boolean-mask indexing and
# element-wise arithmetic further down.
x_train = np.array(raw_data_x)
y_train = np.array(raw_data_y)

# The single query point whose class is to be predicted.
x_test = np.array([8.0, 3.36])

# Draw the training samples, one scatter call per class:
# class 0 in red, then class 1 in green.
for label, colour in ((0, 'r'), (1, 'g')):
    members = x_train[y_train == label]
    plt.scatter(members[:, 0], members[:, 1], color=colour)

# Overlay the query point in blue so its position relative to both
# classes is visible.
plt.scatter(x_test[0], x_test[1], color='b')

plt.show()

# Euclidean distance from the query point to every training sample,
# collected in the same order as the rows of x_train.
distance = [math.sqrt(sum((sample - x_test) ** 2)) for sample in x_train]


# Positions of the training samples ordered from nearest to farthest.
index = np.argsort(distance)

# Number of nearest neighbours that take part in the vote.
k = 6

# Labels of the k closest samples, nearest first.
result = list(y_train[index[:k]])

# Majority vote: the label that occurs most often among the neighbours.
vote = collections.Counter(result)
winner, _ = vote.most_common(1)[0]
print("the class is {}".format(winner))

 

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!