Skip to content

Instantly share code, notes, and snippets.

@vaibhawvipul
Created May 5, 2018 17:49
Show Gist options
  • Select an option

  • Save vaibhawvipul/00a468685ffc0c551062149bba6347fa to your computer and use it in GitHub Desktop.

Select an option

Save vaibhawvipul/00a468685ffc0c551062149bba6347fa to your computer and use it in GitHub Desktop.
Knn Iris
import csv
import random
import math
def loadDataset(filename, split, num_features=4):
    """Read a CSV file and randomly partition its rows into train/test sets.

    The first ``num_features`` columns of every row are converted to
    ``float`` in place; any remaining columns (such as a class label) are
    kept as the strings the CSV reader produced.

    Args:
        filename: Path of the CSV file to read.
        split: Probability threshold; a row goes to the training set when
            ``random.random() < split`` and to the test set otherwise.
        num_features: Number of leading columns to convert to ``float``.

    Returns:
        A ``(training_data, test_data)`` tuple of lists of rows.

    Raises:
        ValueError: If a feature column cannot be parsed as a float.
        IndexError: If a non-empty row has fewer than ``num_features``
            columns.
    """
    training_data = []
    test_data = []
    # The csv module needs a text-mode file on Python 3; newline='' lets the
    # reader handle line endings itself.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines (for example a trailing one at the end of the
            # file) come back as empty rows; skip them rather than blindly
            # discarding the last row.
            if not row:
                continue
            for column in range(num_features):
                row[column] = float(row[column])
            if random.random() < split:
                training_data.append(row)
            else:
                test_data.append(row)
    return training_data, test_data
def L2(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Args:
        instance1: First indexable sequence of numbers.
        instance2: Second indexable sequence of numbers.
        length: How many leading positions to compare; elements at index
            ``length`` and beyond are ignored.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    squared_sum = 0
    for position in range(length):
        difference = instance1[position] - instance2[position]
        squared_sum = squared_sum + difference ** 2
    return math.sqrt(squared_sum)
def getNeighbors(training_data, testInstance, k):
    """Return the ``k`` training rows nearest to ``testInstance``.

    Distances are computed with ``L2`` over every element of
    ``testInstance`` except the last one, which is left out of the
    comparison.

    Args:
        training_data: Sequence of rows to search.
        testInstance: Row whose neighbours are wanted.
        k: Number of rows to return.

    Returns:
        A list of ``k`` rows from ``training_data``, closest first.

    Raises:
        IndexError: If ``k`` exceeds the number of training rows.
    """
    feature_count = len(testInstance) - 1
    scored = [
        (candidate, L2(testInstance, candidate, feature_count))
        for candidate in training_data
    ]
    # list.sort is stable, so equally distant rows keep their input order.
    scored.sort(key=lambda pair: pair[1])
    return [scored[rank][0] for rank in range(k)]
def getResponse(neighbors):
    """Return the most common class label among ``neighbors``.

    The label of each neighbour is its last element. When several labels
    share the highest count, the one that occurs first in ``neighbors``
    wins (this relies on dicts preserving insertion order, Python 3.7+).

    Args:
        neighbors: Non-empty sequence of rows whose last element is the
            class label.

    Returns:
        The label with the most votes.

    Raises:
        ValueError: If ``neighbors`` is empty.
    """
    if not neighbors:
        raise ValueError("getResponse() requires at least one neighbor")
    class_votes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        class_votes[label] = class_votes.get(label, 0) + 1
    # Pick the label with the highest tally; simply taking the first dict
    # entry would ignore the vote counts entirely.
    return max(class_votes, key=class_votes.get)
def getAccuracy(test_data, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    A row counts as correct when its last element equals the prediction at
    the same index.

    Args:
        test_data: Sequence of rows whose last element is the true label.
        predictions: Sequence of predicted labels, indexed like
            ``test_data``.

    Returns:
        Accuracy as a float between 0.0 and 100.0; 0.0 when ``test_data``
        is empty.

    Raises:
        IndexError: If ``predictions`` is shorter than ``test_data``.
    """
    # A random split can leave the test set empty; report 0.0 rather than
    # failing with ZeroDivisionError.
    if not test_data:
        return 0.0
    correct = sum(
        1 for index, row in enumerate(test_data)
        if row[-1] == predictions[index]
    )
    return (correct / float(len(test_data))) * 100.0
def main():
    """Run a 3-nearest-neighbour classification demo on ``iris.data``.

    Loads the dataset with a 0.67 split threshold, predicts a label for
    every test row, prints each prediction next to the actual label, and
    finally prints the overall accuracy percentage.
    """
    # prepare data
    train_fraction = 0.67
    training_data, test_data = loadDataset('iris.data', train_fraction)
    # generate predictions
    neighbor_count = 3
    predictions = []
    for sample in test_data:
        nearest = getNeighbors(training_data, sample, neighbor_count)
        predicted = getResponse(nearest)
        predictions.append(predicted)
        print('> predicted=' + repr(predicted) + ', actual=' + repr(sample[-1]))
    accuracy = getAccuracy(test_data, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment