Created
May 5, 2018 17:49
-
-
Save vaibhawvipul/00a468685ffc0c551062149bba6347fa to your computer and use it in GitHub Desktop.
Knn Iris
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import math
import random
from collections import Counter
def loadDataset(filename, split, num_features=4):
    """Load a CSV dataset and randomly partition its rows into two lists.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()`` for every row;
            a row goes to the training list when the draw is below it, so
            ``1.0`` sends every row to training and ``0.0`` every row to test.
        num_features: Number of leading columns converted to ``float``.
            Remaining columns are kept as the strings read from the file.

    Returns:
        A ``(training_data, test_data)`` tuple of lists of rows.

    Raises:
        ValueError: If one of the leading columns cannot be parsed as a float.
    """
    training_data = []
    test_data = []
    # The csv module needs a text-mode file opened with newline='' on
    # Python 3; binary mode makes csv.reader fail.
    with open(filename, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Skip blank lines (csv yields [] for them) instead of blindly
            # dropping the last row, which loses data when the file has no
            # trailing blank line.
            if not row:
                continue
            row[:num_features] = [float(value) for value in row[:num_features]]
            if random.random() < split:
                training_data.append(row)
            else:
                test_data.append(row)
    return training_data, test_data
def L2(instance1, instance2, length):
    """Return the Euclidean (L2) distance between two instances.

    Only the first ``length`` components of each instance are compared, so
    trailing non-numeric entries beyond that index are ignored.

    Args:
        instance1: Indexable sequence whose first ``length`` items are numeric.
        instance2: Indexable sequence whose first ``length`` items are numeric.
        length: Number of leading components to include in the distance.

    Returns:
        The square root of the summed squared component differences.

    Raises:
        IndexError: If either instance has fewer than ``length`` items.
    """
    squared_sum = sum(
        (instance1[i] - instance2[i]) ** 2 for i in range(length)
    )
    return math.sqrt(squared_sum)
def getNeighbors(training_data, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distance is computed with ``L2`` over every component except the last
    one of ``testInstance``.

    Args:
        training_data: List of rows to search.
        testInstance: Row to find neighbours for.
        k: Maximum number of neighbours to return.

    Returns:
        A list of rows from ``training_data`` ordered by increasing distance.
        Rows at equal distance keep their ``training_data`` order. When ``k``
        exceeds the number of training rows, every row is returned instead of
        raising ``IndexError``; a non-positive ``k`` yields an empty list.
    """
    length = len(testInstance) - 1  # the final component is excluded
    ranked = sorted(
        training_data,
        key=lambda row: L2(testInstance, row, length),
    )
    # max() guards against a negative k, which as a slice bound would
    # return rows rather than nothing.
    return ranked[:max(k, 0)]
def getResponse(neighbors):
    """Return the most common class label among ``neighbors``.

    The label of each neighbour is its last element. When several labels
    share the highest count, the one that appears first in ``neighbors``
    wins.

    Args:
        neighbors: Non-empty list of rows whose last element is the label.

    Returns:
        The label with the most votes.

    Raises:
        ValueError: If ``neighbors`` is empty.
    """
    if not neighbors:
        raise ValueError("getResponse() requires at least one neighbor")
    votes = Counter(row[-1] for row in neighbors)
    # most_common() orders by descending count; the previous code indexed
    # the unsorted dict view, which is neither sorted nor subscriptable.
    return votes.most_common(1)[0][0]
def getAccuracy(test_data, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        test_data: List of rows; the last element of each row is compared
            with the prediction at the same index.
        predictions: Sequence of predicted labels, indexed like ``test_data``.

    Returns:
        A float between 0.0 and 100.0. An empty ``test_data`` yields 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``test_data``.
    """
    if not test_data:
        return 0.0
    correct = sum(
        1 for i, row in enumerate(test_data) if row[-1] == predictions[i]
    )
    return (correct / float(len(test_data))) * 100.0
def main():
    """Classify a random hold-out of ``iris.data`` with 3-nearest-neighbours.

    Loads the dataset, predicts a label for every test row, prints each
    prediction next to the actual label, and finally prints the accuracy.
    """
    # prepare data
    split = 0.67
    training_data, test_data = loadDataset('iris.data', split)
    # generate predictions
    k = 3
    predictions = []
    for row in test_data:
        neighbors = getNeighbors(training_data, row, k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(row[-1]))
    accuracy = getAccuracy(test_data, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment