K Nearest Neighbors

K Nearest Neighbors is a simple algorithm that stores all the available cases and classifies a new case based on a similarity measure, such as its distance to the stored cases.

K= Number of Nearest Neighbors

How it works

Let’s assume we have two classes of data, class A and class B, and a new data point (the star) whose class we have to predict. Take k=3, so we look at the 3 nearest neighbours of the star. If more of those neighbours belong to class A than to class B, the star is assigned to class A; otherwise it is assigned to class B. It is that simple.

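Dataset: The dataset we use is headbrain.csv
From https://www.kaggle.com/saarthaksangam/headbrain
Note: the source code below reads a file named iris.data, in which each row holds four numeric features followed by a class label.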

Source Code:

# -*- coding: utf-8 -*-
"""
Created on Mon Sep 30 03:12:40 2019
@author: nowshad
"""
import csv
import random
import math
import operator
def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Load a CSV file and randomly split its rows into training and test sets.

    Every column except the last is converted to ``float``; the last column
    is kept unchanged as the class label. Rows that are empty or contain
    only whitespace (for example a trailing blank line) are skipped.

    Args:
        filename: Path of the CSV file to read.
        split: Probability that a row is appended to ``trainingSet``;
            otherwise it is appended to ``testSet``.
        trainingSet: List that receives the training rows. A new list is
            created when omitted.
        testSet: List that receives the test rows. A new list is created
            when omitted.

    Returns:
        The tuple ``(trainingSet, testSet)``. When lists were passed in,
        these are the same objects, extended in place.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature column cannot be parsed as a float.
    """
    # Fresh lists per call: a mutable default would be shared between calls.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' is what the csv module expects for files it reads.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Skip blank rows instead of blindly dropping the final row,
            # so a file without a trailing blank line loses no data.
            if not any(cell.strip() for cell in row):
                continue
            record = [float(value) for value in row[:-1]] + row[-1:]
            if random.random() < split:
                trainingSet.append(record)
            else:
                testSet.append(record)
    return trainingSet, testSet
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Only indices ``0 .. length-1`` of both instances are read, so trailing
    entries such as a class label are ignored.
    """
    squared_total = sum(
        pow(instance1[idx] - instance2[idx], 2) for idx in range(length)
    )
    return math.sqrt(squared_total)
def getNeighbours(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distance is computed with ``euclideanDistance`` over every component of
    ``testInstance`` except the last one, which is treated as the label.

    Args:
        trainingSet: Sequence of rows to search.
        testInstance: Row to find neighbours for.
        k: Number of neighbours wanted.

    Returns:
        A list of at most ``k`` rows from ``trainingSet``, nearest first.
        If ``trainingSet`` holds fewer than ``k`` rows, all of them are
        returned instead of raising ``IndexError``.
    """
    length = len(testInstance) - 1
    ranked = sorted(
        ((row, euclideanDistance(testInstance, row, length))
         for row in trainingSet),
        key=operator.itemgetter(1),
    )
    # Clamp at zero so a negative k yields no neighbours (as range(k) did)
    # rather than slicing from the end of the list.
    return [row for row, _ in ranked[:max(k, 0)]]
def getResponse(neighbors):
    """Return the label that occurs most often among ``neighbors``.

    The label of each neighbour is its last element. When several labels
    share the highest count, the one seen first wins because the sort is
    stable.
    """
    tally = {}
    for row in neighbors:
        label = row[-1]
        tally[label] = tally.get(label, 0) + 1
    ranked = sorted(tally.items(), key=operator.itemgetter(1), reverse=True)
    winner = ranked[0]
    return winner[0]
def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    Args:
        testSet: Sequence of rows; the last element of each row is the
            true label.
        predictions: Predicted labels, indexed in step with ``testSet``.

    Returns:
        A float between 0.0 and 100.0. An empty ``testSet`` gives 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    if not testSet:
        return 0.0
    # Index predictions explicitly so a too-short list still fails loudly.
    correct = sum(
        1 for idx, row in enumerate(testSet) if row[-1] == predictions[idx]
    )
    return (correct / float(len(testSet))) * 100.0
#main
# Split the dataset, classify every held-out row with k-NN and print the
# resulting accuracy. The file is read once, by loadDataset, from a path
# relative to the working directory, so the script is not tied to one
# machine's absolute path.
trainingSet=[]
testSet=[]
split=0.67  # probability that a row is placed in the training set
loadDataset('iris.data',split,trainingSet,testSet)
k=3  # number of neighbours that vote on each prediction
predictions=[]
for row in testSet:
    neighbors=getNeighbours(trainingSet,row,k)
    predictions.append(getResponse(neighbors))
accuracy=getAccuracy(testSet,predictions)
print('Accuracy= ',accuracy,'%')

Output:

The maximum accuracy we got is 98.14%; results vary between runs because the train/test split is random. The screenshot is given below.

Reference

https://www.edureka.co/python

Search This Blog

Nowshad's Blog