2.1 K-Nearest Neighbors Classification
2.1 K-Nearest Neighbors Classification
2.1.1 Small Data Set
2.1.1 Small Data Set
In[]:=
session=StartExternalSession["Python"]
Out[]=
ExternalSessionObject
Test
In[]:=
5+6
Out[]=
11
In[]:=
import mglearn
import numpy as np
# Generate the small two-class "forge" data set once: X holds the feature
# rows, y the 0/1 labels.
X, y = mglearn.datasets.make_forge()
# Write the features to a text file; the Wolfram side reads this same path
# back with Import[..., "Table"].
np.savetxt('D:\\dataX.txt',X,fmt='%.5e')
In[]:=
y
Out[]=
NumericArray
In[]:=
y=Normal[%]
Out[]=
{1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0}
In[]:=
X=Import["D:\\dataX.txt","Table"];
In[]:=
class1={};class2={};
In[]:=
(* Split the feature rows by label: rows whose label equals 0 are appended to class1, all others to class2. *)
(* NOTE(review): the comparison operator was lost in the source text; "==" is reconstructed here - confirm against the original notebook. *)
MapThread[If[#1==0,AppendTo[class1,#2],AppendTo[class2,#2]]&,{y,X}];
In[]:=
(* Scatter plot of the two classes (class1 in green, class2 in red) with a frame, no axes and a 1:1 aspect ratio. *)
(* The option rules (->) were missing in the source text and are restored. *)
p0=ListPlot[{class1,class2},PlotStyle->{Green,Red},Frame->True,Axes->None,PlotMarkers->{Automatic,Medium},AspectRatio->1]
Out[]=
In[]:=
(* Build the training set for Classify as a list of rules: feature row -> label. *)
(* The rule arrow between #2 and #1 was missing in the source text and is restored. *)
trainingData=MapThread[#2->#1&,{y,X}];
In[]:=
(* Nearest-neighbors classifier using 1 neighbor. The option rules (->) were missing in the source text and are restored. *)
c1=Classify[trainingData,Method->{"NearestNeighbors","NeighborsNumber"->1,"NearestMethod"->"Scan"}];
In[]:=
(* Plot the class predicted by c1 over the region 7.5<=u<=12, -1<=v<=6 and overlay the training points p0. *)
(* The rule arrow after ColorFunction was missing in the source text and is restored. *)
p1=Show[{DensityPlot[c1[{u,v}],{u,7.5,12},{v,-1,6},ColorFunction->"CMYKColors"],p0}];
In[]:=
(* Nearest-neighbors classifier using 3 neighbors. The option rules (->) were missing in the source text and are restored. *)
c3=Classify[trainingData,Method->{"NearestNeighbors","NeighborsNumber"->3,"NearestMethod"->"Scan"}];
In[]:=
(* Plot the class predicted by c3 over the region 7.5<=u<=12, -1<=v<=6 and overlay the training points p0. *)
(* The rule arrow after ColorFunction was missing in the source text and is restored. *)
p3=Show[{DensityPlot[c3[{u,v}],{u,7.5,12},{v,-1,6},ColorFunction->"CMYKColors"],p0}];
In[]:=
GraphicsGrid[{{p1,p3}}]
Out[]=
In[]:=
(* Nearest-neighbors classifier using 9 neighbors. The option rules (->) were missing in the source text and are restored. *)
c9=Classify[trainingData,Method->{"NearestNeighbors","NeighborsNumber"->9,"NearestMethod"->"Scan"}];
In[]:=
(* Plot the class predicted by c9 over the region 7.5<=u<=12, -1<=v<=6 and overlay the training points p0. *)
(* The rule arrow after ColorFunction was missing in the source text and is restored. *)
p9=Show[{DensityPlot[c9[{u,v}],{u,7.5,12},{v,-1,6},ColorFunction->"CMYKColors"],p0}]
Out[]=
Fig.2.3 Classification via Nearest Neighbor k = 9
In[]:=
(* Apply the 1-neighbor classifier to every feature row of X. *)
c1/@X
Out[]=
{1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0}
In[]:=
yM=%;
In[]:=
from sklearn.neighbors import KNeighborsClassifier
# Fit a 1-neighbor classifier on the full data set (fit once; the repeated
# fit/predict statements were redundant).
clf=KNeighborsClassifier(n_neighbors=1).fit(X,y)
# Predict the labels of the same samples the model was fitted on.
prediction=clf.predict(X)
# Bare name as the last statement so the cell returns the predictions.
prediction
Out[]=
NumericArray
In[]:=
yP=Normal[%]
Out[]=
{1,0,1,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0}
In[]:=
Norm[yM-yP]
Out[]=
0
In[]:=
from sklearn.model_selection import train_test_split
# Regenerate the forge data and split it into training and test parts;
# random_state=0 is passed so the split is reproducible.
X, y = mglearn.datasets.make_forge()
X_train, X_test, y_train, y_test= train_test_split(X, y, random_state=0)
In[]:=
X_train
Out[]=
NumericArray
In[]:=
Xtrain=Normal[%]
Out[]=
{{8.9223,-0.639932},{8.73371,2.49162},{9.32298,5.09841},{7.99815,4.85251},{11.033,-0.168167},{9.17748,5.09283},{11.564,1.33894},{9.15072,5.49832},{8.3481,5.13416},{11.9303,4.64866},{8.10623,4.28696},{8.67495,4.47573},{9.67285,-0.202832},{9.50169,1.93825},{8.69289,1.54322},{9.96347,4.59677},{9.50049,-0.264303},{9.25694,5.13285},{8.68937,1.4871}}
In[]:=
X_test
Out[]=
NumericArray
In[]:=
Xtest=Normal[%]
Out[]=
{{11.5416,5.21116},{10.0639,0.990781},{9.49123,4.33225},{8.18378,1.29564},{8.30989,4.80624},{10.2403,2.45544},{8.34469,1.63824}}
In[]:=
y_train
Out[]=
NumericArray
In[]:=
ytrain=Normal[%]
Out[]=
{0,0,1,1,0,1,0,1,1,1,0,1,0,0,0,1,0,1,0}
In[]:=
y_test
Out[]=
NumericArray
Training the classifier,
In[]:=
from sklearn.neighbors import KNeighborsClassifier
# Fit a 3-neighbor classifier on the training split (fit once).
clf=KNeighborsClassifier(n_neighbors=3).fit(X_train,y_train)
In[]:=
# Predict the labels of the training samples with the fitted classifier.
prediction=clf.predict(X_train)
# Bare name as the last statement so the cell returns the predictions.
prediction
Result of the prediction on the test set,
In[]:=
from sklearn.neighbors import KNeighborsClassifier
# Refit the 3-neighbor classifier on the training split so this cell does
# not depend on the previous fitting cell, then predict the test split.
clf=KNeighborsClassifier(n_neighbors=3).fit(X_train,y_train)
prediction=clf.predict(X_test)
# Bare name as the last statement so the cell returns the predictions.
prediction
In[]:=
# Accuracy of the fitted classifier on the training split; the label names
# the training set because X_train / y_train are what is being scored.
print("Training set score: {:.2f}".format(clf.score(X_train, y_train)))
In[]:=
# Accuracy of the fitted classifier on the held-out test split, printed
# with two decimals.
print("Test set score: {:.2f}".format(clf.score(X_test, y_test)))
2.1.2 Vacant and Residential Lands
2.1.2 Vacant and Residential Lands
In[]:=
import numpy as np
from numpy import array, matrix
from scipy.io import mmread, mmwrite
from numpy import array, matrix
from scipy.io import mmread, mmwrite
In[]:=
# Load the training features and labels from Matrix Market files (each
# file is read once; the repeated read was redundant).
Xtrain=mmread('D:\\Xtrain.mtx')
y=mmread('D:\\ytrain.mtx')
# Use the first row of the loaded label matrix as the label vector.
# NOTE(review): presumably the labels are stored as a single-row matrix - confirm.
ytrain=y[0]
Training process
In[]:=
from sklearn.neighbors import KNeighborsClassifier
# Fit a 1-neighbor classifier on the loaded training data (fit once).
clf=KNeighborsClassifier(n_neighbors=1).fit(Xtrain,ytrain)
In[]:=
# Predict the labels of the training samples with the fitted classifier.
prediction=clf.predict(Xtrain)
# Bare name as the last statement so the cell returns the predictions.
prediction
In[]:=
# Accuracy of the fitted classifier on the training data, printed with
# two decimals.
print("Training set score: {:.2f}".format(clf.score(Xtrain, ytrain)))
In[]:=
# Load the test features and labels from Matrix Market files (each file is
# read once; the repeated read was redundant).
Xtest=mmread('D:\\Xtest.mtx')
y=mmread('D:\\ytest.mtx')
# Use the first row of the loaded label matrix as the label vector.
# NOTE(review): presumably the labels are stored as a single-row matrix - confirm.
ytest=y[0]
In[]:=
# Predict the labels of the test samples with the classifier fitted on the
# training data.
prediction=clf.predict(Xtest)
# Bare name as the last statement so the cell returns the predictions.
prediction
The accuracy on the test set
In[]:=
# Accuracy of the fitted classifier on the test data, printed with two
# decimals.
print("Test set score: {:.2f}".format(clf.score(Xtest, ytest)))