3.1 KMeans Clustering

3.1.1 Small Data Set

In[]:=

session=StartExternalSession["Python"]

Out[]=

ExternalSessionObject

System: Python	EvaluationCount: None
UUID: 8881a91f-ce94-44bb-b329-ed3118b79b9b



Test

〉

In[]:=

5+6

Out[]=

〉

In[]:=

from numpy import array, matrix
from scipy.io import mmread, mmwrite
import numpy as np

〉

In[]:=

from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

〉

In[]:=

X, y = make_blobs(random_state=1)
kmeans = KMeans(n_clusters=3).fit(X)
pre= kmeans.predict(X)
pred=np.array([pre])

〉

In[]:=

mmwrite('labels.mtx',pred)
mmwrite('dataX',X)

〉

In[]:=

cent= kmeans.cluster_centers_
cent

Out[]=

NumericArray

Type: Real64

Dimensions: {3,2}



In[]:=

Normal[%]

Out[]=

{{-6.58197,-8.17239},{-1.47108,4.33722},{-10.0494,-3.85954}}

In[]:=

centers=%;

In[]:=

labels=Import["labels.mtx"]//Flatten;

In[]:=

datap=Import["dataX.mtx"];

In[]:=

p0=ListPlot[datap,Frame->True,PlotMarkers"",AxesNone]

Out[]=

In[]:=

dataC=MapThread[Flatten[{#1,#2}]&,{datap,labels}];

In[]:=

dataC1=Map[{#[[1]],#[[2]]}&,Select[dataC,#[[3]]0&]];

In[]:=

dataC2=Map[{#[[1]],#[[2]]}&,Select[dataC,#[[3]]1&]];

In[]:=

dataC3=Map[{#[[1]],#[[2]]}&,Select[dataC,#[[3]]2&]];

In[]:=

Show[{p0,ListPlot[{dataC1,dataC2,dataC3},Frame->True,PlotMarkersAutomatic,AxesNone],ListPlot[centers,PlotMarkers"♠",PlotStyle{Black,Large}]},AspectRatio0.7]

Out[]=

〉

In[]:=

from sklearn import metrics
s=metrics.silhouette_score(X,pre,'euclidean')
s

Out[]=

0.769709

In[]:=

clusters=FindClusters[datap,3,Method"KMeans",PerformanceGoal"Quality"];

In[]:=

centers=Map[Mean[#]&,clusters]

Out[]=

{{-1.47108,4.33722},{-10.0494,-3.85954},{-6.58197,-8.17239}}

In[]:=

Show[{p0,ListPlot[clusters,Frame->True,PlotMarkersAutomatic,AxesNone],ListPlot[centers,PlotMarkers"♠",PlotStyle{Black,Large}]},AspectRatio0.7]

Out[]=

3.1.3 The result of the clustering with the centers of the clusters using Mathematica

In[]:=

clustersF=ClusterClassify[datap,3,Method"KMeans",PerformanceGoal"Quality"]

Out[]=

ClassifierFunction

Input type: NumericalVector

(length: 2)

Classes: 1,2,3



In[]:=

clustersF[{-6,-8},"Probabilities"]//Normal

Out[]=

{11.,23.92839×

-36

,34.92367×

-10

}

In[]:=

clustersF[{-5,-1},"Probabilities"]//Normal

Out[]=

{10.305066,20.665971,30.0289628}

In[]:=

pD=DensityPlot[clustersF[{x,y}],{x,-12,2},{y,-12,7},PlotPoints100];

In[]:=

Show[{pD,p0,ListPlot[clusters,Frame->True,PlotMarkersAutomatic,AxesNone],ListPlot[centers,PlotMarkers"♠",PlotStyle{Black,Large}]},AspectRatio0.7]

Out[]=

In[]:=

index=Map[#-1&,ClusteringComponents[datap,3,1,Method->"KMeans",PerformanceGoal"Quality"]]

Out[]=

{0,1,1,1,2,2,2,1,0,0,1,1,2,0,2,2,2,0,1,1,2,1,2,0,1,2,2,0,0,2,0,0,2,0,1,2,1,1,1,2,2,1,0,1,1,2,0,0,0,0,1,2,2,2,0,2,1,1,0,0,1,2,2,1,1,2,0,2,0,1,1,1,2,0,0,1,2,2,0,1,0,1,1,2,0,0,0,0,1,0,2,0,0,1,1,2,2,0,2,0}

In[]:=

Export["pred.mtx",{index}]

Out[]=

pred.mtx

〉

In[]:=

index=mmread('pred.mtx')

〉

In[]:=

ind=index[0]
from sklearn import metrics
s=metrics.silhouette_score(X,ind,'euclidean')
s

〉

In[]:=

index=mmread('pred.mtx')

〉

In[]:=

ind=index[0]
s=metrics.silhouette_score(X,ind,'euclidean')
s

3.1.2 Clustering Images

〉

In[]:=

X=mmread('dataX.mtx')

〉

In[]:=

import numpy as np

〉

In[]:=

kmeans = KMeans(n_clusters=3).fit(X)
prediction= kmeans.predict(X)
prediction

〉

In[]:=

cent= kmeans.cluster_centers_
cent

〉

In[]:=

from sklearn import metrics
s=metrics.silhouette_score(X,prediction,'euclidean')
s