Skip to content

Commit 9142552

Browse files
Create 8.py
Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering with the k-Means algorithm. Compare the results of the two algorithms and comment on the quality of the clustering. You may use Java/Python ML library classes/APIs in the program.
1 parent 82108a6 commit 9142552

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

8.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
"""Cluster the Iris data set with K-Means and with a Gaussian mixture (EM).

The script fits both models with three clusters, prints an accuracy score and
a confusion matrix for each against the known species labels, and plots the
true labels next to both clusterings on the petal-length / petal-width plane.

Cluster IDs produced by an unsupervised model are arbitrary, so they are first
matched to the true class labels (see ``align_cluster_labels``); comparing the
raw IDs would make the reported accuracy depend on how the model happened to
number its clusters.
"""
import numpy as np
import pandas as pd
from scipy.optimize import linear_sum_assignment

FEATURE_NAMES = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
# One colour per class label (0, 1, 2); indexed directly by label arrays.
CLASS_COLORS = np.array(['red', 'lime', 'black'])
N_CLUSTERS = 3
# Fixed seed so repeated runs print the same scores and draw the same plots.
RANDOM_STATE = 0


def align_cluster_labels(y_true, y_pred):
    """Relabel cluster IDs so they agree as much as possible with ``y_true``.

    A contingency table of (cluster, class) counts is built and clusters are
    matched one-to-one to classes so that the number of agreeing samples is
    maximal.  A cluster left without a partner (more clusters than classes)
    falls back to its majority class.

    Args:
        y_true: 1-D sequence of reference class labels.
        y_pred: 1-D sequence of cluster IDs, same length as ``y_true``.

    Returns:
        numpy.ndarray: ``y_pred`` expressed in the label values of ``y_true``.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {y_true.size} and {y_pred.size}"
        )
    if y_pred.size == 0:
        return y_pred.copy()

    true_ids, true_idx = np.unique(y_true, return_inverse=True)
    pred_ids, pred_idx = np.unique(y_pred, return_inverse=True)

    # contingency[i, j] = samples in cluster pred_ids[i] whose class is true_ids[j]
    contingency = np.zeros((pred_ids.size, true_ids.size), dtype=np.int64)
    np.add.at(contingency, (pred_idx, true_idx), 1)

    # Default every cluster to its majority class, then override with the
    # optimal one-to-one matching wherever a cluster received a partner.
    mapping = true_ids[contingency.argmax(axis=1)]
    matched_clusters, matched_classes = linear_sum_assignment(
        contingency, maximize=True
    )
    mapping[matched_clusters] = true_ids[matched_classes]
    return mapping[pred_idx]


def _scatter_petals(ax, features, labels, title):
    """Draw petal length vs. petal width on ``ax``, coloured by ``labels``."""
    ax.scatter(features.Petal_Length, features.Petal_Width,
               c=CLASS_COLORS[labels], s=40)
    ax.set_title(title)


def main():
    """Fit K-Means and a Gaussian mixture on Iris, then report and plot both."""
    # The plotting/ML stack is imported lazily so that align_cluster_labels
    # stays importable without matplotlib or scikit-learn installed.
    import matplotlib.pyplot as plt
    import sklearn.metrics as sm
    from sklearn import datasets, preprocessing
    from sklearn.cluster import KMeans
    from sklearn.mixture import GaussianMixture

    iris = datasets.load_iris()
    X = pd.DataFrame(iris.data, columns=FEATURE_NAMES)
    y = np.asarray(iris.target)

    # --- K-Means (fitted on the raw features, as in the original script) ---
    # n_init is pinned explicitly so the number of restarts does not depend on
    # the installed scikit-learn version's default.
    model = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE)
    kmeans_labels = align_cluster_labels(y, model.fit(X).labels_)
    print(kmeans_labels)
    print('The accuracy score of K-Mean: ',
          sm.accuracy_score(y, kmeans_labels))
    print('The Confusion matrix of K-Mean: ',
          sm.confusion_matrix(y, kmeans_labels))

    # --- Gaussian mixture / EM (fitted on standardized features) ---
    scaler = preprocessing.StandardScaler()
    xs = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
    gmm = GaussianMixture(n_components=N_CLUSTERS, random_state=RANDOM_STATE)
    gmm_labels = align_cluster_labels(y, gmm.fit(xs).predict(xs))
    print('The accuracy score of EM: ',
          sm.accuracy_score(y, gmm_labels))
    print('The Confusion matrix of EM: ',
          sm.confusion_matrix(y, gmm_labels))

    # --- One figure, three side-by-side panels (no overlapping subplot grids) ---
    _, axes = plt.subplots(1, 3, figsize=(21, 7))
    _scatter_petals(axes[0], X, y, 'Real Classification')
    _scatter_petals(axes[1], X, kmeans_labels, 'K Mean Classification')
    _scatter_petals(axes[2], X, gmm_labels, 'GMM Classification')
    plt.show()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)