Create 8.py

fatalcoder524 · web-flow · commit 9142552318c2 · 2019-11-20T09:20:14.000+05:30
Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering with the k-Means algorithm. Compare the results of these two algorithms and comment on the quality of the clustering. You may use Java/Python ML library classes/APIs in the program.
diff --git a/8.py b/8.py
@@ -0,0 +1,76 @@
+import matplotlib.pyplot as plt
+from sklearn import datasets
+from sklearn.cluster import KMeans
+import sklearn.metrics as sm
+import pandas as pd
+import numpy as np
+from sklearn import preprocessing
+from sklearn.mixture import GaussianMixture
+from scipy.optimize import linear_sum_assignment
+
+
+def align_clusters(y_true, y_cluster):
+    """Relabel cluster ids so they line up with the true class ids.
+
+    Cluster ids produced by a clustering model are arbitrary, so scoring
+    them against class labels directly is meaningless. This picks the
+    one-to-one cluster -> class mapping with the most agreeing samples.
+
+    Both arguments are 1-D integer arrays using labels 0..k-1; the
+    relabeled copy of y_cluster is returned.
+    """
+    overlap = sm.confusion_matrix(y_true, y_cluster)
+    # linear_sum_assignment minimizes cost, so negate to maximize overlap.
+    class_ids, cluster_ids = linear_sum_assignment(-overlap)
+    relabel = np.empty_like(cluster_ids)
+    relabel[cluster_ids] = class_ids
+    return relabel[y_cluster]
+
+
+def main():
+    """Cluster iris with k-Means and EM (GMM), then plot and score both."""
+    iris = datasets.load_iris()
+    X = pd.DataFrame(iris.data)
+    X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
+    y = pd.DataFrame(iris.target)
+    y.columns = ['Targets']
+    y_true = y.Targets.values
+
+    # Fixed seeds keep the cluster assignments reproducible between runs.
+    model = KMeans(n_clusters=3, n_init=10, random_state=0)
+    model.fit(X)
+    predY = align_clusters(y_true, model.labels_)
+    print(predY)
+
+    # The mixture model is fitted on standardized features.
+    scaler = preprocessing.StandardScaler()
+    xs = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
+    gmm = GaussianMixture(n_components=3, random_state=0)
+    gmm.fit(xs)
+    y_cluster_gmm = align_clusters(y_true, gmm.predict(xs))
+
+    # One figure, three side-by-side panels; aligned labels share colors.
+    colormap = np.array(['red', 'lime', 'black'])
+    panels = [
+        ('Real Classification', y_true),
+        ('K Mean Classification', predY),
+        ('GMM Classification', y_cluster_gmm),
+    ]
+    plt.figure(figsize=(21, 7))
+    for position, (title, labels) in enumerate(panels, start=1):
+        plt.subplot(1, 3, position)
+        plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[labels], s=40)
+        plt.title(title)
+
+    print('The accuracy score of K-Mean: ', sm.accuracy_score(y_true, predY))
+    print('The Confusion matrix of K-Mean: ', sm.confusion_matrix(y_true, predY))
+    print('The accuracy score of EM: ',
+          sm.accuracy_score(y_true, y_cluster_gmm))
+    print('The Confusion matrix of EM: ',
+          sm.confusion_matrix(y_true, y_cluster_gmm))
+    # Without show() nothing is displayed when run as a plain script.
+    plt.show()
+
+
+if __name__ == '__main__':
+    main()