机器学习基础：模糊C均值聚类（Machine Learning Fundamentals: Fuzzy C-Means ）Python实现_python实现模糊c均值聚类

作者：小小林熬夜学编程 | 2024-06-11 09:07:50

踩

python实现模糊c均值聚类

说明：FCM python 简单实现

在我看来，真正厉害的不是如何实现，而是怎么对这个问题求解，即我们利用的更新参数的公式是怎么来的。具体就得看原论文[4]了。

代码

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt

class Data(object):
    """Synthetic 2-D test data: three Gaussian blobs with known labels."""

    def __init__(self):
        pass

    def generate(self):
        """Create the test data set and store it on the instance.

        Sets ``self.colors``, ``self.xpts``, ``self.ypts`` and ``self.labels``.
        The global NumPy random state is re-seeded with 42 so that repeated
        calls return identical data.

        Returns:
            Tuple ``(xpts, ypts, labels)`` of 1-D arrays with 600 entries
            each (200 per cluster); ``labels`` holds the cluster index
            (0.0, 1.0 or 2.0) each point was sampled from.
        """
        self.colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']

        # Define three cluster centers
        centers = [[4, 2],
                   [1, 7],
                   [5, 6]]

        # Define three cluster sigmas in x and y, respectively
        sigmas = [[0.8, 0.3],
                  [0.3, 0.5],
                  [1.1, 0.7]]

        points_per_cluster = 200

        # Generate test data
        np.random.seed(42)  # Set seed for reproducibility

        # Collect the per-cluster samples in lists and join them once at the
        # end. Starting from np.zeros(1) and stacking onto it (as an earlier
        # version did) leaves a bogus extra point (0, 0) labelled as cluster 0.
        x_parts, y_parts, label_parts = [], [], []

        # Synthesize one Gaussian blob per (center, sigma) pair; x is drawn
        # before y for each cluster.
        for i, ((xmu, ymu), (xsigma, ysigma)) in enumerate(zip(centers, sigmas)):
            x_parts.append(np.random.standard_normal(points_per_cluster) * xsigma + xmu)
            y_parts.append(np.random.standard_normal(points_per_cluster) * ysigma + ymu)
            label_parts.append(np.ones(points_per_cluster) * i)

        self.xpts = np.concatenate(x_parts)
        self.ypts = np.concatenate(y_parts)
        self.labels = np.concatenate(label_parts)
        return self.xpts, self.ypts, self.labels

    def visualize(self):
        """Scatter-plot the generated points, one color per true label.

        ``generate`` must have been called first. The figure is only created
        here; it becomes visible when the caller invokes ``plt.show()``.
        """
        fig0, ax0 = plt.subplots()
        # Iterate over the labels actually present instead of assuming 3.
        for label in np.unique(self.labels):
            mask = self.labels == label
            ax0.plot(self.xpts[mask], self.ypts[mask], '.',
                     color=self.colors[int(label) % len(self.colors)])
        ax0.set_title('Test data: 200 points x3 clusters.')

class Fuzzy(object):
    """Minimal fuzzy c-means (FCM) clustering for 2-D points."""

    def __init__(self, xpts, ypts, labels):
        """Store the point coordinates and their reference labels.

        Args:
            xpts: 1-D sequence of x coordinates.
            ypts: 1-D sequence of y coordinates, same length as ``xpts``.
            labels: reference labels; stored but not used by ``cluster``.
        """
        self.xpts = xpts
        self.ypts = ypts
        self.labels = labels

    def _norm1(self, array):
        """Return the sum of absolute values of all entries of ``array``."""
        array = np.abs(array)
        return np.sum(array)

    def _normalize_rows(self, rows):
        """Scale each row of the 2-D array ``rows`` so that it sums to 1."""
        normalized_rows = rows / np.sum(rows, axis=1, keepdims=True)
        return normalized_rows

    def cluster(self, classes, m=2, niter=1000, error=1e-5):
        """Run fuzzy c-means and return the centers and hard labels.

        The membership matrix is initialised with random rows that sum to 1
        (drawn from the global NumPy random state). Each iteration recomputes
        the centers as membership-weighted means and then the memberships
        from the point-to-center distances. One progress line is printed per
        iteration.

        Args:
            classes: number of clusters.
            m: fuzzifier; must be greater than 1 (the exponent 2 / (m - 1)
                is undefined at m == 1).
            niter: maximum number of iterations.
            error: stop once the summed absolute change of the membership
                matrix between two iterations drops below this value.

        Returns:
            Tuple ``(c, predict)``: ``c`` is a ``(classes, 2)`` float32 array
            of cluster centers, ``predict`` a 1-D integer array holding the
            index of the highest-membership cluster for every point.
        """
        # column_stack yields an (n, 2) float array on Python 2 and 3 alike;
        # np.array(zip(...)) only does so on Python 2, where zip is a list.
        self.x = np.column_stack((self.xpts, self.ypts))
        n_data = self.x.shape[0]

        # init u: random memberships, each row normalised to sum to 1
        u = self._normalize_rows(np.random.rand(n_data, classes))

        c = np.zeros([classes, 2], dtype=np.float32)
        exponent = 2.0 / (m - 1)
        # Lower bound for distances so that a point lying exactly on a center
        # does not cause a division by zero (it gets membership ~1 instead).
        tiny = np.finfo(np.float64).eps

        for n in range(niter):
            # calculate c_j: membership-weighted mean of all points
            weights = u ** m
            c[:] = np.dot(weights.T, self.x) / np.sum(weights, axis=0)[:, np.newaxis]

            # Distance of every point to every center, shape (n_data, classes).
            # NOTE(review): this is the L1 (city-block) distance, kept from
            # the original code; the textbook FCM update is usually stated
            # with the Euclidean norm -- confirm which one is intended.
            dist = np.abs(self.x[:, np.newaxis, :] - c[np.newaxis, :, :]).sum(axis=2)
            dist = np.fmax(dist, tiny)

            # update u: u_ij = 1 / sum_k (d_ij / d_ik) ** (2 / (m - 1))
            ratio = (dist[:, :, np.newaxis] / dist[:, np.newaxis, :]) ** exponent
            u_new = 1.0 / np.sum(ratio, axis=2)

            # check convergence
            print('FCM steps:', n)
            change = self._norm1(u - u_new)

            # Adopt the new memberships before a possible break so that the
            # returned labels match the returned centers.
            u = u_new
            if change < error:
                break

        # return value and center
        predict = np.argmax(u, axis=1)
        return c, predict

if __name__ == '__main__':
    # Demo: generate the synthetic blobs, cluster them with fuzzy c-means and
    # plot both the ground truth and the clustering result.

    # generate data
    data = Data()
    xpts, ypts, labels = data.generate()
    data.visualize()

    # fuzzy c means
    n_clusters = 3
    fuzzy = Fuzzy(xpts, ypts, labels)
    center, predict_labels = fuzzy.cluster(classes=n_clusters, m=2, niter=1000)

    # visualize
    colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']
    fig, ax = plt.subplots()
    # range (not xrange) so the script also runs on Python 3
    for i in range(n_clusters):
        ax.plot(xpts[predict_labels == i],
                ypts[predict_labels == i],
                '.',
                color=colors[i])
    # mark every cluster center with a red square
    for pt in center:
        ax.plot(pt[0], pt[1], 'rs')
    ax.set_title('clustering results')
    plt.show()

结果：
fuzzy c means 实现

说明

主要实现一下fuzzy c means，理解其实现过程。
注意，FCM 实现过程中隶属度（degree of membership）矩阵 $U$ 的初始化需要满足三个条件[1]。我在第一次初始化时直接把每个点属于每个类的隶属度都设为相同的值，结果得到了错误的结果：此时所有聚类中心都等于同一个加权均值，更新后的隶属度仍然全部相等，算法停在这个不动点上，无法把各类分开。

后来参考了skfuzzy的初始化方式，即随机初始化，得到正确的结果。详情直接看源码。

参考

[1] A Tutorial on Clustering Algorithms
[2] skfuzzy demo
[3] skfuzzy
[4] J. C. Dunn (1973): “A Fuzzy Relative of the ISODATA Process and Its Use in Detecting Compact Well-Separated Clusters”, Journal of Cybernetics 3: 32-57

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/小小林熬夜学编程/article/detail/702511