Source code for got.relevance_analysis.faddis

''' FADDIS clustering implementation in Python
'''

import numpy as np
import numpy.linalg as LA

ZERO_BOUND = 10 ** (-9)
MIN_CLUSTER_CONTRIBUTION = 5 * 10 ** (-3)
EPSILON = 5 * 10 ** (-2)
# Maximum number of clusters
MAX_NUM_CLUSTERS = 15


[docs]def ensure_np_matrix(A): if not isinstance(A, np.matrix): A = np.matrix(A) return A
[docs]def faddis(A): ''' faddis: equential extraction of fuzzy clusters, in a sequential manner A is NxN similatriy matrix, symmetrized membership_matrix - NxK membership matix of clustering; contrib - 1xK vector of relative contributions to the data scatter; intensity - Kx2 matrix of cluster intensities^0.5 and intensities; lat - 1xK vector of eigen-values corresponding to clusters; cluster_got ''' A = ensure_np_matrix(A) # minimum cluster's relative contribution to the data scatter min_cont = MIN_CLUSTER_CONTRIBUTION # minimum relative residual data scatter eps = EPSILON # maximum number of clusters max_clust_num = MAX_NUM_CLUSTERS is_positive = True matrix_dim, _ = A.shape sc = np.power(A, 2) # Total data scatter scatter = np.sum(sc) cluster_got = 0 membership_matrix = np.empty((matrix_dim, 0)) contrib = np.array([]) lat = np.array([]) intensities = np.empty((0, 2)) curr_cont = 1 res_cont = 1 # 'zero' and 'one' vectors for comparisons zeros_vect = np.zeros((matrix_dim, 1)) ones_vect = np.ones((matrix_dim, 1)) # ensure matrix is symmetrical At = (A + A.T) / 2 matrix_sequence = [At] # Stop condition: # is_positive is True: eigen-value of the residual matrix is not positive; # OR la cluster intensity reaches its minimum lam; # OR ep relative residual data scatter reaches its minimum eps; # OR maximum number of clusters max_clust_num is achieved while is_positive and curr_cont > min_cont and res_cont > eps and cluster_got <= max_clust_num: # collecting a fuzzy cluster membership uf, with contrib con and intensity la, eig_vals, eig_vecs = LA.eig(At) # (lt, ii) - (maximum eigen-value, corresponding position) eig_vals_diag = np.diag(eig_vals) # Only positive eigenvalues eig_vals_pos = np.argwhere(eig_vals > ZERO_BOUND).ravel() eig_vals_pos_len = eig_vals_pos.size cur_intensities = np.zeros((eig_vals_pos_len, 1)) vm = np.zeros((matrix_dim, eig_vals_pos_len)) for k in range(eig_vals_pos_len): lt = eig_vals_diag[eig_vals_pos[k]] vf = eig_vecs[:, eig_vals_pos[k]] # Calculate normalized membership vector belonging to [0, 1] by # projection on the space. The normalization factor is the # Euclidean length of the vector bf = np.maximum(zeros_vect, vf) uf = np.minimum(bf, ones_vect) if LA.norm(uf) > 0: uf = uf / LA.norm(uf) vt = uf.T.dot(At).dot(uf) uf = np.squeeze(np.asarray(uf)) wt = uf.T.dot(uf) # Calculates the intensity Lambda (la) of the cluster, which is # defined almost as the Rayleigh quotient if wt > 0: la = vt.item() / (wt **2) else: la = 0 # since lt*vf =(-lt)*(-vf), try symmetric version # using -vf: vf1 = -vf bf1 = np.maximum(zeros_vect, vf1) uf1 = np.minimum(bf1, ones_vect) uf1 = np.squeeze(np.asarray(uf1)) if LA.norm(uf1) > 0: uf1 = uf1 / LA.norm(uf1) vt1 = uf1.T.dot(At).dot(uf1) wt1 = uf1.T.dot(uf1) if wt1 > 0: la1 = vt1.item() / (wt1 **2) else: la1 = 0 if la > la1: cur_intensities[k] = la vm[:, k] = uf.ravel() else: cur_intensities[k] = la1 vm[:, k] = uf1.ravel() contrib_max, contrib_max_index = cur_intensities.max(), cur_intensities.argmax() if contrib_max > ZERO_BOUND: lat = np.append(lat, eig_vals[eig_vals_pos[contrib_max_index]]) intensities = np.append(intensities, np.matrix([np.sqrt(contrib_max), contrib_max]), axis=0) # square root and value of lambda intensity of cluster_got # square root shows the value of fuzzyness uf = vm[:, contrib_max_index] vt = uf.T.dot(At).dot(uf) wt = uf.T.dot(uf) membership_matrix = np.append(membership_matrix, np.matrix(uf).T, axis=1) # calculate residual similarity matrix: # remove the present cluster (i.e. itensity* membership) from # similarity matrix Att = At - contrib_max * np.matrix(uf).T * np.matrix(uf) At = (Att + Att.T) / 2 matrix_sequence.append(At) curr_cont = (vt / wt) ** 2 # Calculate the relative contribution of cluster_got curr_cont /= scatter contrib = np.append(contrib, curr_cont) # Calculate the residual contribution res_cont -= curr_cont cluster_got += 1 else: is_positive = False if not is_positive: print('No positive weights at spectral clusters') elif curr_cont < min_cont: print('Cluster contribution is too small') elif res_cont < eps: print('Residual is too small') elif cluster_got > max_clust_num: print('Maximum number of clusters reached') return matrix_sequence, membership_matrix, contrib, intensities, lat, cluster_got
if __name__ == '__main__': M = np.matrix([[1, .5, .3, .1], [.5, 1, .98, .4], [.3, .98, 1, .6], [.1, .4, .6, 1 ]]) #M = np.matrix([[1, 0, 1], [0, 3, 0], [1, 0, 9]]) M = np.matrix(np.random.rand(500, 500)) B, member, contrib, intensity, lat, tt = faddis(M) print("B") print(B) print("member") print(member) print("contrib") print(contrib) print("intensity") print(intensity) print("lat") print(lat) print("tt") print(tt)