Implementation of the FRIS-STOLP algorithm (python 3)
The theory for this algorithm is given Hereand also aware Konstantin Vorontsov.
In this article we will only look at the implementation in Python 3.
The code can be copied in the same order “as is”.
We connect the necessary packages
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
from math import floor, ceil # for rounding up and down
from scipy.stats import pearsonr
from sklearn import datasets
import pandas as pd
import math
from typing import Dict, Tuple, List
import copy
from random import randint
from scipy.stats import pearsonr
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
import matplotlib.cm as cm
from scipy.stats import pearsonr
import matplotlib.colors as matplotcolor
from sklearn.decomposition import PCA
from random import randint
import matplotlib.cm as cm
def Get_Class_Label_Of_Object(x):
return x[len(x)-1]
Method for calculating the distance between two objects
def Distance(x, y):
if len(x) != len(y):
return -1
sum = 0
for i in range(len(x)):
sum += (x[i]-y[i])*(x[i]-y[i])
return sum
Reading data from a file:
def PrepareData():
all_data = np.loadtxt(open("./wine_data.csv","r"), delimiter = ",", skiprows = 0, dtype = np.float64)
# load class labels from column 1
y_wine = all_data[:,0]
#print (y_wine)
# conversion of the class labels to integer-type array
y_wine = y_wine.astype(np.int64, copy = False)
# load the 14 features
X_wine = all_data[:,1:]
data = all_data[:, 6:8] #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 6:9
x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
plt.figure(2, figsize = (16, 12))
plt.clf()
# Plot the training points
plt.scatter(data[:, 0], data[:, 1], c = y_wine, cmap = plt.cm.Set1, edgecolor="k")
plt.xlabel('Flavanoids')
plt.ylabel('Nonflavanoid phenols')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.show()
uniq = [1 for x in range(len(data))]
cnt_of_uniq = 0
for i in range(len(data)):
for j in range(i+1,len(data)):
if data[i, 0] == data[j, 0] and data[i, 1] == data[j, 1]:
uniq[j] = 0
for i in range(len(data)):
if uniq[i] == 1:
cnt_of_uniq += 1
w, h = 3, cnt_of_uniq
m = [[0 for x in range(w)] for y in range(h)]
cur_cnt = 0
for i in range(len(data)):
if uniq[i] == 1:
for j in range(2):
m[cur_cnt][j] = data[i, j]
m[cur_cnt][2] = y_wine[i]-1
cur_cnt += 1
data = m
return data
data = PrepareData()
Normalizing data, displaying data on a graph, removing identical objects in the sample:
def Prepare_Iris_Data():
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, 2:4] # we only take the first two features.
y = iris.target
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
plt.figure(2, figsize = (16, 12))
plt.clf()
# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c = y, cmap = plt.cm.Set1, edgecolor="k")
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.show()
uniq = [1 for x in range(len(X))]
cnt_of_uniq = 0
for i in range(len(X)):
for j in range(i+1,len(X)):
if X[i, 0] == X[j, 0] and X[i, 1] == X[j, 1]:
uniq[j] = 0
for i in range(len(X)):
if uniq[i] == 1:
cnt_of_uniq += 1
w, h = 3, cnt_of_uniq
m = [[0 for x in range(w)] for y in range(h)]
cur_cnt = 0
for i in range(150):
if uniq[i] == 1:
for j in range(2):
m[cur_cnt][j] = X[i, j]
m[cur_cnt][2] = y[i]
cur_cnt += 1
data = m
return data
iris_data = Prepare_Iris_Data()
FRIS-STOLP class:
class FRiS_STOLP:
EPS = 1E-7
# Найти ближайшего соседа
def Get_Nearest_Neighbour(self, x, Omega):
dist = [0]*len(Omega)
for i in range(len(Omega)):
dist[i] = Distance(x[0:len(x)-1], Omega[i][0:len(x)-1])
min_dist = min(dist)
for i in range(len(Omega)):
if dist[i] + self.EPS > min_dist and dist[i] - self.EPS < min_dist:
return Omega[i]
# Найти эталоны
def FindEtalon(self, X, Omega, Xl, defence_to_tolerance_ratio):
label = Get_Class_Label_Of_Object(X[0])
Efficiency = [0]*len(X)
Tolerance_denom = 0
for item in Xl:
if Get_Class_Label_Of_Object(item) != label:
Tolerance_denom += 1
if Tolerance_denom == 0:
Tolerance_denom = 1
NN_enemy_etalon_For_Defence = [list() for t in range(len(X))]
for p in range(len(X)):
item = X[p]
if Get_Class_Label_Of_Object(item) == label:
NN_enemy_etalon_For_Defence[p] = self.Get_Nearest_Neighbour(item, Omega)
NN_enemy_etalon_For_Tolerance = [list() for t in range(len(Xl))]
for p in range(len(Xl)):
item = Xl[p]
if Get_Class_Label_Of_Object(item) != label:
NN_enemy_etalon_For_Tolerance[p] = self.Get_Nearest_Neighbour(item, Omega)
for i in range(len(X)):
x = X[i]
sum_of_defence = 0
for j in range(len(X)):
item = X[j]
if item != x and Get_Class_Label_Of_Object(item) == label:
S = self.FRiS(NN_enemy_etalon_For_Defence[j], item, x)
sum_of_defence += S
Defence = sum_of_defence / (len(X)-1)
Tolerance_sum = 0
NN_enemy_etalon = []
NN_enemy_etalon = [list() for t in range(len(Xl))]
for j in range(len(Xl)):
item = Xl[j]
if Get_Class_Label_Of_Object(item) != label:
S = self.FRiS(x, item, NN_enemy_etalon_For_Tolerance[j])
Tolerance_sum += S
Tolerance = Tolerance_sum / Tolerance_denom
Efficiency[i] = defence_to_tolerance_ratio * Defence + (1.0 - defence_to_tolerance_ratio) * Tolerance
max_Efficiency = max(Efficiency)
for i in range(len(X)):
if Efficiency[i] + self.EPS > max_Efficiency and Efficiency[i] - self.EPS < max_Efficiency:
return X[i]
# Получить объекты i-го класса
def Get_Objects_From_Ith_Class(self, X, class_label):
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) == class_label:
cnt += 1
w, h = 4, cnt
res = [[0 for x in range(w)] for y in range(h)]
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) == class_label:
res[cnt] = obj
cnt += 1
return res
# --------------------------------------------------------------
# Получить "врагов" i-го класса
def Get_Enemies_Of_Ith_Class(self, X, class_label):
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) != class_label:
cnt += 1
w, h = 4, cnt
res = [[0 for x in range(w)] for y in range(h)]
cnt = 0
for i in range(len(X)):
obj = X[i]
if Get_Class_Label_Of_Object(obj) != class_label:
res[cnt] = obj
cnt += 1
return res
# --------------------------------------------------------------
def Truncate_Set(self, X, Y):
res = []
for i in range(len(X)):
is_exist = False
for j in range(len(Y)):
if X[i] == Y[j]:
is_exist = True
break
if is_exist == False:
res[len(res):] = [X[i]]
return res
# --------------------------------------------------------------
# FRIS-функция
def FRiS(self, a, x, c):
S_nom = math.sqrt(Distance(a[0:len(x)-1], x[0:len(x)-1])) - math.sqrt(Distance(x[0:len(x)-1], c[0:len(x)-1]))
S_denom = math.sqrt(Distance(a[0:len(x)-1], x[0:len(x)-1])) + math.sqrt(Distance(x[0:len(x)-1], c[0:len(x)-1]))
S = S_nom / S_denom
return S
# Метод "получить множество правильно классифицированных объектов"
def Construct_A_Set_Of_Correctly_Classified_Objects(self, Xl, Etalons):
res = []
for i in range(len(Xl)):
x = Xl[i]
label_of_cur_class = Get_Class_Label_Of_Object(x)
Enemies_Etalons = copy.deepcopy(Etalons)
del Enemies_Etalons[label_of_cur_class]
Enemies_Etalons = self.Merge_Elements(Enemies_Etalons)
nearest_neighbour_friend = self.Get_Nearest_Neighbour(x, Etalons[label_of_cur_class])
nearest_neighbour_enemy = self.Get_Nearest_Neighbour(x, Enemies_Etalons)
S = self.FRiS(nearest_neighbour_enemy, x, nearest_neighbour_friend)
if S > 0:
res[len(res):] = [x]
return res
# --------------------------------------------------------------
# Получить количество классов в выборке
def Get_Number_Of_Classes_In_Dataset(self, Xl):
len_of_obj = len(Xl[0])
labels = set()
for obj in Xl:
cur_label = obj[len_of_obj-1]
labels.add(cur_label)
return len(labels)
# Объекдинить элементы
def Merge_Elements(self, lst):
res = []
for i in range(len(lst)):
res += lst[i]
return res
# Классификация по одному ближайшему соседу
def Classify_By_1NN(self, u, Etalons):
number_of_classes = self.Get_Number_Of_Classes_In_Dataset(Etalons)
nn = self.Get_Nearest_Neighbour(u, Etalons)
return Get_Class_Label_Of_Object(nn)
# Получить эталоны
def Get_Etalons(self, Xl):
number_of_classes = self.Get_Number_Of_Classes_In_Dataset(Xl)
Etalons = [list() for i in range(number_of_classes)]
for i in range(3):
Xy = self.Get_Objects_From_Ith_Class(Xl, i)
X_enemies = self.Get_Enemies_Of_Ith_Class(Xl, i)
Etalons[i][len(Etalons[i]):] = [self.FindEtalon(Xy, X_enemies, Xl, 0.5)]
# print (Etalons)
for i in range(3):
Xy = self.Get_Objects_From_Ith_Class(Xl, i)
Enemies_Etalons = copy.deepcopy(Etalons)
del Enemies_Etalons[i]
Enemies_Etalons = self.Merge_Elements(Enemies_Etalons)
Etalons[i] = []
Etalons[i][len(Etalons[i]):] = [self.FindEtalon(Xy, Enemies_Etalons, Xl, 0.5)]
while len(Xl) > 0:
U = self.Construct_A_Set_Of_Correctly_Classified_Objects(Xl, Etalons)
Xl = self.Truncate_Set(Xl, U)
for i in range(3):
Xy = self.Get_Objects_From_Ith_Class(Xl, i)
if len(Xy) == 0:
continue
Enemies_Etalons = copy.deepcopy(Etalons)
del Enemies_Etalons[i]
Enemies_Etalons = self.Merge_Elements(Enemies_Etalons)
if len(Xy) == 1:
obj = Xy[0]
Etalons[i][len(Etalons[i]):] = [obj]
else:
Etalons[i][len(Etalons[i]):] = [self.FindEtalon(Xy, Enemies_Etalons, Xl, 0.5)]
return Etalons
def FRiS_Support_Classify(self, u, xl, k, draw_plot_if_two_dimentional, draw_lines_between_u_nn_and_supports):
number_of_classes = self.Get_Number_Of_Classes_In_Dataset(xl)
nn = [list() for t in range(number_of_classes)]
for i in range(number_of_classes):
Xy = self.Get_Objects_From_Ith_Class(xl, i)
nn[i] = self.Get_Nearest_Neighbour(u, Xy)
candidates_for_supporting = [list() for t in range(number_of_classes)]
for cur_class in range(number_of_classes):
candidates_for_supporting[cur_class] = self.Get_Objects_From_Ith_Class(xl, cur_class)
candidates_for_supporting[cur_class].remove(nn[cur_class])
supporting_list = [list() for t in range(number_of_classes)]
for cur_class in range(number_of_classes):
for it in range(k-1):
supporting_element = fris.Get_Nearest_Neighbour(nn[cur_class], candidates_for_supporting[cur_class])
candidates_for_supporting[cur_class].remove(supporting_element)
supporting_list[cur_class].append(supporting_element)
sum_of_fris = [0 for t in range(number_of_classes)]
for cur_class in range(number_of_classes):
for i in range(len(supporting_list[cur_class])):
sum_of_fris[cur_class] += self.FRiS(supporting_list[cur_class][i], nn[cur_class], u)
if draw_lines_between_u_nn_and_supports == True:
plt.plot([supporting_list[cur_class][i][0], nn[cur_class][0]], [supporting_list[cur_class][i][1], nn[cur_class][1]], color="yellow")
plt.plot([nn[cur_class][0], u[0]], [nn[cur_class][1], u[1]], color="yellow")
sum_of_fris[cur_class] /= len(supporting_list[cur_class])
if draw_lines_between_u_nn_and_supports == True:
plt.scatter([u[0]], [u[1]], c="green", s = 25)
label = sum_of_fris.index(max(sum_of_fris))
cmap_light = matplotlib.colors.ListedColormap(['red', 'green', '#AAAAFF'])
#---------------------------------------------------------
if len(u)-1 == 2 and draw_plot_if_two_dimentional == True:
cur_col="green"
if label == 0:
cur_col="red"
if label == 1:
cur_col="blue"
plt.scatter([u[0]], [u[1]], c = cur_col, s = 25)
x1_coord = []
x2_coord = []
y = []
for i in range(len(xl)):
x1_coord.append(xl[i][0])
x2_coord.append(xl[i][1])
y.append(xl[i][2])
plt.scatter(x1_coord, x2_coord, c = y, cmap = cmap_light, s = 25)
#---------------------------------------------------------
return label
Main program:
# Инициализация выборки
xl_0 = [[-1, 1, 0], [-1, 3, 0], [-1, 5, 0], [1, 1, 0], [1, 3, 0], [1, 5, 0], [3, 1, 0], [3, 3, 0], [3, 5, 0], [5, 1, 0], [5, 3, 0], [5, 5, 0]]
xl_1 = [[8, 3, 1], [7.8, 3, 1], [8.2, 3, 1], [8, 3.2, 1], [8, 2.8, 1], [7.9, 2.9, 1], [7.9, 3.1, 1], [8.1, 2.9, 1], [8.1, 3.1, 1]]
xl = xl_0 + xl_1
fris = FRiS_STOLP()
x_min = 5
y_min = 1
x_max = 9
y_max = 5
N = 30
M = 30
plt.figure(2, figsize = (16, 12))
dx = (x_max-x_min)/N
dy = (y_max-y_min)/M
classification_map = []
for i in range(N+1):
x = x_min + i * dx
for j in range(M+1):
y = y_min + j * dy
u = [x, y, -1]
classification_map.append(fris.FRiS_Support_Classify(u = u, xl = xl, k = 5, draw_plot_if_two_dimentional = True, draw_lines_between_u_nn_and_supports = False))
#----------------------------------------------------------
u = [6.0, 3.0, -1]
fris.FRiS_Support_Classify(u = u, xl = xl, k = 5, draw_plot_if_two_dimentional = False, draw_lines_between_u_nn_and_supports = True)
plt.show()
#----------------------------------------------------------
plt.figure(2, figsize = (16, 12))
N = 30
M = 30
dx = (x_max - x_min) / (N+1)
dy = (y_max - y_min) / (M+1)
xx, yy = np.arange(x_min, x_max, dx), np.arange(y_min, y_max, dy)
xv, yv = np.meshgrid(xx, yy)
cmap_light = matplotlib.colors.ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
c = np.empty(xv.shape, dtype = int)
for x_it in range(xv.shape[0]):
for y_it in range(yv.shape[0]):
c[y_it][x_it] = classification_map[x_it*yv.shape[0]+y_it]
plt.pcolormesh(xv, yv, c, cmap = cmap_light)
x1_coord = []
x2_coord = []
y = []
for i in range(len(xl)):
x1_coord.append(xl[i][0])
x2_coord.append(xl[i][1])
y.append(xl[i][2])
plt.scatter(x1_coord, x2_coord, c = y, cmap = plt.cm.Set1, s = 25)
plt.show()
# Создание объекта FRIS-STOLP
fris = FRiS_STOLP()
# Получили эталоны для выборки по всем классам
Etalons = fris.Get_Etalons(data)
all_data = np.loadtxt(open("./wine_data.csv","r"), delimiter = ",", skiprows = 0, dtype = np.float64)
# load class labels from column 1
y_wine = all_data[:,0]
#print (y_wine)
# conversion of the class labels to integer-type array
y_wine = y_wine.astype(np.int64, copy = False)
# load the 14 features
X_wine = all_data[:,1:]
data = all_data[:, 6:8] #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 6:9
x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
plt.figure(2, figsize = (8, 6))
plt.clf()
# Plot the training points
plt.scatter(data[:, 0], data[:, 1], c = y_wine, cmap = plt.cm.Set1, edgecolor="k")
plt.xlabel('Flavanoids')
plt.ylabel('Nonflavanoid phenols')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
x_coord = []
y_coord = []
labels = []
Etalons = fris.Merge_Elements(Etalons)
for i in range(len(Etalons)):
x_coord.append(Etalons[i][0])
for i in range(len(Etalons)):
y_coord.append(Etalons[i][1])
#plt.plot(iris_data[119][0], iris_data[119][1], 'ro', color="g")
plt.plot(x_coord, y_coord, 'ro', color="b")
plt.show()
Creating a classification map:
def Create_Classification_Map(x_min, x_max, N, y_min, y_max, M, Etalons):
dx = (x_max - x_min)/N
dy = (y_max - y_min)/M
obj_list = [list() for t in range(N*M)]
for i in range(N):
x = x_min + i * dx
for j in range(M):
y = y_min + j * dy
u = [x, y, -1]
label = fris_for_iris.Classify_By_1NN(u, Etalons)
obj = [x, y, label]
obj_list[i*M+j] = obj
return obj_list
fris_for_iris = FRiS_STOLP()
Etalons_iris = fris_for_iris.Get_Etalons(iris_data)
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, 2:4] # we only take the first two features.
y = iris.target
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
plt.figure(2, figsize = (10, 8))
plt.clf()
# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c = y, edgecolor="k", alpha = 0.5)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.axis
x_coord = []
y_coord = []
labels = []
Etalons_iris = fris_for_iris.Merge_Elements(Etalons_iris)
classification_map = Create_Classification_Map(x_min, x_max, 100, y_min, y_max, 100, Etalons_iris)
x_coords_for_classification_map = []
y_coords_for_classification_map = []
c_for_classification_map = []
for i in range(len(classification_map)):
obj = classification_map[i]
label = Get_Class_Label_Of_Object(obj)
cur_col = "white"
if label == 0:
cur_col="red"
if label == 1:
cur_col="green"
if label == 2:
cur_col="yellow"
x_coords_for_classification_map.append(classification_map[i][0])
y_coords_for_classification_map.append(classification_map[i][1])
c_for_classification_map.append(label)
N = 100
M = 100
dx = (x_max - x_min) / N
dy = (y_max - y_min) / M
xx, yy = np.arange(x_min, x_max, dx), np.arange(y_min, y_max, dy)
xv, yv = np.meshgrid(xx, yy)
cmap_light = matplotlib.colors.ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
c = np.empty(xv.shape, dtype = int)
for x_it in range(xv.shape[0]):
for y_it in range(yv.shape[0]):
c[x_it][y_it] = c_for_classification_map[y_it*xv.shape[0]+x_it]
plt.pcolormesh(xv, yv, c, cmap = cmap_light)
for i in range(len(X)):
obj = X[i]
label = y[i]
cur_col = "white"
if label == 0:
cur_col="red"
if label == 1:
cur_col="green"
if label == 2:
cur_col="yellow"
plt.plot([obj[0]], [obj[1]], 'ro', color = cur_col, alpha = 1.0)
for i in range(len(Etalons_iris)):
x_coord.append(Etalons_iris[i][0])
for i in range(len(Etalons_iris)):
y_coord.append(Etalons_iris[i][1])
for i in range(len(Etalons_iris)):
shape="s"
obj = Etalons_iris[i]
label = Get_Class_Label_Of_Object(obj)
cur_col = "white"
if label == 0:
cur_col = "red"
if label == 1:
cur_col = "green"
if label == 2:
cur_col = "yellow"
plt.plot([x_coord[i]], [y_coord[i]], "ro", color = cur_col, markersize = 10, linestyle="-", linewidth = 5, markeredgecolor="black")
plt.show()