# k-means clustering
import numpy as np
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from matplotlib import pyplot
import pandas as pd
import neurokit2 as nk


def get_minimum_length(data: np.ndarray, threshold: int = 80000) -> int:
    """
    Find the length of the shortest signal, capped at ``threshold``.

    :param data: iterable of numpy arrays (one array per signal); only the
        first dimension of each array is inspected
    :param threshold: upper bound for the result; it is also the value
        returned when ``data`` is empty or no signal is shorter than it
    :return: the smallest value among ``threshold`` and every signal length
    """
    shortest = min((signal.shape[0] for signal in data), default=threshold)
    return min(shortest, threshold)


def down_sample(data: np.ndarray, size: int) -> np.ndarray:
    """
    Resample every signal to the same number of samples.

    :param data: iterable of numpy arrays (one array per signal)
    :param size: desired number of samples for each resampled signal
    :return: float32 ndarray built from the resampled signals, one entry per
        input signal
    """
    # The "FFT" method of nk.signal_resample can be used instead of
    # "interpolation" for a more accurate down sampling of the signal.
    resampled = [
        nk.signal_resample(signal, desired_length=size, method="interpolation")
        for signal in data
    ]
    return np.array(resampled, dtype=np.float32)


# Load the trial metadata from the .csv file and the recorded signals from the
# .npy files.
# NOTE(review): allow_pickle=True makes np.load unpickle object arrays, which
# can execute arbitrary code -- only load these files from a trusted source.
df = pd.read_csv("../cognitive_load _analysis/smell_cognitive_load_analysis.csv")
eda = np.load("../cognitive_load _analysis/Hautleitfähigkeit_smell.npy", allow_pickle=True)

# down sampling the EDA signal so that every recording has the same length
eda = down_sample(eda, size=get_minimum_length(eda))
group = df["smell"].to_numpy()

hrv = np.load("../cognitive_load _analysis/HR_smell.npy", allow_pickle=True)

# down sampling the HR signal so that every recording has the same length
hrv = down_sample(hrv, size=get_minimum_length(hrv))
labels = df["intensity"].to_numpy()

# Select one smell group; here the "beef" group is used, but any value of the
# "smell" column can be selected.
index = np.where(group == "beef")

# Encode the intensity labels as integer classes.
le = preprocessing.LabelEncoder()
le.fit(labels)
found_classes = le.classes_
transformed_classes = le.transform(labels)

# Choose which signal is used for clustering.
# Uncomment exactly one of the lines below to switch the input.
X = eda[index]
# X = hrv[index]
# You can also use the combined values of EDA and HR:
# X = np.hstack([eda[index], hrv[index]])

# define the model
model = KMeans(n_clusters=2, n_init=3, max_iter=1000, verbose=1, tol=1e-6, random_state=91058)

# fit the model
model.fit(X)

# assign a cluster to each example
predictions = model.predict(X)

# NOTE(review): KMeans cluster ids are arbitrary, so cluster 0 need not
# correspond to encoded class 0. This score compares the ids directly and can
# therefore under-report agreement; consider aligning clusters to classes or
# using a permutation-invariant metric (e.g. the adjusted Rand index).
acc = accuracy_score(transformed_classes[index], predictions)
print(f"Accuracy: {acc * 100:.2f}")

# retrieve unique clusters