# k-means clustering
import neurokit2 as nk
import numpy as np
import pandas as pd
from matplotlib import pyplot
from scipy.optimize import linear_sum_assignment
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# function to find the minimum length of the signals
def get_minimum_length(data:np.ndarray, threshold:int = 80000):
    """
    Find the shortest signal length in ``data``, capped at ``threshold``.

    :param data: iterable of numpy arrays, one array per signal; only the
        size of the first axis (``shape[0]``) of each array is inspected
    :param threshold: upper bound for the result; it is returned unchanged
        when no signal is shorter than it (including when ``data`` is empty)
    :return: the smaller of ``threshold`` and the shortest first-axis length
    """
    # Seed the candidates with the threshold so that it acts as the cap.
    candidate_lengths = [threshold]
    candidate_lengths.extend(signal.shape[0] for signal in data)
    return min(candidate_lengths)

# function to equally down sample the signal to the desired size
def down_sample(data:np.ndarray, size:int ):
    """
    Resample every signal in ``data`` to a common length.

    :param data: iterable of signals (one array per signal); the signals may
        have different lengths
    :param size: desired length passed to the resampler for each signal
    :return: float32 numpy array built from the list of resampled signals
    """
    # Interpolation is used here; the FFT method of nk.signal_resample can be
    # used instead for more accurate down sampling of the signal.
    resampled_signals = [
        nk.signal_resample(signal, desired_length=size, method="interpolation")
        for signal in data
    ]
    return np.array(resampled_signals, dtype=np.float32)


# loading the per-trial metadata (smell group, intensity label) from the .csv file
df = pd.read_csv("../cognitive_load _analysis/smell_cognitive_load_analysis.csv")

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects and can
# execute code while loading; only use it on .npy files from a trusted source.
eda = np.load("../cognitive_load _analysis/Hautleitfähigkeit_smell.npy", allow_pickle=True)
# down sampling the eda signals so that every recording has the same length
eda = down_sample(eda, size=get_minimum_length(eda))

group = df["smell"].to_numpy()
hrv = np.load("../cognitive_load _analysis/HR_smell.npy", allow_pickle=True)
# down sampling the heart rate signals so that every recording has the same length
hrv = down_sample(hrv, size=get_minimum_length(hrv))

labels = df["intensity"].to_numpy()


# selecting a particular smell group; here the "beef" group is selected,
# any other value of the "smell" column can be used instead
index = np.where(group == "beef")

# encoding the intensity labels as integers (one integer per distinct label)
le = preprocessing.LabelEncoder()
le.fit(labels)

found_classes = le.classes_

transformed_classes = le.transform(labels)

# Here you can choose which parameters you want to use for clustering
# un comment the line to use the parameter for clustering
X = eda[index]
#X = hrv[index]
# you can use the combined values of eda and hrv
# X = np.hstack([eda[index], hrv[index]])

# define the model
# NOTE(review): n_clusters=2 presumably matches two intensity levels - confirm
model = KMeans(n_clusters=2, n_init=3, max_iter=1000, verbose=1, tol=1e-6, random_state=91058)
# fit the model
model.fit(X)
# assign a cluster to each example (raw, arbitrary cluster ids)
predictions = model.predict(X)

# K-means cluster ids are arbitrary: cluster 0 is not tied to encoded class 0.
# Comparing them to the labels directly makes the accuracy depend on which id
# each cluster happened to receive, so first find the one-to-one
# cluster -> class mapping that maximises the number of matching samples.
true_classes = transformed_classes[index]
# contingency[i, j] = number of samples in cluster i whose true class is j
contingency = np.zeros((model.n_clusters, len(found_classes)), dtype=np.int64)
np.add.at(contingency, (predictions, true_classes), 1)
matched_clusters, matched_classes = linear_sum_assignment(contingency, maximize=True)
# clusters left without a class (more clusters than classes) map to -1,
# which never equals an encoded label and therefore counts as an error
cluster_to_class = np.full(model.n_clusters, -1, dtype=np.int64)
cluster_to_class[matched_clusters] = matched_classes
aligned_predictions = cluster_to_class[predictions]

acc = accuracy_score(true_classes, aligned_predictions)
print("Accuracy: %.2f" % (acc * 100))
# retrieve unique clusters