diff --git a/.idea/AIS_Regress.iml b/.idea/AIS_Regress.iml
index 8dc09e5476bcb840206461450ae44f23421d964a..74e2033a0a270c6e69b102c4cc7d107819aa6b2b 100644
--- a/.idea/AIS_Regress.iml
+++ b/.idea/AIS_Regress.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (BaseEnv)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index d1e22ecb89619a9c2dcf51a28d891a196d2462a0..e5f7f469df51294ab9ccfd5f85eee4b788eb4dfa 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (BaseEnv)" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
 </project>
\ No newline at end of file
diff --git a/Experiments/1-VariablesEvaluation.py b/Experiments/1-VariablesEvaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a57dcddb550d4b2f280328ac11da52efbb8dd66
--- /dev/null
+++ b/Experiments/1-VariablesEvaluation.py
@@ -0,0 +1,200 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.statistics_utils import get_pvalue, compute_basic_statistics, compute_bivariate_statistics
+from IO_utils.List_reader_utils import cross_check_dictionary, remove_features, get_all_dict_types, treat_missing_values
+from _utils.plot_utils import plot_mv, plot_distribution_categorical, plot_distribution_numerical, \
+    plot_significant_values
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import os
+import yaml
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import GridSearchCV
+
+
+# %% DATALOADIND
+def get_statistics(input_df, data_dictionary, output_dir='../../out/data_exploration/statistics'):
+    tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h']
+
+    d, all_indices = cross_check_dictionary(input_df, data_dictionary, tables, output=[])
+
+    # Reorder dataframe according to data dictionary
+    reordered_df = input_df.reindex(columns=all_indices).drop(columns=['Id'])
+
+    types = []
+
+    all_df, all_missing_values, clean_keys = remove_features(reordered_df, p=1, exclude=[])
+
+    for t in tables:
+        types.extend(get_all_dict_types(data_dictionary[t], types=[]))
+
+    # Compute statistics of the selected tables
+    statistics = compute_basic_statistics(all_df)
+    statistics_bivariate = compute_bivariate_statistics(all_df, input_df['dmRS'], input_df['mortality'], types)
+
+    p_dmRS, methods = get_pvalue(all_df, input_df['dmRS'], types)
+    p_mortality, _ = get_pvalue(all_df, input_df['mortality'], types)
+    # p_shiftmRS, _ = get_pvalue(all_df, input_df['shift_mRS'], types)
+
+    statistics['p_dmRS'] = p_dmRS
+    statistics['p_mortality'] = p_mortality
+    # statistics['shift_mRS'] = p_shiftmRS
+    statistics['method'] = methods
+
+    statistics['missing_values'] = all_missing_values
+    statistics['Percentage (%)'] = (statistics['missing_values'] * 100. / input_df.shape[0]).to_list()
+    statistics['types'] = types
+
+    statistics_bivariate['p_dmRS'] = p_dmRS
+    statistics_bivariate['p_mortality'] = p_mortality
+
+    #plot_significant_values(statistics, value='p_mortality', th=0.01, out_dir=output_dir)
+    #plot_significant_values(statistics, value='p_dmRS', th=0.01, out_dir=output_dir)
+
+    #plot_significant_values(statistics, value='p_mortality', th=0.05, out_dir=output_dir)
+    #plot_significant_values(statistics, value='p_dmRS', th=0.05, out_dir=output_dir)
+
+    # Save file
+    file_path = os.path.join(output_dir, 'output.xlsx')
+    # assert os.path.isfile(file_path), 'File already exists'
+    with pd.ExcelWriter(file_path) as writer:
+        statistics.to_excel(writer, sheet_name='Sheet1')
+
+    # Save file
+    file_path_bivariate = os.path.join(output_dir, 'output_bivariate.xlsx')
+    # assert os.path.isfile(file_path), 'File already exists'
+    with pd.ExcelWriter(file_path_bivariate) as writer:
+        statistics_bivariate.to_excel(writer, sheet_name='Sheet1')
+
+    return statistics
+
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+output_features = ['dmRS', 'mRS90d', 'shift_mRS', 'mortality']
+tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h']
+data_dicts = yaml.load(open("../dictionaries/dictionary_timepoints.yml"), Loader=yaml.Loader)
+dir_out = "C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results/data_exploration"
+
+# Output paths
+missing_values_path = os.path.join(dir_out, 'missing_values')
+statistics_path = os.path.join(dir_out, 'statistics')
+features_path = os.path.join(dir_out, 'features')
+
+# Get statistics without removing missing values etc
+statistics = get_statistics(clean_df, data_dicts,
+                            output_dir=statistics_path)
+
+#### Steps from List Reader
+selected_d, all_keys = cross_check_dictionary(clean_df, data_dicts, tables, output_features)
+reordered_df = clean_df.reindex(columns=all_keys)
+clean_df, _, clean_keys = remove_features(reordered_df, p=0.1, exclude=output_features)
+keys = [i for c, i in enumerate(all_keys) if not clean_keys[c]]
+for k in keys:
+    selected_d.pop(k)
+final_df = treat_missing_values(clean_df, method='median')
+FP = FeaturePreprocessing(final_df.drop(columns=['dmRS', 'mRS90d', 'shift_mRS', 'mortality']), selected_d)
+
+# Remove data dictionaries of removed features
+# Get more important features from lr and random forest
+
+for output_feature in ['dmRS', 'mortality']:
+    output_vector = final_df[output_feature].to_numpy(dtype=int).squeeze()
+    feature_vector = FP.create_features(final_df.drop(columns=['dmRS', 'mRS90d', 'shift_mRS', 'mortality']))
+    names = FP.get_feature_names()
+
+    rf = RandomForestClassifier(random_state=True, max_depth=5, n_estimators=100)
+    f = rf.fit(feature_vector, output_vector)
+    from sklearn.inspection import permutation_importance
+    result = permutation_importance(
+        f, feature_vector, output_vector, n_repeats=10, random_state=42, n_jobs=2
+    )
+    importance = result.importances_mean
+    importance_indices = np.argsort(importance)[::-1]
+
+    plt.figure(figsize=(15, 6))
+    x =[names[x] for x in importance_indices[0:20]]
+    plt.bar(x, importance[importance_indices[0:20]], yerr=result.importances_std[importance_indices[0:20]])
+    plt.xticks(rotation=15, ha='right')
+
+    plt.show()
+
+
+## Missing values
+missing_values = statistics['missing_values']
+plot_mv(missing_values, th=0.1 * clean_df.shape[0], out_dir=missing_values_path)
+
+with pd.ExcelWriter(os.path.join(missing_values_path, 'missing_values.xlsx')) as writer:
+    statistics[['missing_values', 'Percentage (%)']].to_excel(writer, float_format="%0.1f")
+
+if not os.path.isdir(os.path.join(features_path, 'Target')):
+    os.mkdir(os.path.join(features_path, 'Target'))
+## Target
+plot_distribution_categorical(clean_df, 'mRS90d', table=data_dicts['Output'],
+                              title='Distribution of mRS at 90 days',
+                              out=True,
+                              out_dir=os.path.join(features_path, 'Target'))
+
+plot_distribution_categorical(clean_df, 'dmRS', table=data_dicts['Output'],
+                              title='Distribution of functional outcome at 90 days',
+                              out=True,
+                              out_dir=os.path.join(features_path, 'Target'))
+
+plot_distribution_categorical(clean_df, 'shift_mRS', table=data_dicts['Output'],
+                              title='Distribution shift in mRS at 90 days',
+                              out=True,
+                              out_dir=os.path.join(features_path, 'Target'))
+
+plot_distribution_categorical(clean_df, 'mortality', table=data_dicts['Output'],
+                              title='Distribution of mortality at 90 days',
+                              out=True,
+                              out_dir=os.path.join(features_path, 'Target'))
+
+#### Table details
+tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h']
+for t in tables:
+    keys_table = list(data_dicts[t].keys())
+    if not os.path.isdir(os.path.join(features_path, t)):
+        os.mkdir(os.path.join(features_path, t))
+
+    # Save individual statistics of each table
+    # with pd.ExcelWriter(os.path.join(statistics_path, 'statistics_{}.xlsx'.format(t))) as writer:
+    #    statistics_table = statistics.loc[keys_table, :]
+    #    statistics_table.to_excel(writer, float_format="%0.5f")
+
+    # Add target values to k for visualization and check that they are on the clean table
+    keys_table = keys_table + ['mRS90d', 'dmRS', 'shift_mRS', 'mortality']
+    keys_table = list(set(keys_table))
+    keys_table = [a for a in keys_table if a in clean_df.columns]
+    df_table = clean_df[keys_table]
+
+    # plot_correlation_features(df_table, os.path.join(statistics_path,
+    #                                                 'Correlation_{}_table.png'.format(t)))
+
+    for k in keys_table:
+        print(k)
+        if k in ['mRS90d', 'dmRS', 'shift_mRS', 'mortality']:
+            continue
+        p_values = [statistics.loc[k, target] for target in ['p_dmRS', 'p_mortality']]
+        type_k = data_dicts[t][k]['type']
+        if type_k in ['cat', 'ord']:
+            plot_distribution_categorical(clean_df, k,
+                                          table=data_dicts[t], title='Distribution {}'.format(k),
+                                          out_dir='C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results/data_exploration/features/{}'.
+                                          format(t),
+                                          p_values=p_values)
+
+        elif type_k in ['int', 'float']:
+            print('int')
+
+            plot_distribution_numerical(clean_df, k,
+                                        title='Distribution {}'.format(k),
+                                        out_dir='C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results/data_exploration/features/{}'
+                                        .format(t),
+                                        p_values=p_values)
+
+plt.close()
diff --git a/Experiments/2-VariableSelection.py b/Experiments/2-VariableSelection.py
new file mode 100644
index 0000000000000000000000000000000000000000..1205afa82d34c84727170cafb108f9347ff19082
--- /dev/null
+++ b/Experiments/2-VariableSelection.py
@@ -0,0 +1,162 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+from _utils.Result_container import Result_container
+from train import train_model
+from train_graph import train_model_graph
+from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost
+# %% DATALOADIND
+import torch
+import os
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median',
+                    output_feature=['dmRS'])
+
+output_vector = table.output_vector
+meta_vector = table.meta_df
+
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+
+methods = ['all', 'p_value', 'random_forest', 'mrmr']
+#methods = ['mrmr']
+Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'],
+                            output=['FCN', 'LR', 'RF', 'MLP', 'XGB', 'Graph'])
+#### From all variables evaluate different variable selection methods
+
+for method in methods:
+
+    for k in [3, 5, 10, 15, 20, 30, 50]:
+    #for k in [5,10]:
+        if method == 'all' and k > 3:
+            continue
+
+        features = table.select_features(method=method, k=k, fold_indices=fold_indices)
+
+        feature_vector = table.final_df[features]
+        FP = FeaturePreprocessing(feature_vector, table.selected_d)
+        feature_vector = FP.create_features(feature_vector)
+
+        config = {'lr': 0.001,
+                  'momentum': 0,
+                  'weight_decay': 0.001,
+                  'layers': {'number': 3,
+                             'layer1': 80,
+                             'layer2': 20
+
+                             },
+                  'dropout': 0,
+                  'classification': True,
+                  'out_classes': 2}
+
+        config_graph = {
+            'Age': True,
+            'beta_Age': 2,
+            'Sex': False,
+            'pre-mRS': True,
+            'beta_mRS': 1,
+            'NIHSS': False,
+            'beta_NIHSS': 1
+
+        }
+        metrics_FCN = {}
+        metrics_LR = {}
+        metrics_RF = {}
+        metrics_MLP = {}
+        metrics_XGB = {}
+        metrics_Graph = {}
+        for f in range(5):
+            # FCN
+            ("Training FCN of fold {}".format(f))
+            dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f],
+                                           table.selected_d,load_images=False, meta=meta_vector,  one_hot=True)
+
+            dl = dataloader_fold.get_loaders()
+            dl_graph = dataloader_fold.build_graph(config_graph)
+
+            torch.manual_seed(0)
+            print('-----------------Training FCN--------------------- ')
+
+            result, model = train_model(config, loaders=dl)
+            metrics_FCN['Fold {0}'.format(f)] = result['val_metrics']
+            print('-------------------------------------- ')
+
+            # LR
+            print('-----------------Training LR--------------------- ')
+            _, result_LR = apply_LR(dataloader_fold.train_features, dataloader_fold.val_features,
+                                    dataloader_fold.test_features, dataloader_fold.train_output,
+                                    dataloader_fold.val_outout, dataloader_fold.test_outout)
+            metrics_LR['Fold {0}'.format(f)] = result_LR[1]
+            print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_LR[0]['auc'],
+                                                                                                    result_LR[1]['auc'],
+                                                                                                    result_LR[2][
+                                                                                                        'auc']))
+            print('-------------------------------------- ')
+
+            # RF
+            print('-----------------Training RF--------------------- ')
+
+            _, result_RF = apply_random_forest(dataloader_fold.train_features, dataloader_fold.val_features,
+                                               dataloader_fold.test_features, dataloader_fold.train_output,
+                                               dataloader_fold.val_outout, dataloader_fold.test_outout)
+            metrics_RF['Fold {0}'.format(f)] = result_RF[1]
+            print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_RF[0]['auc'],
+                                                                                                    result_RF[1]['auc'],
+                                                                                                    result_RF[2][
+                                                                                                        'auc']))
+            print('-------------------------------------- ')
+            # MLP
+            print('-----------------Training MLP--------------------- ')
+
+            _, result_MLP = apply_mlp(dataloader_fold.train_features, dataloader_fold.val_features,
+                                      dataloader_fold.test_features, dataloader_fold.train_output,
+                                      dataloader_fold.val_outout, dataloader_fold.test_outout)
+            metrics_MLP['Fold {0}'.format(f)] = result_MLP[1]
+            print(
+                "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_MLP[0]['auc'],
+                                                                                                  result_MLP[1]['auc'],
+                                                                                                  result_MLP[2]['auc']))
+            print('-------------------------------------- ')
+            # XGB Boost
+            print('-----------------Training XGB Boost--------------------- ')
+
+            _, result_XGB = apply_xgbBoost(dataloader_fold.train_features, dataloader_fold.val_features,
+                                           dataloader_fold.test_features, dataloader_fold.train_output,
+                                           dataloader_fold.val_outout, dataloader_fold.test_outout)
+            metrics_XGB['Fold {0}'.format(f)] = result_XGB[1]
+            print(
+                "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_XGB[0]['auc'],
+                                                                                                  result_XGB[1]['auc'],
+                                                                                                  result_XGB[2]['auc']))
+            print('-------------------------------------- ')
+            print('-----------------Training Graph--------------------- ')
+
+            result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f])
+            metrics_Graph['Fold {0}'.format(f)] = result['val_metrics']
+            print('-------------------------------------- ')
+
+        if method == 'all':
+            Result_c.update('FCN', method, metrics_FCN)
+            Result_c.update('LR', method, metrics_LR)
+            Result_c.update('RF', method, metrics_RF)
+            Result_c.update('MLP', method, metrics_MLP)
+            Result_c.update('XGB', method, metrics_XGB)
+            Result_c.update('Graph', method, metrics_Graph)
+
+        else:
+            Result_c.update('FCN', method + '_' + str(k), metrics_FCN)
+            Result_c.update('LR', method + '_' + str(k), metrics_LR)
+            Result_c.update('RF', method + '_' + str(k), metrics_RF)
+            Result_c.update('MLP', method + '_' + str(k), metrics_MLP)
+            Result_c.update('XGB', method + '_' + str(k), metrics_XGB)
+            Result_c.update('Graph', method + '_' + str(k), metrics_Graph)
+
+        #Saves results on validation set
+        output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results'
+        Result_c.save(output_dir=output_dir, name='Variable_selection_val')
diff --git a/Experiments/3-MVStrategy.py b/Experiments/3-MVStrategy.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b960eee7511441bf85a824df2765582eceb3421
--- /dev/null
+++ b/Experiments/3-MVStrategy.py
@@ -0,0 +1,150 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+from _utils.Result_container import Result_container
+from train import train_model
+from train_graph import train_model_graph
+from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost
+# %% DATALOADIND
+import torch
+import os
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+strategies = [ 'median', 'knn', 'mice']
+Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'],
+                            output=['FCN', 'LR', 'RF', 'MLP', 'XGB', 'Graph'])
+#### From all variables evaluate different variable selection methods
+
+for s in strategies:
+
+    table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints',
+                        mv_strategy=s,
+                        output_feature=['dmRS'])
+
+    output_vector = table.output_vector
+    meta_vector = table.meta_df
+
+    fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+    features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices)
+
+    feature_vector = table.final_df[features]
+    FP = FeaturePreprocessing(feature_vector, table.selected_d)
+    feature_vector = FP.create_features(feature_vector)
+
+    config = {'lr': 0.001,
+              'momentum': 0,
+              'weight_decay': 0.001,
+              'layers': {'number': 3,
+                         'layer1': 40,
+                         'layer2': 20
+
+                         },
+              'dropout': 0,
+              'classification': True,
+              'out_classes': 2}
+
+    metrics_FCN = {}
+    metrics_LR = {}
+    metrics_RF = {}
+    metrics_MLP = {}
+    metrics_XGB = {}
+    metrics_Graph = {}
+
+    config_graph = {
+        'Age': True,
+        'beta_Age': 3,
+        'Sex': False,
+        'pre-mRS': True,
+        'beta_mRS': 1,
+        'NIHSS': False,
+        'beta_NIHSS': 1
+
+    }
+    for f in range(5):
+        ("Training  fold {}".format(f))
+
+        # GCN
+        print('-----------------Training GCN--------------------- ')
+
+        dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f],
+                                       table.selected_d, load_images=False,meta =meta_vector, one_hot=True)
+        dl_graph = dataloader_fold.build_graph(config_graph)
+        torch.manual_seed(0)
+
+        result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f])
+        metrics_Graph['Fold {0}'.format(f)] = result['test_metric']
+
+        # FCN
+        print('-----------------Training LR--------------------- ')
+
+        
+        dl = dataloader_fold.get_loaders()
+        torch.manual_seed(0)
+
+        result, model = train_model(config, loaders=dl)
+        metrics_FCN['Fold {0}'.format(f)] = result['test_metric']
+        # LR
+        print('-----------------Training LR--------------------- ')
+        _, result_LR = apply_LR(dataloader_fold.train_features, dataloader_fold.val_features,
+                                dataloader_fold.test_features, dataloader_fold.train_output,
+                                dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_LR['Fold {0}'.format(f)] = result_LR[2]
+        print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_LR[0]['auc'],
+                                                                                                result_LR[1]['auc'],
+                                                                                                result_LR[2][
+                                                                                                    'auc']))
+        print('-------------------------------------- ')
+
+        # RF
+        print('-----------------Training RF--------------------- ')
+
+        _, result_RF = apply_random_forest(dataloader_fold.train_features, dataloader_fold.val_features,
+                                           dataloader_fold.test_features, dataloader_fold.train_output,
+                                           dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_RF['Fold {0}'.format(f)] = result_RF[2]
+        print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_RF[0]['auc'],
+                                                                                                result_RF[1]['auc'],
+                                                                                                result_RF[2][
+                                                                                                    'auc']))
+        print('-------------------------------------- ')
+        # MLP
+        print('-----------------Training MLP--------------------- ')
+
+        _, result_MLP = apply_mlp(dataloader_fold.train_features, dataloader_fold.val_features,
+                                  dataloader_fold.test_features, dataloader_fold.train_output,
+                                  dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_MLP['Fold {0}'.format(f)] = result_MLP[2]
+        print(
+            "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_MLP[0]['auc'],
+                                                                                              result_MLP[1]['auc'],
+                                                                                              result_MLP[2]['auc']))
+        print('-------------------------------------- ')
+        # XGB Boost
+        print('-----------------Training XGB Boost--------------------- ')
+
+        _, result_XGB = apply_xgbBoost(dataloader_fold.train_features, dataloader_fold.val_features,
+                                       dataloader_fold.test_features, dataloader_fold.train_output,
+                                       dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_XGB['Fold {0}'.format(f)] = result_XGB[2]
+        print(
+            "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_XGB[0]['auc'],
+                                                                                              result_XGB[1]['auc'],
+                                                                                              result_XGB[2]['auc']))
+        print('-------------------------------------- ')
+
+    Result_c.update('FCN', s, metrics_FCN)
+    Result_c.update('LR', s, metrics_LR)
+    Result_c.update('RF', s, metrics_RF)
+    Result_c.update('MLP', s, metrics_MLP)
+    Result_c.update('XGB', s, metrics_XGB)
+
+    Result_c.update('Graph', s, metrics_Graph)
+
+output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results'
+Result_c.save(output_dir=output_dir, name='MVStrategy_selection_test_mrmr10')
diff --git a/Experiments/4-Timepoints.py b/Experiments/4-Timepoints.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6d15f96322d0c2c7c41a79d524d66c4d4262dff
--- /dev/null
+++ b/Experiments/4-Timepoints.py
@@ -0,0 +1,164 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+from _utils.Result_container import Result_container
+from train import train_model
+from train_graph import  train_model_graph
+from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost
+# %% DATALOADIND
+import torch
+import os
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+tables_modalities = [
+        #['NCCT', 'CTP', 'CTA'],
+        #['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out'],
+        #['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Control CT'],
+
+        #['Metadata', 'NCCT', 'CTP', 'CTA'],
+        #['Metadata', 'NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out'],
+        #['Metadata', 'NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Control CT'],
+
+        #['Metadata'],
+        #['Clinical'],
+        #['Metadata', 'Clinical'],
+        #['Treatment', 'Treatment_out', 'Metadata', 'Clinical']]
+        ['NCCT', 'CTP', 'CTA', 'Metadata', 'Clinical'],
+        ['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Metadata', 'Clinical'],
+        ['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Control CT', 'Metadata', 'Clinical']]
+
+
+Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'],
+                            output=['FCN', 'LR', 'RF', 'MLP', 'XGB', 'Graph'])
+#### From all variables evaluate different variable selection methods
+
+for t in tables_modalities:
+
+    table = TableReader(input_df=clean_df, tables=t, data_dictionaries='modalities',
+                        mv_strategy='median',
+                        output_feature=['mortality'])
+
+    output_vector = table.output_vector
+    meta_vector = table.meta_df
+
+    fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+    print(t)
+    features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices)
+
+    feature_vector = table.final_df[features]
+    FP = FeaturePreprocessing(feature_vector, table.selected_d)
+    feature_vector = FP.create_features(feature_vector)
+
+    config = {'lr': 0.001,
+              'momentum': 0,
+              'weight_decay': 0.001,
+              'layers': {'number': 3,
+                         'layer1': 40,
+                         'layer2': 20
+
+                         },
+              'dropout': 0,
+              'classification': True,
+              'out_classes': 2}
+
+    config_graph = {
+        'Age': True,
+        'beta_Age': 4,
+        'Sex': True,
+        'pre-mRS': False,
+        'beta_mRS': 1,
+        'NIHSS': False,
+        'beta_NIHSS': 1
+
+    }
+    metrics_FCN = {}
+    metrics_LR = {}
+    metrics_RF = {}
+    metrics_MLP = {}
+    metrics_XGB = {}
+    metrics_Graph = {}
+    for f in range(5):
+        # GCN
+
+        dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f],
+                                       table.selected_d, load_images=False, meta=meta_vector, one_hot=True)
+        dl_graph = dataloader_fold.build_graph(config_graph)
+        torch.manual_seed(0)
+
+        result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f])
+        metrics_Graph['Fold {0}'.format(f)] = result['test_metric']
+
+        
+        # FCN
+        ("Training FCN of fold {}".format(f))
+      
+        dl = dataloader_fold.get_loaders()
+        torch.manual_seed(0)
+
+        result, model = train_model(config, loaders=dl)
+        metrics_FCN['Fold {0}'.format(f)] = result['test_metric']
+        
+        # LR
+        print('-----------------Training LR--------------------- ')
+        _, result_LR = apply_LR(dataloader_fold.train_features, dataloader_fold.val_features,
+                                dataloader_fold.test_features, dataloader_fold.train_output,
+                                dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_LR['Fold {0}'.format(f)] = result_LR[2]
+        print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_LR[0]['auc'],
+                                                                                                result_LR[1]['auc'],
+                                                                                                result_LR[2][
+                                                                                         'auc']))
+        print('-------------------------------------- ')
+
+        # RF
+        print('-----------------Training RF--------------------- ')
+
+        _, result_RF = apply_random_forest(dataloader_fold.train_features, dataloader_fold.val_features,
+                                           dataloader_fold.test_features, dataloader_fold.train_output,
+                                           dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_RF['Fold {0}'.format(f)] = result_RF[2]
+        print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_RF[0]['auc'],
+                                                                                                result_RF[1]['auc'],
+                                                                                                result_RF[2][
+                                                                                                    'auc']))
+        print('-------------------------------------- ')
+        # MLP
+        print('-----------------Training MLP--------------------- ')
+
+        _, result_MLP = apply_mlp(dataloader_fold.train_features, dataloader_fold.val_features,
+                                  dataloader_fold.test_features, dataloader_fold.train_output,
+                                  dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_MLP['Fold {0}'.format(f)] = result_MLP[2]
+        print(
+            "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_MLP[0]['auc'],
+                                                                                              result_MLP[1]['auc'],
+                                                                                              result_MLP[2]['auc']))
+        print('-------------------------------------- ')
+        # XGB Boost
+        print('-----------------Training XGB Boost--------------------- ')
+
+        _, result_XGB = apply_xgbBoost(dataloader_fold.train_features, dataloader_fold.val_features,
+                                       dataloader_fold.test_features, dataloader_fold.train_output,
+                                       dataloader_fold.val_outout, dataloader_fold.test_outout)
+        metrics_XGB['Fold {0}'.format(f)] = result_XGB[2]
+        print(
+            "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_XGB[0]['auc'],
+                                                                                              result_XGB[1]['auc'],
+                                                                                              result_XGB[2]['auc']))
+        print('-------------------------------------- ')
+
+    Result_c.update('FCN', '_'.join(t), metrics_FCN)
+    Result_c.update('LR', '_'.join(t), metrics_LR)
+    Result_c.update('RF', '_'.join(t), metrics_RF)
+    Result_c.update('MLP', '_'.join(t), metrics_MLP)
+    Result_c.update('XGB', '_'.join(t), metrics_XGB)
+    Result_c.update('Graph', '_'.join(t), metrics_Graph)
+
+output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results'
+Result_c.save(output_dir=output_dir, name='Timepoints_K10_mortality')
\ No newline at end of file
diff --git a/Experiments/5-Uncertainty.py b/Experiments/5-Uncertainty.py
new file mode 100644
index 0000000000000000000000000000000000000000..5940be756ab10a5b6e4f11b8361a71aaa1b7a082
--- /dev/null
+++ b/Experiments/5-Uncertainty.py
@@ -0,0 +1,100 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+
+from evaluate_model import test, get_metrics_unc, plot_selectedsamples_metrics, plot_uncetainties
+from train import train_model
+import torch
+import os
+import  numpy as np
+# %% DATALOADIND
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median',
+                    output_feature=['dmRS'])
+
+output_vector = table.output_vector
+
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+
+features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices)
+
+feature_vector = table.final_df[features]
+FP = FeaturePreprocessing(feature_vector, table.selected_d)
+feature_vector = FP.create_features(feature_vector)
+
+config = {'lr': 0.01,
+              'momentum': 0,
+              'weight_decay': 0.001,
+              'layers': {'number': 3,
+                         'layer1': 40,
+                         'layer2': 20,
+
+                         },
+              'dropout': 0,
+              'classification': True,
+              'out_classes': 2}
+
+########
+mean_preds = []
+combined = []
+epistemic = []
+cls = []
+results_pred= {}
+results_epis= {}
+for p in[1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2,0.1]:
+        results_pred[p]={}
+        results_epis[p] = {}
+
+for k in range(5):
+    dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[k],
+                                       table.selected_d, one_hot=True)
+    dl = dataloader_fold.get_loaders()
+    fold_name = "C:/Users/martinca1/PhD/Projects/AI_Stroke/out/models/FCNEnsemble"
+
+    torch.manual_seed(0)
+    for i in range(10):
+
+        if not os.path.exists(fold_name):
+            os.mkdir(fold_name)
+
+        path_model = os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k))
+        if os.path.isfile(path_model):
+            continue
+        else:
+            print("Training model {} of fold {}".format(i, k))
+            _, model = train_model(config, loaders=dl)
+            torch.save(model, path_model)
+
+    state_dict_paths = [os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k)) for i in range(10)]
+
+    pred, unc, epistemic_unc, y = test(config, dl[2], state_dict_paths)
+    mean_preds.extend(pred.tolist())
+    combined.extend(unc.tolist())
+    epistemic.extend(epistemic_unc.tolist())
+    cls.extend(y.tolist())
+
+################### Test
+p = np.array(mean_preds)
+y = np.array(cls)
+c = np.array(combined)
+e = np.array(epistemic)
+
+# with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/predictive_uncertainty.xlsx") as writer:
+for per in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]:
+    results_pred[per] = get_metrics_unc(c, p, y, per)
+
+plot_selectedsamples_metrics(c, p, y, uncertainty='Predictive')
+plot_selectedsamples_metrics(e, p, y, uncertainty='Epistemic')
+plot_uncetainties(p, y, c, e)
+import  matplotlib.pyplot as plt
+plt.show()
+
+
+
diff --git a/Experiments/6-Graph_constr.py b/Experiments/6-Graph_constr.py
new file mode 100644
index 0000000000000000000000000000000000000000..a37da69da6140a5198c232eaee670978069ddc71
--- /dev/null
+++ b/Experiments/6-Graph_constr.py
@@ -0,0 +1,116 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+from _utils.Result_container import Result_container
+from train_graph import train_model_graph
+from sklearn.model_selection import ParameterGrid
+from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost
+# %% DATALOADIND
+import torch
+import os
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+tables_modalities = ['all_timepoints']
+Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'],
+                            output=['Graph'])
+#### From all variables evaluate different variable selection methods
+
+
+
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints',
+                        mv_strategy='median',
+                        output_feature=['dmRS'])
+
+output_vector = table.output_vector
+meta_vector = table.meta_df
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices)
+feature_vector = table.final_df[features]
+
+FP = FeaturePreprocessing(feature_vector, table.selected_d)
+feature_vector = FP.create_features(feature_vector)
+
+config = {'lr': 0.001,
+              'momentum': 0,
+              'weight_decay': 0.001,
+              'layers': {'number': 3,
+                         'layer1': 40,
+                         'layer2': 20
+
+                         },
+              'dropout': 0,
+              'classification': True,
+              'out_classes': 2}
+metrics = {}
+
+
+grid_config_graph = [
+
+{
+    'Age': [True],
+    'beta_Age': [1, 2, 3, 4, 5],
+    'Sex': [True, False],
+    'pre-mRS': [True],
+    'beta_mRS': [1, 2],
+    'NIHSS': [False]
+},
+
+{
+    'Age': [True],
+    'beta_Age': [1, 2, 3, 4, 5],
+    'Sex': [True, False],
+    'pre-mRS': [False],
+    'NIHSS': [False]},
+
+{
+    'Age': [False],
+    'Sex': [True, False],
+    'pre-mRS': [True],
+    'beta_mRS': [1, 2],
+    'NIHSS': [False]},
+    {
+        'Age': [False],
+        'Sex': [True],
+        'pre-mRS': [False],
+        'NIHSS': [False]},
+
+]
+
+"""grid_config_graph = {
+    'Age': [True, False],
+    'beta_Age': [1, 2, 3, 4, 5],
+    'Sex': [True, False],
+    'pre-mRS': [True, False],
+    'beta_mRS': [1, 2],
+    'NIHSS': [True, False],
+    'beta_NIHSS': [1, 3, 5, 7, 10]
+}"""
+grid = ParameterGrid(grid_config_graph)
+
+for config_graph in grid:
+    print(config_graph)
+    for f in range(5):
+            # FCN
+            ("Training FCN of fold {}".format(f))
+            dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f],
+                                           table.selected_d,load_images=False, meta= meta_vector, one_hot=True)
+
+            loader = dataloader_fold.build_graph(config_graph)
+
+            torch.manual_seed(0)
+            result, model = train_model_graph(config, loaders=loader, indices=fold_indices[f])
+            metrics['Fold {0}'.format(f)] = result['val_metrics']
+            # LR
+
+            print('-------------------------------------- ')
+    import json
+    Result_c.update('Graph', json.dumps(config_graph), metrics)
+
+    output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results'
+    Result_c.save(output_dir=output_dir, name='Graph_grid_validation_new')
diff --git a/Experiments/7-Graph_model.py b/Experiments/7-Graph_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3fa88cfb31e18960ca6e6105d899fac14c88e81
--- /dev/null
+++ b/Experiments/7-Graph_model.py
@@ -0,0 +1,82 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+from _utils.Result_container import Result_container
+from train_graph import train_model_graph
+from sklearn.model_selection import ParameterGrid
+from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost
+# %% DATALOADIND
+import torch
+import os
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+tables_modalities = ['all_timepoints']
+Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'],
+                            output=['Graph'])
+#### From all variables evaluate different variable selection methods
+
+
+output_feature = 'dmRS'
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints',
+                        mv_strategy='median',
+                        output_feature=[output_feature])
+
+output_vector = table.output_vector
+meta_vector = table.meta_df
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices)
+feature_vector = table.final_df[features]
+
+FP = FeaturePreprocessing(feature_vector, table.selected_d)
+feature_vector = FP.create_features(feature_vector)
+
+config = {'lr': 0.001,
+              'momentum': 0,
+              'weight_decay': 0.001,
+              'layers': {'number': 3,
+                         'layer1': 40,
+                         'layer2': 20
+
+                         },
+              'dropout': 0,
+              'classification': True,
+              'out_classes': 2}
+metrics = {}
+
+config_graph = {
+        'Age': True,
+        'beta_Age': 3,
+        'Sex': False,
+        'pre-mRS': True,
+        'beta_mRS': 1,
+        'NIHSS': False,
+        'beta_NIHSS': 1
+
+    }
+
+
+for f in range(5):
+        # FCN
+        ("Training FCN of fold {}".format(f))
+        dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f],
+                                       table.selected_d,load_images=False, meta= meta_vector,
+                                       one_hot=True)
+
+        loader = dataloader_fold.build_graph(config_graph)
+
+        torch.manual_seed(0)
+        result, model = train_model_graph(config, loaders=loader, indices=fold_indices[f])
+        metrics['Fold {0}'.format(f)] = result['test_metric']
+        # LR
+
+        print('-------------------------------------- ')
+Result_c.update('Graph', 'Results', metrics)
+
+output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results'
+Result_c.save(output_dir=output_dir, name='Graph_{}'.format(output_feature))
\ No newline at end of file
diff --git a/Experiments/8-Uncertainty_graph.py b/Experiments/8-Uncertainty_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..4635eca6fe8440473c84ed6404b440494e4b49b4
--- /dev/null
+++ b/Experiments/8-Uncertainty_graph.py
@@ -0,0 +1,145 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+
+from evaluate_model import test_graph, get_metrics_unc, plot_selectedsamples_metrics, plot_uncetainties, \
+    plot_age_uncertainty, plot_boxplots
+from train_graph import train_model_graph
+import torch
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.model_selection import ParameterGrid
+
+# %% DATALOADIND
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median',
+                    output_feature=['mortality'])
+
+output_vector = table.output_vector
+
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+
+features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices)
+
+feature_vector = table.final_df[features]
+metadata_vector = table.meta_df
+FP = FeaturePreprocessing(feature_vector, table.selected_d)
+feature_vector = FP.create_features(feature_vector)
+
+"""config = {'lr': 0.001,
+          'momentum': 0,
+          'weight_decay': 0.001,
+          'layers': {'number': 5,
+                     'layer1': 96,
+                     'layer2': 32,
+                     'layer3': 8,
+                     'layer4': 4
+
+                     },
+          'dropout': 0.2,
+          'classification': True,
+          'out_classes': 2}"""
+config = {'lr': 0.001,
+              'momentum': 0,
+              'weight_decay': 0.001,
+              'layers': {'number': 3,
+                         'layer1': 40,
+                         'layer2': 20
+
+                         },
+              'dropout': 0,
+              'classification': True,
+              'out_classes': 2}
+########
+mean_preds = []
+combined = []
+epistemic = []
+cls = []
+ages = []
+results_pred = {}
+results_epis = {}
+for p in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]:
+    results_pred[p] = {}
+    results_epis[p] = {}
+
+for k in range(5):
+
+    dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[k],
+                                   table.selected_d, load_images=False, meta=metadata_vector,
+                                   one_hot=True)
+    ages_fold = dataloader_fold.meta['Age'].values[fold_indices[k][2]]
+
+    """config_graph = {
+        'Age': True,
+        'beta_Age': 3,
+        'Sex': False,
+        'pre-mRS': True,
+        'beta_mRS': 1,
+        'NIHSS': False,
+        'beta_NIHSS': 3
+
+    }"""
+    config_graph = {
+        'Age': True,
+        'beta_Age': 4,
+        'Sex': True,
+        'pre-mRS': False,
+        'beta_mRS': 1,
+        'NIHSS': False,
+        'beta_NIHSS': 3
+
+    }
+    fold_name = "C:/Users/martinca1/PhD/Projects/AI_Stroke/out/models/EdgeDropout3_mortality"
+    loader = dataloader_fold.build_graph(config_graph=config_graph)
+    torch.manual_seed(0)
+    for i in range(10):
+
+        if not os.path.exists(fold_name):
+            os.mkdir(fold_name)
+
+        path_model = os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k))
+        if os.path.isfile(path_model):
+            continue
+        else:
+            print("Training model {} of fold {}".format(i, k))
+            _, model = train_model_graph(config, loaders=loader, indices=fold_indices[k])
+            torch.save(model, path_model)
+
+    state_dict_paths = [os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k)) for i in range(10)]
+
+    pred, unc, epistemic_unc, y = test_graph(config, loader, fold_indices[k][2], state_dict_paths)
+
+    ages.extend(ages_fold.tolist())
+    mean_preds.extend(pred.tolist())
+    combined.extend(unc.tolist())
+    epistemic.extend(epistemic_unc.tolist())
+    cls.extend(y.tolist())
+
+################### Test
+p = np.array(mean_preds)
+y = np.array(cls)
+c = np.array(combined)
+e = np.array(epistemic)
+a = np.array(ages)
+
+# with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/predictive_uncertainty.xlsx") as writer:
+for th in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1,0]:
+    results_pred[th] = get_metrics_unc(c, p, y, th)
+
+
+
+plot_boxplots(p,y,uncertainty=c)
+#plot_selectedsamples_metrics(c, p, y, uncertainty='Predictive')
+#plot_selectedsamples_metrics(e, p, y, uncertainty='Epistemic')
+#plot_uncetainties(p, y, c, e)
+#plot_age_uncertainty(p, y, c, a)
+
+plt.show()
diff --git a/Experiments/9-Images.py b/Experiments/9-Images.py
new file mode 100644
index 0000000000000000000000000000000000000000..677d4a6f88dd2ccea4f016eb50972d14fb449573
--- /dev/null
+++ b/Experiments/9-Images.py
@@ -0,0 +1,55 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+from _utils.Result_container import Result_container
+from train import train_model
+from train_graph import train_model_graph
+from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost
+# %% DATALOADIND
+import torch
+import os
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=6)
+
+# Given a clean table get features and labels
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median',
+                    output_feature=['mortality'])
+
+output_vector = table.output_vector
+meta_vector = table.meta_df
+ids_vector = table.patient_ids
+
+
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+## Load the images given by fold indices
+
+Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'],
+                            output=['3DCNN'])
+
+#### From all variables evaluate different variable selection methods
+
+for f in range(5):
+        # FCN
+        print("Training FCN of fold {}".format(f))
+        """
+        #dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f],
+        #                                table.selected_d,meta_vector,  one_hot=True)
+
+        dl = dataloader_fold.get_loaders()
+        dl_graph = dataloader_fold.build_graph(config_graph)
+        torch.manual_seed(0)
+       
+
+        result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f])
+        print('-------------------------------------- ')
+
+
+        Result_c.update('Graph', method + '_' + str(k), metrics_Graph)
+"""
+
+output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results'
+Result_c.save(output_dir=output_dir, name='Variable_selection_Graph_mort_all')
diff --git a/Experiments/_t_test.py b/Experiments/_t_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..90bf206aca5bee2210df7e5dfc1a0fd707ac54c0
--- /dev/null
+++ b/Experiments/_t_test.py
@@ -0,0 +1,62 @@
+import scipy.stats
+import numpy as np
+
+
+
+
+def compute_t_value(mean_base, sd_base, means, sds, samples ):
+    for i in range(len(means)):
+        mean_2 = means[i]
+        sd_2 = sds[i]
+        t1 = (mean_base - mean_2)
+        t2 = (np.power(sd_base, 2) + np.power(sd_2, 2)) * (1 / samples)
+        t2 = np.sqrt(t2)
+
+        t =- t1 / t2
+
+        p = 2 * min(scipy.stats.t.cdf(t, samples), scipy.stats.t.cdf(-t, samples))
+        print('Paired t-test of {}+-{} against {}+-{} : p_value: {}'.format(mean_base, sd_base,
+                                                                        mean_2, sd_2,
+                                                                        p))
+
+
+
+
+samples = [160, 220, 274, 249]
+
+
+######### Calcaneus
+mean_base = 8.46
+means = [9.12, 8.69]
+sd_base = 0.63
+sds = [1.03, 1.23]
+
+print('Calceaneus ')
+compute_t_value(mean_base, sd_base, means, sds, samples[0])
+
+############## Ankle
+mean_base = 6.32
+means = [9.02, 5.86]
+sd_base = 0.25
+sds = [1.59, 0.40]
+
+print('Ankle')
+compute_t_value(mean_base, sd_base, means, sds, samples[1])
+
+############ Knee
+mean_base = 6.8
+means = [7.79, 6.49]
+sd_base = 0.55
+sds = [0.53, 1.05]
+
+print('Knee')
+compute_t_value(mean_base, sd_base, means, sds, samples[2])
+
+###########  Wrist
+mean_base = 7.85
+means = [9.59, 9.93]
+sd_base = 0.94
+sds = [2.15, 1.41]
+
+print('Wrist')
+compute_t_value(mean_base, sd_base, means, sds, samples[3])
\ No newline at end of file
diff --git a/IO_utils/Dataloader.py b/IO_utils/Dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..18339b7faeab4bd3d29616f6207b60ecebc3c191
--- /dev/null
+++ b/IO_utils/Dataloader.py
@@ -0,0 +1,63 @@
+from torch.utils.data import DataLoader
+from torch_geometric.loader import DataLoader as graph_Dataloader
+from IO_utils.Datasets import MyDataset, Graph_Dataset
+
+
+class MyDataLoader:
+
+    def __init__(self, feature, output, fold_indices, selected_d, load_images=False, patient_ids=None,
+                 meta=None, batch_size=None, one_hot=False):
+        train_indices, val_indices, test_indices = fold_indices
+
+        self.features = feature
+        self.output = output
+        self.meta = meta
+        self.one_hot = one_hot
+        self.patient_ids = patient_ids
+        self.load_images = load_images
+
+        self.batch_size = batch_size if batch_size is not None else int(0.8 * feature.shape[0])
+
+        train_dataset = MyDataset(feature, output, train_indices,
+                                  data_dictionaries=selected_d, one_hot=one_hot)
+
+        self.train_loader = DataLoader(dataset=train_dataset,
+                                       batch_size=self.batch_size,
+                                       shuffle=True,
+                                       num_workers=0)
+
+        val_dataset = MyDataset(feature, output, val_indices,
+                                data_dictionaries=selected_d, one_hot=one_hot)
+
+        self.val_loader = DataLoader(dataset=val_dataset,
+                                     batch_size=self.batch_size,
+                                     num_workers=0)
+
+        test_dataset = MyDataset(feature, output, test_indices,
+                                 data_dictionaries=selected_d, one_hot=one_hot)
+
+        self.test_loader = DataLoader(dataset=test_dataset,
+                                      batch_size=self.batch_size,
+                                      num_workers=0)
+
+        self.train_features, self.train_output = feature[train_indices, :], output[train_indices].squeeze()
+        self.test_features, self.test_outout = feature[test_indices, :], output[test_indices].squeeze()
+        self.val_features, self.val_outout = feature[val_indices, :], output[val_indices].squeeze()
+
+    def get_loaders(self):
+        return self.train_loader, self.val_loader, self.test_loader
+
+    def build_graph(self, config_graph):
+        # print(df_feature)
+        train_dataset = Graph_Dataset(self.meta,
+                                      self.features, self.output, config_graph, self.one_hot)
+
+        train_loader = graph_Dataloader(dataset=train_dataset.get(),
+                                        batch_size=self.batch_size,
+                                        num_workers=0)
+
+        return train_loader
+
+
+
+
diff --git a/IO_utils/Datasets.py b/IO_utils/Datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..29ac784292687f45b572fdc30a401ad4b4851dd1
--- /dev/null
+++ b/IO_utils/Datasets.py
@@ -0,0 +1,140 @@
+import torch
+import numpy as np
+from torch_geometric.data import Data
+from scipy.spatial.distance import correlation
+
+class MyDataset:
+
+    def __init__(self, features, labels, indices, data_dictionaries, one_hot=False,
+                 images= False, patient_ids=None):
+
+        self.features = features[indices, :]
+        self.labels = labels[indices]
+        self.patient_indices = patient_ids[indices, 0] if patient_ids is not None else None
+        self.data_dictionaries = data_dictionaries
+        self.one_hot = one_hot
+        self.images = images
+        self.patients_ids = patient_ids
+
+        self.volumes = {}
+        if self.images:
+
+            self.load_images()
+
+    def load_images (self):
+        #Load images
+        for i in range(len(self.patient_indices)):
+            #Load volume
+            self.volumes[i]= {}
+            for modality in ['NCCT', 'CTP']:
+                #Load modality
+                self.volumes[i][modality]=None
+
+
+    def __len__(self):
+        return self.features.shape[0]
+
+    def __getitem__(self, i):
+        # Get features
+        features = self.features[i, :]
+        x = torch.from_numpy(features.squeeze()).float()
+        if self.one_hot:
+            label = np.zeros((np.max(self.labels + 1)))
+            label[self.labels[i]] = 1
+            y = torch.from_numpy(label)
+        else:
+            y = torch.from_numpy(np.array(self.labels[i])).float()
+        data = {'x': x,
+                'y': y
+                }
+
+        if self.images:
+            for m in ['NCCT', 'CTP']:
+                data[m]= self.volumes[i][m]
+
+
+
+        return data
+
+class Graph_Dataset:
+
+    def __init__(self, meta, features, labels, config_graph, one_hot):
+        self.meta = meta
+        self.features = features
+        self.labels = labels
+
+        x = torch.tensor(features, dtype=torch.float)
+
+        if one_hot:
+            labels_onehot = np.zeros((labels.shape[0], len(np.unique(labels))))
+            for i in range(labels.shape[0]):
+                labels_onehot[i, labels[i]] = 1
+            y = torch.tensor(labels_onehot, dtype=torch.float)
+        else:
+            y = torch.tensor(labels, dtype=torch.float)
+
+        self.edge_index, self.weights = self.create_graph(config_graph)
+
+        self.data = Data(x=x, y=y, edge_index=self.edge_index, weights=self.weights, labels=labels)
+        print("Number of nodes ", self.data.num_nodes)
+        print("Number of features ", self.data.num_node_features)
+        print("Number of edges ", self.data.num_edges)
+        print("Graph contain isolated nodes ", self.data.contains_isolated_nodes())
+        # self.plot_graph()
+
+    def len(self):
+        return self.meta.shape[0]
+
+    def get(self):
+
+        return [self.data]
+
+    def create_graph(self, config_graph):
+
+        v1 = []
+        v2 = []
+
+        nodes = self.meta.shape[0]
+        weights = []
+
+        for i in range(nodes):
+            for ii in range(nodes):
+                if i != ii:
+                    condition_Age = np.abs(self.meta['Age'].values[i] - self.meta['Age'].values[ii]) < \
+                                    config_graph['beta_Age'] if config_graph['Age'] else True
+                    condition_Sex = self.meta['Sex'].values[i] == self.meta['Sex'].values[ii] \
+                        if config_graph['Sex'] else True
+                    condition_mRS = (self.meta['pre-mRS'].values[i] - self.meta['pre-mRS'].values[ii]) < \
+                                    config_graph['beta_mRS'] if config_graph['pre-mRS'] else True
+                    condition_NIHSS = (self.meta['NIHSS'].values[i] - self.meta['NIHSS'].values[ii]) < \
+                                      config_graph['beta_NIHSS'] if config_graph['NIHSS'] else True
+
+                    if condition_Age and condition_Sex and condition_mRS and condition_NIHSS:
+                        v1.append(i)
+                        v2.append(ii)
+                        dist = correlation(self.features[i, :], self.features[ii, :])
+                        if np.isnan(dist):
+                            dist = 1.0
+                        weights.extend([dist])
+        for i in range(nodes):
+            if i not in v1:
+                #print('Node {0} not connected'.format(i))
+                #print('Age {0}, Sex {1}, pre-mRS {2} and NIHSS {3} '.format(self.meta['Age'].values[i],
+                #                                                            self.meta['Sex'].values[i],
+                #                                                            self.meta['pre-mRS'].values[i],
+                #                                                            self.meta['NIHSS'].values[i]))
+                #
+                corr = [correlation(self.meta.iloc[i, :], self.meta.iloc[ii, :]) for ii in range(nodes)]
+                sorted_corr = np.argsort(corr)
+                for j in range(1):
+                    v1.append(i)
+                    v2.append(sorted_corr[j+1])
+                    weights.extend([corr[sorted_corr[j+1]]])
+                    v2.append(i)
+                    v1.append(sorted_corr[j+1])
+                    weights.extend([corr[sorted_corr[j+1]]])
+
+        edge_index = torch.tensor([v1, v2], dtype=torch.long)
+        weight_vector = torch.tensor(weights, dtype=torch.float)
+
+        return edge_index, weight_vector
diff --git a/IO_utils/FeaturePreprocessing.py b/IO_utils/FeaturePreprocessing.py
new file mode 100644
index 0000000000000000000000000000000000000000..d31fbbd509a46e31c4be2c4df8f161dc8e9ba42a
--- /dev/null
+++ b/IO_utils/FeaturePreprocessing.py
@@ -0,0 +1,109 @@
+import copy
+import numpy as np
+
+class FeaturePreprocessing:
+    """
+    Normalize numerical values and transform to one-hot vector encoding categorical values
+    :param df: Input data frame
+    :param data_dictionaries: dictionaries with information about the features
+    :param exclude: columns that have to be excluded from the feature vector
+    :return:
+    """
+
+    def __init__(self, df, data_dictionaries, exclude=[]):
+
+        exclude_vector = copy.copy(exclude)
+        exclude_vector.extend(['Id'])
+        feature_vector = []
+
+        self.features = [i for i in df.columns if i not in exclude_vector]
+        self.min = [df[column].min() for column in df.columns if column != 'Id']
+        self.max = [df[column].max() for column in df.columns if column != 'Id']
+        self.data_dictionaries = data_dictionaries
+        # print('### Reading table(s)')
+        # print('### {} Features: {}'.format(len(features), features))
+
+    def create_features(self, df):
+        # Iterate over columns of dataframe and process them according to its data
+        feature_vector = []
+        self.feature_vector_names = []
+        for count, column in enumerate(self.features):
+
+            d = self.data_dictionaries[column]
+            if d['type'] in ['cat', 'ord']:
+
+                ordered = True if d['type'] == 'ord' else False
+                output = cat_to_one_hot(df[column], d, ordered=ordered)
+                for k in list(d['description'].values()):
+                    self.feature_vector_names.extend([column + '#' + k])
+            elif d['type'] in ['int', 'float']:
+
+                output = df[column] * 0 if self.max[count] == self.min[count] else \
+                    (df[column] - self.min[count]) / (self.max[count] - self.min[count])
+                output = np.reshape(output.to_numpy(), (output.shape[0], 1))
+
+                self.feature_vector_names.extend([column])
+
+            else:
+                raise ValueError('Column {} not included in the features'.format(column))
+
+            # print(feature_vector_names)
+            feature_vector.extend([output])
+
+            final_feature_vector = np.concatenate(feature_vector, axis=1)
+
+            # Saved all the features to an excel table
+            # features_df = pd.DataFrame(data=final_feature_vector, index = df['Id'], columns=feature_vector_names)
+            # new_dir = '../out/test/Features.xlsx'
+            # features_df.to_excel(new_dir, columns=feature_vector_names, index=True)
+        return final_feature_vector
+
+    def get_feature_names(self):
+        return self.feature_vector_names
+
+def cat_to_one_hot(features, dictionary, ordered=False):
+    """
+    Method that transform categorical variables into one-hot vector
+
+    :param features: Feature vector size n x 1
+    :param dictionary: Dictionary with fields 'info', 'categories', and 'description'
+    :return: one-hot feature vector of size nxc where m are the number of classes
+    """
+    feature_vector = copy.copy(features)
+    # Number of patients
+    patients = feature_vector.shape[0]
+    # Number of Categories
+    categories = int(dictionary['categories'])
+    assert isinstance(categories, int), 'Categories in data dictionary should be integer'
+    # Description of the categories
+    description = dictionary['description']
+
+    # Create the feature vector size patients x categories
+    one_hot_vector = np.zeros((patients, categories))
+
+    # Normal case where the feature categories are given
+    if description != 'None':
+
+        # Check that the number of categories matches
+        assert categories == len(
+            description.keys()), '{}: categories and its description does not match in data dictionary'
+
+        # If the keys given in the description are not [0, 1, ..., n] replace the values in the feature vector
+        expected_keys = list(range(categories))
+        k = list(map(int, list(description.keys())))
+        if expected_keys != k:
+            for i in range(categories):
+                feature_vector = feature_vector.replace(k[i], expected_keys[i])
+
+    # Get one hot vector for each patient
+    for count, i in enumerate(feature_vector.index):
+        try:
+            if ordered:
+                one_hot_vector[count, :int(feature_vector[i])] = 1
+            else:
+                one_hot_vector[count, int(feature_vector[i])] = 1
+
+        except ValueError:
+            print('{} cannot be converted to int'.format(feature_vector[i]))
+
+    return one_hot_vector
\ No newline at end of file
diff --git a/IO_utils/List_Reader.py b/IO_utils/List_Reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..f77e6cc5f9616121671f70521ccf9ad3e5d31d16
--- /dev/null
+++ b/IO_utils/List_Reader.py
@@ -0,0 +1,125 @@
+import copy
+import yaml
+import os
+
+from IO_utils.List_reader_utils import cross_check_dictionary, remove_features, treat_missing_values, \
+    get_all_dict_types, save_df
+from IO_utils.mv_strategies import select_features_pvalue, select_features_RF, select_features_RFE, \
+    combined_selection, select_features_MRMR
+from IO_utils.statistics_utils import get_pvalue, compute_basic_statistics
+
+
+class TableReader(object):
+
+    def __init__(self, input_df, tables, data_dictionaries='timepoints', mv_strategy='median',
+                 output_feature=None):
+
+
+        ROOT_DIR = "C:/Users/martinca1/PhD/Projects/AI_Stroke/AIS_Regress"
+        dir_data_dicts = "dictionaries/dictionary_modalities.yml" if data_dictionaries == 'modalities' else \
+            "dictionaries/dictionary_timepoints.yml"
+
+        dir_data_dicts = os.path.join(ROOT_DIR, dir_data_dicts)
+        self.data_dicts = yaml.load(open(dir_data_dicts), Loader=yaml.Loader)
+        self.output_feature = copy.copy(output_feature)
+
+        # Check that the output feature is one of the possible options
+        self.output_features = ['mRS90d', 'shift_mRS', 'dmRS', 'mortality']
+        assert self.output_feature[0] in self.output_features, 'Output feature should be one of: ' \
+                                                               'mRS90d, shift_mRS, dmRS, mortality and is' \
+                                                               ' {} '.format(self.output_feature[0])
+
+        if tables[0] == 'all_timepoints':
+            self.tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h']
+
+        else:
+            self.tables = tables
+        assert len(tables) == len(set(tables)), 'Tables list contains repeated element'
+
+        # --------------------------------- 1- Select the tables ----------------------------------------------
+        # Check that all elements from the selected tables are in the data frame and retrieve the corresponding
+        # data dictionaries and the indices from all the tables including the output indices
+        # All the possible outputs are included
+        self.selected_d, all_keys = cross_check_dictionary(input_df, self.data_dicts, self.tables,
+                                                           self.output_features)
+
+        # Select columns given by tables and and reorder dataframe to match data dictionaries order
+        self.reordered_df = input_df.reindex(columns=all_keys)
+
+        # -----------------------  2- Remove features with more than 10% missing values-----------------------
+        clean_df, missing_values, clean_keys = remove_features(self.reordered_df, p=0.1, exclude=self.output_features)
+
+        # Remove data dictionaries of removed features
+        keys = [i for c, i in enumerate(all_keys) if not clean_keys[c]]
+        for k in keys:
+            self.selected_d.pop(k)
+
+        # ----------------------------- 3  Handle missing values ------------------------------------------------
+        # Apply the missing value strategy
+        self.final_df = treat_missing_values(clean_df, method=mv_strategy)
+        self.meta_df = input_df.loc[self.final_df.index, ['Age', 'Sex', 'pre-mRS', 'NIHSS']]
+
+        self.patient_ids = input_df.loc[self.final_df.index, ['Id']]
+        #  ---------------------------- 5 Set output  --------------------------------------
+        # This is used for fold split
+        self.output_vector = self.final_df[self.output_feature[0]].to_numpy(dtype=int).squeeze()
+
+
+    def select_features(self, method='all', k=10, fold_indices=None):
+
+        # ----------------------------- Remove output features ------------------------------------------------
+        features_df = copy.copy(self.final_df)
+        features_df = features_df.drop(columns=self.output_features)
+        output_vector = copy.copy(self.final_df[self.output_feature[0]])
+
+        if method == 'all':
+            selected_features = features_df.columns.tolist()
+
+        elif method == 'p_value':
+            selected_features = select_features_pvalue(features_df, self.selected_d, output_vector, fold_indices,
+                                                       k=k)
+        elif method == 'random_forest':
+            selected_features = select_features_RF(features_df, self.selected_d, output_vector, fold_indices, k=k)
+
+        elif method == 'rfe':
+            selected_features = select_features_RFE(features_df, self.selected_d, output_vector, fold_indices, k=k)
+
+        elif method == 'combined':
+            selected_features = combined_selection(features_df, self.selected_d, output_vector, fold_indices, k=k)
+
+        elif method == 'mrmr':
+            selected_features = select_features_MRMR(features_df, self.selected_d, output_vector, fold_indices, k=k)
+        else:
+            raise ValueError("Not implemented")
+
+        return selected_features
+
+    def get_statistics(self, output_dir='../out/data_exploration/statistics', output_name='all_statistics.xlsx'):
+
+        # Reorder dataframe according to data dictionary
+        reordered_df = self.reordered_df.drop(columns=['Id'])
+        types = []
+        all_df, all_missing_values, clean_keys = remove_features(reordered_df, p=1, exclude=[])
+
+        tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h']
+        for t in tables:
+            types.extend(get_all_dict_types(self.data_dicts[t], types=[]))
+
+        # Compute statistics of the selected tables
+        statistics = compute_basic_statistics(all_df)
+        p_dmRS, methods = get_pvalue(all_df, all_df['dmRS'], types)
+        p_mortality, _ = get_pvalue(all_df, all_df['mortality'], types)
+        # p_shiftmRS, _ = get_pvalue(all_df, input_df['shift_mRS'], types)
+
+        statistics['p_dmRS'] = p_dmRS
+        statistics['p_mortality'] = p_mortality
+        # statistics['shift_mRS'] = p_shiftmRS
+        statistics['method'] = methods
+
+        statistics['missing_values'] = all_missing_values
+        statistics['Percentage (%)'] = (statistics['missing_values'] * 100. / all_df.shape[0]).to_list()
+        statistics['types'] = types
+
+        save_df(statistics, output_dir=output_dir, name=output_name, overwrite=True)
+
+        return statistics
diff --git a/IO_utils/List_reader_utils.py b/IO_utils/List_reader_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..57599c412af9a7e3a4933023887c125449468e64
--- /dev/null
+++ b/IO_utils/List_reader_utils.py
@@ -0,0 +1,108 @@
+import numpy as np
+import os
+import pandas as pd
+import sklearn
+from sklearn.experimental import enable_iterative_imputer
+from sklearn.impute import KNNImputer, IterativeImputer
+def cross_check_dictionary(input_df, data_dictionary, tables, output):
+    d = {'Id': {}}
+    all_indices = ['Id']
+    for t in tables:
+        indices = list(data_dictionary[t].keys())
+        for i in indices:
+            d[i] = data_dictionary[t][i]
+        all_indices.extend(indices)
+
+    # Check that all the features from the data dictionary are in the table
+    columns_df = input_df.columns
+    for index in all_indices:
+        if index not in columns_df and index != 'Id':
+            raise ValueError('Feature {} from data dictionary is not in data frame '.format(index))
+
+    for o in output:
+        if o not in all_indices:
+            all_indices.extend([o])
+
+    return d, all_indices
+
+def treat_missing_values(df, method):
+    if method == 'drop':
+
+        final_df = df.dropna()
+    elif method == 'median':
+        indices = df.loc[df['mRS90d'].isnull()].index
+        final_df = df.drop(indices)
+        final_df = final_df.fillna(df.median())
+
+    elif method == 'knn':
+        imputer =KNNImputer(n_neighbors=5, weights="uniform")
+        indices = df.loc[df['mRS90d'].isnull()].index
+        final_df = df.drop(indices)
+        imputer.fit(final_df)
+        final_df[final_df.columns] = imputer.transform(final_df.to_numpy())
+
+    elif method == 'mice':
+        imputer = IterativeImputer(random_state=0,
+                                                  estimator=sklearn.tree.DecisionTreeRegressor(max_features='sqrt',
+                                                                                               random_state=0))
+        indices = df.loc[df['mRS90d'].isnull()].index
+        final_df = df.drop(indices)
+        imputer.fit(final_df)
+        final_df[final_df.columns] = imputer.transform(final_df.to_numpy())
+    else:
+        raise ValueError("{} mv strategy not  implemented ")
+
+    return final_df
+
+def remove_features(df, p, exclude):
+    """
+    Remove columns from a dataframe with more than p*100% missing values
+    :param df: Pandas dataframe
+    :param p: Percentage of missing values
+    :param exclude: Columns to exclude
+    :return: Clean dataframe, number of missing values of the original df and the indices of the selected columns
+    """
+
+    # print('### Removing features with more than {}% of missing values'.format(p * 100))
+    patients = df.shape[0]
+
+    # Get indices of features with more than 10% of missing values
+    missing_values = np.array(df.isnull().sum(axis=0))
+    selected_features_idx = (missing_values <= (p * patients))
+    # Get indices of features to exclude
+    idx_exclude = [df.columns.get_loc(i) for i in exclude]
+    selected_features_idx[idx_exclude] = True
+    # Select the features from the original df
+    final_df = df[df.columns[selected_features_idx]]
+
+    # Print information about the removed features
+    removed_columns = df.columns[[not i for i in selected_features_idx]]
+    removed_number = missing_values[[not i for i in selected_features_idx]]
+    for i in range(removed_columns.shape[0]):
+        print("### {} - Feature {} removed with {} missing  ({:.2f} %)".format(i, removed_columns[i],
+                                                                               removed_number[i],
+                                                                               (removed_number[i] / patients) * 100))
+    return final_df, missing_values, selected_features_idx
+
+
+def get_all_dict_types(nested_dictionary, types=[]):
+    for key, value in nested_dictionary.items():
+
+        if type(value) is dict:
+            get_all_dict_types(value, types=types)
+        if key == 'type':
+            types.extend([value])
+        else:
+            continue
+
+    return types
+
+def save_df(df, output_dir, name='output.xlsx', overwrite=False, file_format='Excel'):
+    file_path = os.path.join(output_dir, name)
+    if not overwrite:
+        assert os.path.isfile(file_path), 'File already exists'
+
+    if file_format == 'Excel':
+        with pd.ExcelWriter(file_path) as writer:
+            df.to_excel(writer, sheet_name='Sheet1')
+
diff --git a/IO_utils/__pycache__/Dataloader.cpython-37.pyc b/IO_utils/__pycache__/Dataloader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1e09a70de9bc92e145ca06daa4644ac277fbf28f
Binary files /dev/null and b/IO_utils/__pycache__/Dataloader.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/Datasets.cpython-37.pyc b/IO_utils/__pycache__/Datasets.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..00eb6a5eee63c0a4b4d6db939d63add7517a2947
Binary files /dev/null and b/IO_utils/__pycache__/Datasets.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/FeaturePreprocessing.cpython-37.pyc b/IO_utils/__pycache__/FeaturePreprocessing.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..89e3f62800b5a6305c10bb2c37cf7c1b8079f7b8
Binary files /dev/null and b/IO_utils/__pycache__/FeaturePreprocessing.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/List_Reader.cpython-37.pyc b/IO_utils/__pycache__/List_Reader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..163df555b090f376793b1f44a6ed072895a93f63
Binary files /dev/null and b/IO_utils/__pycache__/List_Reader.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/List_reader_utils.cpython-37.pyc b/IO_utils/__pycache__/List_reader_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e354aae7e8356f0dd5e32343905b3449e000d779
Binary files /dev/null and b/IO_utils/__pycache__/List_reader_utils.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/clean_table.cpython-37.pyc b/IO_utils/__pycache__/clean_table.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a0b33d4af8f0f7e35599a5f2b3e483631a1cb92
Binary files /dev/null and b/IO_utils/__pycache__/clean_table.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/mv_strategies.cpython-37.pyc b/IO_utils/__pycache__/mv_strategies.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c082ebb7d9f04c45bba16f774987e8536198cb6f
Binary files /dev/null and b/IO_utils/__pycache__/mv_strategies.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/split_utils.cpython-37.pyc b/IO_utils/__pycache__/split_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a492db74b2ce1f97daca711e86495299dd7ca9f
Binary files /dev/null and b/IO_utils/__pycache__/split_utils.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/statistics_utils.cpython-37.pyc b/IO_utils/__pycache__/statistics_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4e069e116665318b8fc45e73187d04699eba9aa
Binary files /dev/null and b/IO_utils/__pycache__/statistics_utils.cpython-37.pyc differ
diff --git a/IO_utils/clean_table.py b/IO_utils/clean_table.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf39a3040a8e82d06d9d5371c470c133b141d712
--- /dev/null
+++ b/IO_utils/clean_table.py
@@ -0,0 +1,321 @@
+import pandas as pd
+import numpy as np
+
+
+def clean_table(excel_dir, pre_mRS=2):
+    """
+    This method creates a clean list processing parameter by parameter in the original list to create
+    a list that can be directly processed and save it
+    :param excel_dir: path of the excel list
+    :return:
+    """
+    print('###############################################################')
+    print('##############Cleaning table ##################################')
+    print('###############################################################')
+
+    input_df = pd.read_excel(excel_dir)
+    # Remove first row (info about notation) and last row (empty)
+    input_df = input_df.drop([0, input_df.shape[0] - 1], axis='rows')
+    print('######## Initial table with {} patients and {} feature'.format(
+        input_df.shape[0], input_df.shape[1]))
+    # Middle cerebral stroke selection
+    Territoium = input_df['Territoium']
+    list_territoium = [Territoium[Territoium == i].count() for i in [0, 1, 2]]
+    print(
+        '######## Anterior circulation: {}, Middle circulation {}, Posterior circulation: {}'.format(*list_territoium))
+    mc_df = input_df[input_df['Territoium'] == 1]
+    mc_df = mc_df.drop(['Territoium'], axis=1)
+
+    print('######## Select {} patients with middle circulation stroke'.format(mc_df.shape[0]))
+
+    mc_df = mc_df[mc_df['mRS90d'].notna()]
+    print('######## Select {} patients with valid output'.format(mc_df.shape[0]))
+    # Remove patients with shift in mRS <0
+    shift = mc_df['mRS90d'] - mc_df['pre-mRS']
+    mc_df = mc_df[shift >= 0]
+
+    print('######## Select {} patients with non-negative shift'.format(mc_df.shape[0]))
+    # Remove patients with pre-mRS > 2
+    mc_df = mc_df[mc_df['pre-mRS'] <= pre_mRS]
+
+    print('######## Select {} patients with mRS < {}'.format(mc_df.shape[0], pre_mRS))
+    # Remove patients not treated with either trhombectomy or stenting
+    #rm_indices = mc_df[(mc_df['Stenting '] == 0) & (mc_df['Thrombektomie'] == 0)]
+    #print('######## Remove {} patients not treated with either stenting or thrombectomy'.format(rm_indices.shape[0]))
+    mc_df = mc_df[(mc_df['Stenting '] == 1) | (mc_df['Thrombektomie'] == 1)]
+    # Meta data: id, age and sex
+    print('######################################## 1- Meta data preprocessing ...')
+    mc_df = clean_meta_data(mc_df)
+
+    # NCCT values preprocessing
+    print('######################################## 2 - NCCT values preprocessing ...')
+    mc_df = clean_ncct_data(mc_df)
+
+    # CTP values preprocessing
+    print('######################################## 3 - CTP values preprocessing ...')
+    mc_df = clean_ctp_data(mc_df)
+
+    ##### CTA preprocessing
+    print('######################################## 4 - CTA values preprocessing ...')
+    mc_df = clean_cta_data(mc_df)
+
+    ###### Treatment preprocessing
+    print('######################################## 5 - Treatment values preprocessing ...')
+    mc_df = clean_treatment_data(mc_df)
+
+    #### Control CT preprocessing
+    print('######################################## 6 - Control CT values preprocessing ...')
+    mc_df = clean_ControlCT_data(mc_df)
+
+    #### Preprocessing clinical data
+    print('######################################## 7 - Clinical values preprocessing ...')
+    mc_df = clean_clinical_data(mc_df)
+
+    #### Remove dates
+    print('######################################## 8 - Remove dates ...')
+    mc_df = remove_dates(mc_df)
+    print('######## Clean table with {} patients and {} features'.format(mc_df.shape[0], mc_df.shape[1]))
+
+    # mc_df.to_excel(out_dir, columns=mc_df.columns, index=False)
+
+    print('###############################################################')
+    print('###############################################################')
+
+    return mc_df
+
+
+def remove_dates(mc_df):
+    mc_df = mc_df.drop(['Geburtsdatum', 'Aufnahmedatum UKER', 'Beginn Angio', 'Zeitpunkt Rekanalisation',
+                        'Puncture-to-Rekan', 'Zeitpunkt Verlaufs-CT \n(post 24h)', 'Zeit von Rekan bis Kontrolle',
+                        'Puncture-to-Rekan', 'Zeit von Bildgebung bis Rekan', 'Zeitpunkt 1. Bildgebung',
+                        'Symptom onset'], axis=1)
+    return mc_df
+
+
+def clean_meta_data(mc_df):
+    # Rename columns names
+    mc_df = mc_df.rename(columns={'Nummer': 'Id', 'Sex (0=männlich, 1=weiblich': 'Sex', 'Alter': 'Age'})
+
+    # Check that ages are correct
+    assert np.floor(mc_df['Age']).equals(
+        np.floor((mc_df['Aufnahmedatum UKER'] - mc_df['Geburtsdatum']) / np.timedelta64(1, 'Y')))
+
+    # Replace float Age by int Age
+    print('#### 1.1- Convert Age to int')
+    mc_df['Age'] = np.floor(mc_df['Age'])
+
+    # Remove modality of first image (all are CT)
+    mc_df = mc_df.drop(['1. Bildgebung UKER', '1. Bildgebung CT'], axis=1)
+
+    return mc_df
+
+
+def clean_ncct_data(mc_df):
+    mc_df = mc_df.rename(columns={'e-ASPECTS bzw pc-ASPECTS': 'e-ASPECTS',
+                                  'Volumen e-ASPECTS': 'Volume e-ASPECTS',
+                                  'IVCgesund': 'IVC gesund',
+                                  'ICV-Index': 'ICV Index',
+                                  'Ort Gefäßverschluss \nbei Aufnahme (proximalster)': 'Vessel Occlusion Location Admission',
+                                  'Seite Gefäßverschluss \nbei Aufnahme': 'Vessel Occlusion Side Admission',
+                                  'Symptomatische/vorgeschaltete Gefäßstenose bei Aufnahme': 'Vessel Stenosis',
+                                  'Ort symptomatische/vorgeschaltete Gefäßstenose bei Aufnahme': 'Vessel Stenosis Location',
+                                  'Gefäßdissektion bei Aufnahme': 'Arterial Dissection',
+                                  'Ort Gefäßdissektion': 'Arterial Dissection Location',
+                                  'ASPECTS oberfl/tief': 'ASPECTS oberfl_tief'
+                                  })
+
+    ASPECT_areas = ['C_br', 'IC_br', 'INS_br', 'L_br', 'M1_br', 'M2_br', 'M3_br', 'M4_br', 'M5_br', 'M6_br']
+    for area in ASPECT_areas:
+        mc_df[area] = replace_values(mc_df[area], [9, 'n/a'], [0, np.nan])
+
+    deep_areas = ['C_br', 'IC_br', 'L_br']
+    superficial_areas = ['INS_br', 'M1_br', 'M2_br', 'M3_br', 'M4_br', 'M5_br', 'M6_br']
+    for index, patient in mc_df.iterrows():
+        deep_stroke = patient[deep_areas].values
+        superficial_stroke = patient[superficial_areas].values
+        if any(deep_stroke) == 1:
+            if any(superficial_stroke) == 1:
+                a = 2
+            else:
+                a = 1
+
+        elif any(superficial_stroke) == 1:
+            if any(deep_stroke) == 1:
+                a = 2
+            else:
+                a = 0
+        else:
+            a = 3
+
+        mc_df.loc[index, 'ASPECTS oberfl_tief'] = a
+
+    mc_df['e-ASPECTS'] = replace_values(mc_df['e-ASPECTS'], [11], [np.nan])
+
+    mc_df['pc-ASPECTS'] = replace_values(mc_df['pc-ASPECTS'], [0], [np.nan])
+    mc_df['Volume e-ASPECTS'] = replace_values(mc_df['Volume e-ASPECTS'], ['n/a'], [np.nan])
+
+    return mc_df
+
+
+def clean_ctp_data(mc_df):
+    # Two patient with missing CTP,
+    # One -only patient with a image missing+ missing mRS+ a lot of data (posterior circulation)
+    # Second also posterior circulation
+    # mc_df['1. Bildgebung CT-P'] = replace_values(mc_df['1. Bildgebung CT-P'], ['n/a', 0], [np.nan, np.nan])
+    # mc_df = mc_df.drop(mc_df[mc_df['1. Bildgebung CT-P'] == np.nan].index)
+    # Remove CTP - if using posterior circulation I have to modify this
+    mc_df = mc_df.drop(['1. Bildgebung CT-P'], axis=1)
+
+    mc_df = mc_df.rename(columns={'Mismatch Volumen nach RAPID': 'Mismatch Volume',
+                                  'Mismatch Ratio nach RAPID': 'Mismatch Ratio',
+                                  'HypoperfusionIndex (Tmax10s/&max6.5s': 'Hypoperfusion Index',
+                                  'CBV Index (rCBV in Tmax>6': 'CBV Index',
+                                  'CBF <30% lesion volume': 'CBF_lower30_volume',
+                                  'Tmax >6s lesion volume': 'Tmax_greater6s_volume'
+                                  })
+
+    list_pct_features = ['CBF_lower30_volume', 'Tmax_greater6s_volume', 'Mismatch Volume',
+                         'Hypoperfusion Index', 'CBV Index']
+
+    for feature in list_pct_features:
+        mc_df[feature] = replace_values(mc_df[feature], ['n/a'], [np.nan])
+
+    # Infinite value is not replaced
+    # mc_df['Mismatch Ratio'] = replace_values(mc_df['Mismatch Ratio'], ['n/a', 'none'], [np.nan, np.nan])
+    print('#### 3.1 - Calculate inverse mismatch ratio - avoid infinite')
+    mc_df['Inverse Mismatch Ratio'] = mc_df['CBF_lower30_volume'] / replace_values(mc_df['Tmax_greater6s_volume'],
+                                                                                   [0], [0.001])
+
+    print('#### 3.2 - Remove Mismatch Ratio ')
+    mc_df = mc_df.drop(['Mismatch Ratio'], axis=1)
+    # Delete values from Brainomix, only in patient from 2018
+    print('#### 3.3 - Remove features obtained with Brainomix (only in patients from 2018) ')
+    mc_df = mc_df.drop(['Braino CBF<30%', 'Braino Tmax >6s', 'Braino Mismatch vol', 'Braino Hypoperfindex'], axis=1)
+
+    return mc_df
+
+
+def clean_cta_data(mc_df):
+    # All patients have CTA - remove
+    mc_df = mc_df.drop(['Akute DSA '], axis=1)
+
+    mc_df = mc_df.rename(columns={'Gefäßverschluss DSA ': 'Vessel Occlusion CTA'
+                                  })
+
+    # Remove empty column
+    mc_df = mc_df.drop(['TTP lesion volume'], axis=1)
+    print('#### 4.1 - Remove tandem stenosis feature- no patient with it')
+    # Remove empty column
+    mc_df = mc_df.drop(['Tandemstenose'], axis=1)
+
+    # Replace n/a
+    for feature in ['Tan Score', 'Coves Score', 'BATMAN', 'Clot Burden Score']:
+        mc_df[feature] = replace_values(mc_df[feature], ['n/a'], [np.nan])
+
+    return mc_df
+
+
+def clean_treatment_data(mc_df):
+    mc_df = mc_df.rename(columns={'Thrombektomie': 'Thrombectomy',
+                                  'ggf. weiteres Device': 'PTA',
+                                  'Anzahl der Manöver': 'Number Maneuver',
+                                  'Puncture-to-Rekan in min.': 'Time_Puncture_to_Recan.',
+                                  'frustrane Rekanalisation': 'Frustrated Recanalization',
+                                  'Gefäßverschluss nach Rekanalisation': 'Vessel Occlusion after Recan.',
+                                  'TICI ': 'TICI',
+                                  'Lyse i.a.': 'Lysis i.a.',
+                                  'Lysemenge': 'Lysis quantity',
+                                  'Gefäßverschluss in neuem Versorgungsgebiet \n(während/nach Intervention)': 'Vessel Occlusion new SupplyArea',
+                                  'Lokalisation Gefäßverschluss': 'Vessel Occlusion new SupplyArea Location',
+                                  'Behandlung (des neuen Gefäßverschlusses)': 'Vessel Occlusion new SupplyArea Treatment',
+                                  'Infarkt in neuem Versorgungsgebiet': 'Infarct new SupplyArea',
+                                  'Stenting ': 'Stenting'
+                                  })
+
+    mc_df['Device'] = replace_values(mc_df['Device'], [0], [np.nan])
+    mc_df['PTA'] = replace_values(mc_df['PTA'], [np.nan], [0])
+    mc_df['Time_Puncture_to_Recan.'] = replace_values(mc_df['Time_Puncture_to_Recan.'], ['#ZAHL!'], [np.nan])
+    mc_df['Lysis i.a.'] = replace_values(mc_df['Lysis i.a.'], [9], [0])
+    mc_df['Lysis quantity'] = replace_values(mc_df['Lysis quantity'], [np.nan, 'n/a'], [0, 0])
+    mc_df['Infarct new SupplyArea'] = replace_values(mc_df['Infarct new SupplyArea'], ['n/a'], [np.nan])
+
+    return mc_df
+
+
+def clean_ControlCT_data(mc_df):
+    # Missing in 5 patients: 4 died? during recanalization, 1 because new recanalization was performed
+    # Remove empty column
+
+    mc_df = mc_df.rename(columns={'Zeit von Rekan bis Kontrolle in min.': 'Time_Recan_to_Control',
+                                  'Zeit von Bildgebung bis Rekan in min': 'Time_CT_to_Angio.',
+                                  'lakuärer Infarkt (keine Kortexbeteiligung und Infarkt ≤1,5cm '
+                                  '(≤2,0cm falls mittels DWI in MRT gemessen) im größten Durchmesser f'
+                                  'alls ausschließlich subkortikal)': 'Lacunar Infarct',
+                                  'Infarktvolumen Verlaufs-CT': 'Infarct Volume ControlCT',
+                                  'Hyperdense Mediazeichen': 'Hyperdense Media Sign',
+                                  'pc-Aspect verlaufss-CT': 'pc-Aspect ControlCT',
+                                  'Aspect Verlaufs-CT': 'Aspect ControlCT',
+                                  })
+
+    mc_df = mc_df.drop(['Dauer der Rekan'], axis=1)
+    mc_df['Time_CT_to_Angio.'] = replace_values(mc_df['Time_CT_to_Angio.'], ['#WERT!'], [np.nan])
+
+    mc_df['Infarct Volume ControlCT'] = replace_values(mc_df['Infarct Volume ControlCT'], ['n/a'], [np.nan])
+    mc_df['pc-Aspect ControlCT'] = replace_values(mc_df['pc-Aspect ControlCT'], ['n/a', 0], [np.nan, np.nan, ])
+    mc_df['Aspect ControlCT'] = replace_values(mc_df['Aspect ControlCT'], ['n/a'], [np.nan])
+
+    return mc_df
+
+
+def clean_clinical_data(mc_df):
+    #
+    mc_df = mc_df.rename(columns={'Symptom onset / LSN': 'Symptom onset',
+                                  'unknown onset(1=onset unbekannt, LSN angegeben)': 'Unknown Onset'
+                                  })
+
+    #
+
+    # Create dichotomized functional outome
+
+    print('#### 7.1 - Add dichotomized mRS')
+    mc_df['dmRS'] = [1 if i > 2 else (np.nan if np.isnan(i) else 0) for i in mc_df['mRS90d']]
+    print('#### 7.2 - Add  mRS shift')
+    mc_df['shift_mRS'] = mc_df['mRS90d'] - mc_df['pre-mRS']
+    print('#### 7.3 - Add mortality')
+    mc_df['mortality'] = [1 if i == 6 else (np.nan if np.isnan(i) else 0) for i in mc_df['mRS90d']]
+
+    mc_df['Unknown Onset'] = replace_values(mc_df['Unknown Onset'], [np.nan], [1])
+    mc_df['Zeitpunkt 1. Bildgebung'] = pd.to_datetime(mc_df['Zeitpunkt 1. Bildgebung'])
+    mc_df['Time_Onset_to_Admission'] = (mc_df['Zeitpunkt 1. Bildgebung'] - mc_df['Symptom onset']).dt.seconds / 60
+    #mc_df.loc[ mc_df['Unknown Onset']==1,'Time_Onset_to_Admission'] = -1
+
+    # Handle unknown times
+    # u = (mc_df['Unknown Onset'] == 1).values
+    # mean = np.nanmedian(mc_df['Time_Onset_to_Admission'][u])
+    # mc_df['Time_Onset_to_Admission'] = replace_values(mc_df['Time_Onset_to_Admission'], [np.nan], [mean])
+
+    return mc_df
+
+
+def replace_values(column, original_values=[], target_values=[]):
+    assert len(original_values) == len(target_values), 'Length of both lists have to be the same'
+
+    for (i, j) in zip(original_values, target_values):
+        column.replace(i, j, inplace=True)
+
+    return column
+
+
+def get_values(column):
+    unique_values = column.unique()
+    values = []
+    number = []
+
+    for i in unique_values:
+        values.extend([i])
+        if pd.isna(i):
+            number.extend([column.isna().sum()])
+        else:
+            number.extend([column[column == i].count()])
+
+    print(values, number)
diff --git a/IO_utils/mv_strategies.py b/IO_utils/mv_strategies.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac266c8690073b1023fae5a1abfa4edce7f7d398
--- /dev/null
+++ b/IO_utils/mv_strategies.py
@@ -0,0 +1,258 @@
+import copy
+from mrmr import mrmr_classif
+import numpy as np
+import pandas as pd
+from sklearn.feature_selection import RFE
+from sklearn.ensemble import RandomForestClassifier
+
+from IO_utils.statistics_utils import get_pvalue
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+
+
+def select_features_pvalue(df, data_dictionaries, output_vector, fold_indices, k=10):
+    # Get types of features -- different methods to compute p value
+    types = []
+    for t in data_dictionaries.keys():
+        if t not in ['Id']:
+            types.extend([data_dictionaries[t]['type']])
+    all_p_values = np.zeros((df.shape[1] - 1))
+    for indices in fold_indices:
+        _, _, test_indices = indices
+        drop_indices = df.index[test_indices]
+        df_copy = copy.copy(df.drop(drop_indices))
+
+        p_values, _ = get_pvalue(df_copy.drop(columns=['Id']), output_vector, types)
+        all_p_values += np.array(p_values)
+
+    #sorted_indices = np.argsort(all_p_values)
+    #sorted_values = np.sort(all_p_values)
+    #for i, v in zip(sorted_indices, sorted_values):
+    #    print(df.columns[i + 1], 'P_value: {} '.format(v))
+
+    smaller_indices = np.argsort(all_p_values)[:k]
+    indices = [True if a in smaller_indices else False for a in range(df.shape[1] - 1)]
+    indices = [False] + indices
+    selected_features = df.columns[indices]
+
+    print("Features selected (p-value): {} ".format(selected_features.values))
+
+    return selected_features
+
+
+def select_features_RF(df, data_dictionaries, output_vector, fold_indices, k=10):
+    global all_featuress_imp
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_imp = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        clf = RandomForestClassifier(n_estimators=100, random_state=0)
+        clf.fit(features, output)
+        all_features_imp = all_features_imp + clf.feature_importances_
+
+    # plt.figure(num=None, figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_imp, index=names)
+
+    # Get original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=False)
+    for next_feature in sorted_value.index:
+
+        feature = next_feature.split('#')[0]
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    # plt.show()
+    print("Features selected (Ranfom-forest): {} ".format(features))
+
+    return features
+
+
+def select_features_RFE(df, data_dictionaries, output_vector, fold_indices, k=10):
+    global all_featuress_imp
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_ranking = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        clf = RandomForestClassifier(n_estimators=100, random_state=0)
+        rfe = RFE(estimator=clf, n_features_to_select=k, step=1)
+        rfe.fit(features, output)
+        all_features_ranking = all_features_ranking + rfe.ranking_
+
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_ranking, index=names)
+
+    # Get original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=True)
+    for next_feature in sorted_value.index:
+
+        feature = next_feature.split('#')[0]
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    # Reorder features
+    features = [f for f in df.columns if f in features]
+
+    # plt.show()
+    print("Features selected (RFE): {} ".format(features))
+
+    return features
+
+
+def combined_selection(df, data_dictionaries, output_vector, fold_indices, k=10):
+    # Get types of features -- different methods to compute p value
+
+    ### 1 - Select k*2 features by p value
+    types = []
+    for t in data_dictionaries.keys():
+        if t not in ['Id']:
+            types.extend([data_dictionaries[t]['type']])
+    all_p_values = np.zeros((df.shape[1] - 1))
+    for indices in fold_indices:
+        _, _, test_indices = indices
+        drop_indices = df.index[test_indices]
+        df_copy = copy.copy(df.drop(drop_indices))
+
+        p_values, _ = get_pvalue(df_copy.drop(columns=['Id']), output_vector, types)
+        all_p_values += np.array(p_values)
+
+    sorted_indices = np.argsort(all_p_values)
+    sorted_values = np.sort(all_p_values)
+    for i, v in zip(sorted_indices, sorted_values):
+        print(df.columns[i + 1], 'P_value: {} '.format(v))
+
+    smaller_indices = np.argsort(all_p_values)[:k * 2]
+    indices = [True if a in smaller_indices else False for a in range(df.shape[1] - 1)]
+    indices = [False] + indices
+    selected_features = df.columns[indices]
+
+    print("Features selected (p-value): {} ".format(selected_features.values))
+
+    global all_featuress_imp
+    df_reduced = df[selected_features]
+    FP = FeaturePreprocessing(df_reduced, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_ranking = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        clf = RandomForestClassifier(n_estimators=5, max_depth=5, random_state=0)
+        rfe = RFE(estimator=clf, n_features_to_select=k, step=1)
+        rfe.fit(features, output)
+        all_features_ranking = all_features_ranking + rfe.ranking_
+
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_ranking, index=names)
+
+    # Get original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=True)
+    for next_feature in sorted_value.index:
+
+        feature = next_feature.split('#')[0]
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    # Reorder features
+    features = [f for f in df.columns if f in features]
+
+    # plt.show()
+    print("Features selected (RFE): {} ".format(features))
+
+    return features
+
+
+def select_features_MRMR(df, data_dictionaries, output_vector, fold_indices, k=10):
+    global all_featuress_imp
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_ranking = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        selected_features = mrmr_classif(features, output, K=k)
+        for counter, i in enumerate(selected_features):
+            all_features_ranking[i] += len(selected_features) - counter
+        #
+        # all_features_ranking = all_features_ranking + rfe.ranking_
+
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_ranking, index=names)
+
+    # Get original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=False)
+    for next_feature, next_value in zip(sorted_value.index, sorted_value.values):
+
+        feature = next_feature.split('#')[0]
+        # print(feature,  next_value)
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    # Reorder features
+    features = [f for f in df.columns if f in features]
+    # plt.show()
+    print("Features selected (MRMR): {} ".format(features))
+
+    return features
+
+
+def select_features_MRMR_fold(df, data_dictionaries, output_vector, fold_indices, k=10):
+    global all_featuress_imp
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+    all_features = []
+
+    for i, indices in enumerate(fold_indices):
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        selected_features = mrmr_classif(features, output, K=k)
+        names = FP.get_feature_names()
+        selecte_features_names = [names[j].split('#')[0] for j in selected_features]
+        s = [f for f in df.columns if f in selecte_features_names]
+        all_features.append(s)
+
+        print("Features selected in fold {} (MRMR): {} ".format(i, s))
+
+    return all_features
diff --git a/IO_utils/split_utils.py b/IO_utils/split_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcd345d7c96964e38e012b83e1afdde2c56ff9ea
--- /dev/null
+++ b/IO_utils/split_utils.py
@@ -0,0 +1,63 @@
+import copy
+import random
+import numpy as np
+
+def split_data_cv(output, seed, cv=5, p=1):
+    np.random.seed(seed)
+    random.seed(seed)
+    classes = np.unique(output)
+    indices_per_class = [np.where(output == c)[0] for c in classes]
+
+    # Make train and test splits with same number of classes
+    p_split = np.ones(cv) * 1 / cv
+    all_indices = split_stratified(p_split, classes, indices_per_class)
+    folds_indices = []
+
+    for fold in range(cv):
+        cv_test_indices = copy.copy(all_indices[fold])
+        cv_train_folds = copy.copy(all_indices)
+        cv_train_folds.pop(fold)
+
+
+        cv_train_val_indices = [item for sublist in cv_train_folds for item in sublist]
+        cv_train_val_indices_array = np.array(cv_train_val_indices)
+
+        output_train_val = output[cv_train_val_indices]
+        indices_train_val_per_class = [cv_train_val_indices_array[np.where(output_train_val == c)[0].tolist()] for c in classes]
+        train_val_all_indices = split_stratified((0.8, 0.2), classes, indices_train_val_per_class)
+
+        #random.shuffle(cv_train_val_indices)
+        #sp = int(0.8 * len(cv_train_val_indices))
+        #cv_train_indices = cv_train_val_indices[:sp]
+        #cv_train_indices = cv_train_indices[:(int(p * len(cv_train_indices)))]
+        #cv_val_indices = cv_train_val_indices[sp:]
+
+        cv_train_indices = copy.copy(train_val_all_indices[0])
+        cv_val_indices = copy.copy(train_val_all_indices[1])
+
+        folds_indices.append([cv_train_indices, cv_val_indices, cv_test_indices])
+
+    return folds_indices
+
+def split_stratified(p_split, classes, indices_per_class):
+    t = [0] * len(classes)
+    indices_split = []
+
+    for c in (range(len(classes))):
+        random.shuffle(indices_per_class[c])
+
+    for count, sp in enumerate(p_split):
+
+        indices = []
+
+        for c in range(len(classes)):
+            indices_c = indices_per_class[c]
+            t0 = t[c]
+            t1 = np.int(t0 + (indices_c.shape[0] * sp) + 0.5) if count != len(p_split) - 1 else indices_c.shape[0]
+            a = list(indices_c[t0:t1])
+            indices.extend(a)
+            t[c] = t1
+        random.shuffle(indices)
+        indices_split.append(indices)
+
+    return indices_split
diff --git a/IO_utils/statistics_utils.py b/IO_utils/statistics_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7e58a96b46a93bdfda515412cdaaa409144c81e
--- /dev/null
+++ b/IO_utils/statistics_utils.py
@@ -0,0 +1,159 @@
+import numpy as np
+import pandas as pd
+import rpy2.robjects as robjects
+from rpy2.robjects import numpy2ri
+import scipy
+
+
+def compute_basic_statistics(df):
+    output = pd.DataFrame(index=df.columns)
+
+    means = []
+    medians = []
+    stds = []
+    ci = []
+
+    for i, column in enumerate(list(df)):
+        rmeans = robjects.r['mean']
+        meanr = rmeans(robjects.FloatVector(df[column]))[0]
+
+        mean = np.nanmean(df[column].astype('float32'))
+        means.extend([mean])
+        median = np.nanmedian(df[column].astype('float32'))
+        medians.extend([median])
+        std = np.nanstd(df[column].astype('float32'), ddof=1)
+        stds.extend([std])
+        if mean != 0 and std != 0:
+            ci_low, ci_high = scipy.stats.norm.interval(0.95, loc=mean, scale=std)
+        else:
+            ci_low, ci_high = 0, 0
+        ci.extend([[ci_low, ci_high]])
+
+    output['mean'] = means
+    output['std'] = stds
+    output['0.95 confidence interval'] = ci
+    output['median'] = medians
+
+    return output
+
+
+def compute_bivariate_statistics(df, dmRS, mortality, categories_list):
+
+    output_bivartiate = pd.DataFrame(index=df.columns)
+
+    means1 = []
+    means2 = []
+    means3 = []
+    means4 = []
+
+    stds1 = []
+    stds2 = []
+    stds3 = []
+    stds4 = []
+
+    for i, column in enumerate(list(df)):
+
+        if categories_list[i] == 'cat':
+
+            count1 = len(df[(dmRS == 0) & (df[column]==0)])
+            means1.extend([count1])
+            stds1.extend([0])
+
+            count2 = len(df[(dmRS == 1) & (df[column] == 0)])
+            means2.extend([count2])
+            stds2.extend([0])
+
+            count3 = len(df[(mortality == 0) & (df[column] == 0)])
+            means3.extend([count3])
+            stds3.extend([0])
+
+            count4 = len(df[(mortality == 1) & (df[column]==0)])
+            means4.extend([count4])
+            stds4.extend([0])
+
+        else:
+
+            mean1 = np.nanmean(df[dmRS == 0][column].astype('float32'))
+            means1.extend([mean1])
+            std1 = np.nanstd(df[dmRS == 0][column].astype('float32'), ddof=1)
+            stds1.extend([std1])
+
+            mean2 = np.nanmean(df[dmRS == 1][column].astype('float32'))
+            means2.extend([mean2])
+            std2 = np.nanstd(df[dmRS == 1][column].astype('float32'), ddof=1)
+            stds2.extend([std2])
+
+            mean3 = np.nanmean(df[mortality == 0][column].astype('float32'))
+            means3.extend([mean3])
+            std3 = np.nanstd(df[mortality== 0][column].astype('float32'), ddof=1)
+            stds3.extend([std3])
+
+            mean4 = np.nanmean(df[mortality== 1][column].astype('float32'))
+            means4.extend([mean4])
+            std4 = np.nanstd(df[mortality == 1][column].astype('float32'), ddof=1)
+            stds4.extend([std4])
+
+    output_bivartiate['mean_mrs0'] = means1
+    output_bivartiate['mean_mrs1'] = means2
+    output_bivartiate['mean_mortality0'] = means3
+    output_bivartiate['mean_mortality1'] = means4
+
+    output_bivartiate['std_mrs0'] = stds1
+    output_bivartiate['std_mrs1'] = stds2
+    output_bivartiate['std_mortality0'] = stds3
+    output_bivartiate['std_mortality1'] = stds4
+
+    return output_bivartiate
+
+
+def get_pvalue(df, target, categories_list):
+    methods = []
+    p_values = []
+
+    for i, column in enumerate(list(df)):
+        # print(column)
+        if df[column].isnull().all():
+            methods.extend(['None'])
+            p_values.extend([1])
+            continue
+
+        if categories_list[i] == 'cat':
+            contingency_table = pd.crosstab(index=df[column], columns=target)
+            # This test should only be used if the observed and expected frequencies in each cell are at least 5
+
+            if np.min(contingency_table.values) < 5:
+                if contingency_table.shape[1] > 1 and contingency_table.shape[0] > 1:
+                    # oddsratio, p = scipy.stats.fisher_exact(contingency_table)
+                    # print(contingency_table)
+                    ## R Code
+                    FisherTestR = robjects.r['fisher.test']
+                    numpy2ri.activate()
+                    p = float(np.array(FisherTestR(contingency_table.to_numpy(), workspace=2e8)[0])[0])
+
+                    method = 'Fisher test'
+                else:
+                    p = 1
+                    method = 'None'
+
+            else:
+                chi2, p, dof, expected = scipy.stats.chi2_contingency(contingency_table)
+                method = 'Chi-squared'
+
+        elif categories_list[i] in ['int', 'float', 'ord']:
+
+            a = df[column][target == 0]
+            b = df[column][target == 1]
+            stat, p = scipy.stats.ttest_ind(a.dropna(), b.dropna())
+            method = 'T-test'
+
+        else:
+            if column == 'Id':
+                p = 0
+                method = 'None'
+            else:
+                raise ValueError(column, 'Statistical test for', categories_list[i], 'not implemented')
+
+        methods.extend([method])
+        p_values.extend([p])
+
+    return p_values, methods
diff --git a/Loss/Loss_uncertainty.py b/Loss/Loss_uncertainty.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c749b78d6206270f7117d97139b01ff8621612b
--- /dev/null
+++ b/Loss/Loss_uncertainty.py
@@ -0,0 +1,67 @@
+import torch
+
+
+def old_loss(y, f, s):
+    y = torch.squeeze(y)
+
+    """"
+    a1= torch.exp(-sigma)
+    a2= torch.log(f)
+    a3= torch.mul(y, a2)
+    a = torch.mul(a1, a3)
+    b = torch.mul(sigma, y)
+    loss = torch.div((-a+b),2)
+    loss = torch.sum(loss, dim=1)
+    """
+    ce1 = torch.log(f)
+    ce2 = torch.mul(ce1, y)
+    cross_entropy_error = torch.mean(ce2, dim=1)
+    sigma = torch.exp(-s)
+    term1 = -torch.mul(cross_entropy_error, sigma)
+    term2 = s / 2
+
+    loss = torch.div(term1 + term2, 2)
+
+    return loss
+
+
+class loss_uncertainty(object):
+
+    def __init__(self, weights):
+        self.total_loss = torch.zeros([1])
+        self.sum = torch.zeros([1])
+        self.elements = 0
+        self.weights = weights
+
+
+    def get_loss(self, f, y):
+        y = torch.squeeze(y)
+        assert y.shape[1] == self.weights.shape[0]
+
+        weights_vector = self.weights.expand([f.shape[0], -1])
+        weights_vector = torch.mul(weights_vector, y)
+        weights_vector = torch.sum(weights_vector, dim=1)
+
+
+        a = torch.mul(f, y)
+        ce2 = -torch.sum(a, dim=1)
+        class_weighted_ce2 = torch.mul(weights_vector, ce2)
+
+        self.update_total_loss(class_weighted_ce2, f.shape[0])
+
+        total_loss = torch.mean(class_weighted_ce2)
+
+        return total_loss
+
+    def update_total_loss(self, loss, elements):
+        self.sum = torch.sum(loss).detach().cpu().numpy()
+        self.elements += elements
+        self.total_loss = self.sum / self.elements
+
+    def get_total_loss(self):
+        return self.total_loss
+
+    def clear(self):
+        self.total_loss = torch.zeros([1])
+        self.sum = torch.zeros([1])
+        self.elements = 0
diff --git a/Loss/__pycache__/Loss_uncertainty.cpython-37.pyc b/Loss/__pycache__/Loss_uncertainty.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7655bee12f0561e92ed7baebd3a93b4325afb6dd
Binary files /dev/null and b/Loss/__pycache__/Loss_uncertainty.cpython-37.pyc differ
diff --git a/Metrics/ClassificationMetrics.py b/Metrics/ClassificationMetrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e40387b359c62c6172d96187c74e9f082e3a738
--- /dev/null
+++ b/Metrics/ClassificationMetrics.py
@@ -0,0 +1,126 @@
+import numpy as np
+from sklearn.metrics import roc_auc_score
+import torch
+
+
+class ClassificationMetrics(object):
+
+    def __init__(self, classes):
+        self.classes = classes
+        self.cm = np.zeros((self.classes, self.classes))
+        self.accuracy = 0
+        self.balanced_accuracy = 0
+        self.recall = 0
+        self.precision = 0
+        self.f1 = 0
+        self.auc = 0
+        self.nll = 0
+        self.pred = []
+        self.y = []
+
+    def compute_metrics(self, pred, y):
+        """
+
+        :param pred:
+        :param y:
+        :return:
+        """
+
+        self.pred.extend([pred])
+        self.y.extend([y])
+
+        self.cm = confusion_matrix(self.cm, pred, y)
+        self.accuracy = np.sum(np.diagonal(self.cm) / np.sum(np.sum(self.cm)))
+        self.nll = -np.mean(np.sum(np.multiply(self.y[0], np.log(self.pred[0])), axis=1))
+        epsilon = np.finfo(float).eps
+        self.precision = [self.cm[i, i] / (np.sum(self.cm[:, i] + epsilon)) for i in range(self.classes)]
+        self.recall = [self.cm[i, i] / (np.sum(self.cm[i, :] + epsilon)) for i in range(self.classes)]
+        self.f1 = [(2 * self.precision[i] * self.recall[i]) / (self.precision[i] + self.recall[i] + epsilon)
+                   for i in range(self.classes)]
+        self.balanced_accuracy = np.mean(self.recall)
+
+        try:
+            if len(self.y[0].squeeze().shape) > 1:
+                a = np.argmax(self.y[0], axis=1)
+            else:
+                a = np.array(self.y[0]).squeeze()
+
+            # self.auc = roc_auc_score(a, b, multi_class=mc)
+            self.auc = self.custom_auc(self.pred[0], a)
+
+        except:
+            self.auc = 0.5
+
+        metrics = {'cm': self.cm,
+                   'accuracy': self.accuracy,
+                   'precision': self.precision,
+                   'recall': self.recall,
+                   'f1': self.f1,
+                   'balanced_accuracy': self.balanced_accuracy,
+                   'auc': self.auc,
+                   'nll': self.nll}
+
+        return metrics
+
+    def clear(self):
+        self.cm = np.zeros((self.classes, self.classes))
+        self.accuracy = 0
+        self.balanced_accuracy = 0
+        self.recall = 0
+        self.precision = 0
+        self.f1 = 0
+        self.auc = 0
+        self.pred = []
+        self.y = []
+
+    def multiclass_to_binary(self, pred):
+
+        binary_pred = np.zeros((pred.shape[0], 2))
+
+        prob_classs0 = np.divide(np.sum(pred[:, 0:2], axis=1), 3)
+        prob_classs1 = np.divide(np.sum(pred[:, 2:6], axis=1), 4)
+
+        binary_pred[:, 0] = prob_classs0
+        binary_pred[:, 1] = prob_classs1
+
+        return binary_pred
+
+    def custom_auc(self, prob, labels):
+
+        unique_labels = np.unique(labels)
+        all_auc = 0
+
+        for l, label in enumerate(unique_labels):
+
+            prob_l = prob[:, l]
+            class_indices = [i for i in range(len(labels)) if labels[i] == label]
+            rest_indices = [i for i in range(len(labels)) if labels[i] != label]
+
+            suma = 0
+            auc = 0
+            for i in class_indices:
+                for j in rest_indices:
+                    auc += (int(prob_l[i] < prob_l[j])) + (0.5 * int(prob_l[i] == prob_l[j]))
+                    suma += 1
+
+            all_auc += 1 - (auc / suma)
+
+        return all_auc / unique_labels.shape[0]
+
+
+def confusion_matrix(cm, pred, y):
+    # assert (pred.shape == y.shape), "Input size does not match target size"
+    for i in range(pred.shape[0]):
+        if len(pred.squeeze().shape) > 1:
+            a = np.argmax(pred[i, :])
+        else:
+            a = int(np.round(pred[i]))
+
+        if len(y.squeeze().shape) > 1:
+            b = np.argmax(y[i, :])
+        else:
+            b = int(y[i])
+
+        cm[a][b] += 1
+
+    return cm
diff --git a/Metrics/RegressionMetrics.py b/Metrics/RegressionMetrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b11007ab0889279ff2d44773c71ba1a8830af1c
--- /dev/null
+++ b/Metrics/RegressionMetrics.py
@@ -0,0 +1,55 @@
+import numpy as np
+from Metrics._utils import int_to_binary
+from Metrics.ClassificationMetrics import ClassificationMetrics
+
+
+class RegressionMetrics(object):
+
+    def __init__(self):
+        self.dif = []
+        self.mse = 0
+        self.mae = 0
+        self.median_ae = 0
+
+        self.pred = []
+        self.y = []
+
+        self.cm = ClassificationMetrics(classes=2)
+
+    def compute_metrics(self, pred, y):
+        """
+
+        :param pred:
+        :param y:
+        :return:
+        """
+
+        self.pred.extend([pred[:, 0]])
+        self.y.extend([y])
+
+        self.dif = np.abs(self.pred[0].squeeze() - self.y[0])
+        self.mae = np.mean(self.dif)
+        self.median_ae = np.median(self.dif)
+        self.mse = np.mean(np.power(self.dif, 2))
+
+        metrics = {'mae': self.mae,
+                   'mse': self.mse,
+                   'median_ae': self.median_ae
+                   }
+
+        binary_y = int_to_binary(y, threshold=2)
+        binary_pred = int_to_binary(pred[:, 0], threshold=2)
+        clas_metrics = self.cm.compute_metrics(binary_pred, binary_y)
+        metrics.update(clas_metrics)
+        # Convert to binary prediction
+
+        return metrics
+
+    def clear(self):
+        self.mae = 0
+        self.mse = 0
+        self.median_ae = 0
+
+        self.pred = []
+        self.y = []
+        self.cm.clear()
diff --git a/Metrics/__pycache__/ClassificationMetrics.cpython-37.pyc b/Metrics/__pycache__/ClassificationMetrics.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c52322bd8b9f23331bd9504604d94943e5fea4f4
Binary files /dev/null and b/Metrics/__pycache__/ClassificationMetrics.cpython-37.pyc differ
diff --git a/Metrics/__pycache__/RegressionMetrics.cpython-37.pyc b/Metrics/__pycache__/RegressionMetrics.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2df34842674b3cb390c2ab9f89223d4a7582fef2
Binary files /dev/null and b/Metrics/__pycache__/RegressionMetrics.cpython-37.pyc differ
diff --git a/Metrics/__pycache__/_utils.cpython-37.pyc b/Metrics/__pycache__/_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1210ca5be0dcb0a67b7f8deba0ec3accdefa8cc2
Binary files /dev/null and b/Metrics/__pycache__/_utils.cpython-37.pyc differ
diff --git a/Metrics/_utils.py b/Metrics/_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8b3432c6ab665565d6ecf8566c712fedd18d020
--- /dev/null
+++ b/Metrics/_utils.py
@@ -0,0 +1,7 @@
+import numpy as np
+
+
+def int_to_binary(vector, threshold):
+    a = [1 if np.floor(i*6) > threshold else 0 for i in vector]
+
+    return np.array(a)
diff --git a/README.md b/README.md
deleted file mode 100644
index 0074788a857f219f7b06573fcfacfb2dc9647cd7..0000000000000000000000000000000000000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# AIS_Regress
-
diff --git a/__pycache__/evaluate_model.cpython-37.pyc b/__pycache__/evaluate_model.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..322b92ce507f58b1850b31118dd901c41cf6da33
Binary files /dev/null and b/__pycache__/evaluate_model.cpython-37.pyc differ
diff --git a/__pycache__/train.cpython-37.pyc b/__pycache__/train.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e05fa6688d18b45eec70987ff4883af0f7ac0409
Binary files /dev/null and b/__pycache__/train.cpython-37.pyc differ
diff --git a/__pycache__/train_graph.cpython-37.pyc b/__pycache__/train_graph.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..966d938bfed7c0d6f1b8803f3d022ec82c7a5032
Binary files /dev/null and b/__pycache__/train_graph.cpython-37.pyc differ
diff --git a/_utils/Result_container.py b/_utils/Result_container.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e95deeced1960ec25562a723503368028c48b35
--- /dev/null
+++ b/_utils/Result_container.py
@@ -0,0 +1,54 @@
+import numpy as np
+import os
+import pandas as pd
+import pprint
+
+def mean_dicts(list_dicts):
+    number_dicts = len(list_dicts)
+    keys_dicts = list_dicts['Fold 0'].keys()
+
+    mean_dict = {}
+    for k in keys_dicts:
+
+        values = [list_dicts['Fold {}'.format(l)][k] for l in range(number_dicts)]
+
+        mean_value = values[number_dicts - 1]
+        for l in range(number_dicts - 1):
+            mean_value = np.add(mean_value, values[l])
+        if k != 'cm':
+            mean_value /= number_dicts
+        mean_dict[k] = mean_value
+    return mean_dict
+
+class Result_container(object):
+
+    def __init__(self, target_metrics, output):
+
+        self.target_metrics = {}
+        self.results = {}
+        for o in output:
+            self.results[o]= {}
+            for m in target_metrics:
+                self.results[o][m] = {}
+
+    def update(self,output, method, metrics):
+
+
+        mean_m = mean_dicts(metrics)
+        #pprint.pprint(mean_m)
+        for k in self.results[output].keys():
+            self.results[output][k][method] = mean_m[k]
+
+    def save(self, output_dir, name=None):
+        dir_name = os.path.join(output_dir, (name+'.xlsx'))
+        writer = pd.ExcelWriter(path=dir_name)
+
+        for o in self.results.keys():
+
+            # Write each dataframe containing a metric for all methods to a different worksheet
+            #items = self.results.items()
+            sheet_metric = pd.DataFrame.from_dict(self.results[o])
+            sheet_metric.to_excel(writer, sheet_name=o)
+
+        # Close the Pandas Excel writer and output the Excel file.
+        writer.save()
diff --git a/_utils/__pycache__/Result_container.cpython-37.pyc b/_utils/__pycache__/Result_container.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb327b26fa5271aa851b3f12d0161b29996ea1dd
Binary files /dev/null and b/_utils/__pycache__/Result_container.cpython-37.pyc differ
diff --git a/_utils/__pycache__/plot_utils.cpython-37.pyc b/_utils/__pycache__/plot_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..08e282b35f76714334f984cdfdfdb75929a6f35c
Binary files /dev/null and b/_utils/__pycache__/plot_utils.cpython-37.pyc differ
diff --git a/_utils/plot_utils.py b/_utils/plot_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..73fd8e9042a1381d0184240972261cba10509d72
--- /dev/null
+++ b/_utils/plot_utils.py
@@ -0,0 +1,203 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+import seaborn as sns
+
+
+def plot_significant_values(df, value, th,  out_dir):
+    df = df.sort_values(by=[value])
+    labels = df.index
+    values = df[value].values
+    indices = np.where(values < th)
+
+    df= pd.DataFrame(data=values[indices], index=labels[indices])
+    file_path = os.path.join(out_dir, '{}_{}.xlsx'.format(value, th))
+    with pd.ExcelWriter(file_path) as writer:
+        df.to_excel(writer, sheet_name='Sheet1')
+
+
+
+    inv_values = 1 / values
+
+    fig, ax = plt.subplots(figsize=(20, 12))
+    my_cmap = plt.cm.get_cmap('YlGnBu')
+    colors = my_cmap(np.log(inv_values) / np.max(np.log(inv_values)))
+    rect1 = ax.bar(labels[indices], inv_values[indices], log=True, color=colors)
+    for i, rect in enumerate(rect1):
+        height = rect.get_height()
+        ax.annotate('{}'.format(format(values[i], '.2e')),
+                    xy=(rect.get_x() + (rect.get_width() / 2), height + 1),
+                    xytext=(0, 1),
+                    textcoords="offset points",
+                    ha='center', va='bottom')
+
+    # ax.plot([0, len(indices[0])], [th, th], "k--")
+    plt.xticks(rotation=15, ha='right')
+    # plt.subplots_adjust(bottom=0.5)
+    plt.ylim(0.1, 1.3*np.max(inv_values))
+    fig_path = os.path.join(out_dir, '{}_{}.png'.format(value,th))
+    plt.savefig(fig_path)
+    plt.close()
+
+
+def plot_mv(mv, th, out_dir):
+    th = int(th)
+
+    labels = mv.index
+    values = mv.values
+    indices = np.where(values > 0)
+    above_th = np.maximum(values - th, 0)
+    below_th = np.minimum(values, th)
+
+    fig, ax = plt.subplots(figsize=(20, 5))
+    rect1 = ax.bar(labels[indices], below_th[indices], log=True, color='g')
+    rect2 = ax.bar(labels[indices], above_th[indices], log=True, color='r',
+                   bottom=below_th[indices])
+
+    for rect_below, rect_above in zip(rect1, rect2):
+        height = (rect_below.get_height() + rect_above.get_height())
+        ax.annotate('{}'.format(height),
+                    xy=(rect_above.get_x() + (rect_above.get_width() / 2), height + 1),
+                    xytext=(0, 1),
+                    textcoords="offset points",
+                    ha='center', va='bottom')
+
+    ax.plot([0, len(indices[0])], [th, th], "k--")
+    plt.xticks(rotation=45, ha='right')
+    plt.subplots_adjust(bottom=0.5)
+    fig_path = os.path.join(out_dir, 'missing_values.png')
+    plt.savefig(fig_path)
+
+
+def plot_correlation_features(df, out_dir):
+    corr = df.corr()
+    mask = np.triu(np.ones_like(corr, dtype=bool))
+    f, ax = plt.subplots()
+    sns.heatmap(data=corr, mask=mask, center=0,
+                cmap=sns.diverging_palette(230, 20), square=True)
+
+    ax.margins(0.01)
+    plt.xticks(rotation=45, ha='right')
+    plt.tight_layout()
+    plt.savefig(out_dir)
+
+
+def plot_p_values(statistics):
+    a = 0
+
+
+def plot_distribution_categorical(df, variable, table, title='', out=False, out_dir='', p_values=[]):
+    #
+    present_values = sorted(list(np.unique(df[variable])))
+    expected_values = list(table[variable]['description'].keys())
+    variables_names = list(table[variable]['description'].values())
+
+    variables_names = [variables_names[count] for count, v in enumerate(expected_values) if v in present_values]
+
+    data = df[variable].value_counts().sort_index().values.tolist()
+    colors = plt.cm.get_cmap('Set3')(np.linspace(0., 1, num=len(variables_names)))
+
+    fig, axs = plt.subplots(figsize=(15, 6), nrows=1, ncols=2)
+
+    bars = axs[0].bar(variables_names, data, color=colors)
+    for bar in bars:
+        axs[0].annotate('{}'.format(bar.get_height()),
+                        xy=(bar.get_x() + (bar.get_width() / 2), bar.get_height() + 0.5),
+                        xytext=(0, 1),
+                        textcoords="offset points",
+                        ha='center', va='bottom'
+                        )
+    axs[0].tick_params(labelrotation=45)
+
+    def func(pct, allvals):
+        absolute = int(round(pct / 100. * np.sum(allvals)))
+        return "{:.1f}%\n({:d} patients)".format(pct, absolute)
+
+    wedges, texts, autotext = axs[1].pie(x=data,
+                                         autopct=lambda pct: func(pct, data),
+                                         textprops=dict(color='k'),
+                                         counterclock=False,
+                                         colors=colors)
+    y = np.asarray(data)
+    porcent = 100. * y / y.sum()
+    labels = ['{0} - {1:1.2f}%'.format(i, j) for i, j in zip(variables_names, porcent)]
+    axs[1].legend(wedges, labels,
+                  loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))
+
+    plt.setp(autotext, size=10)
+    fig.suptitle(title, size=20)
+    plt.tight_layout()
+    fig_path = os.path.join(out_dir, '{0}_distribution.png'.format(variable))
+    plt.savefig(fig_path)
+    plt.close()
+    if not out:
+        fig, ax = plt.subplots(2, 2, figsize=(12, 8))
+
+        labels = {'mRS90d': ['mRS 0', 'mRS 1', 'mRS 2', 'mRS 3', 'mRS 4', 'mRS 5', 'mRS 6'],
+                  'dmRS': ['Good outcome', 'Bad outcome (>2mRS)'],
+                  'shift_mRS': ['shift of 0', 'shift of 1', 'shift of 2', 'shift of 3',
+                                'shift of 4', 'shift of 5', 'shift of 6'],
+                  'mortality': ['Alive after 90d', 'Dead after 90 d']}
+
+        for count, output in enumerate(['mRS90d', 'shift_mRS', 'dmRS', 'mortality']):
+            x = int(np.floor(count / 2))
+            y = int(count % 2)
+            sns.countplot(ax=ax[x, y], x=output, hue=variable, data=df, palette="pastel")
+            ax[x, y].set_xticklabels(labels[output])
+            ax[x, y].get_legend().remove()
+            if count > 1:
+                p = p_values[count - 2]
+                color = 'k' if p > 0.05 else 'r'
+                ax[x, y].text(0.2, 0.95, 'P value: {:.6f}'.format(p_values[count - 2]),
+                              ha='center', va='center', transform=ax[x, y].transAxes, fontsize=9, color=color)
+
+        fig.legend(title=variable, labels=variables_names)
+        fig.suptitle(title, size=20)
+        plt.tight_layout()
+        fig_path = os.path.join(out_dir, '{0}_distribution_target.png'.format(variable))
+        plt.savefig(fig_path)
+    plt.close()
+
+
+def plot_distribution_numerical(df, variable, title='', out_dir='', p_values=[]):
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+    df[variable] = df[variable].astype('float32')
+    sns.histplot(ax=ax[0], data=df, x=variable, palette='pastel', kde=True)
+    # sns.kdeplot(ax=ax[0], data=df, x=variable, palette='pastel')
+
+    sns.boxplot(ax=ax[1], y=variable, data=df, palette="pastel")
+    sns.swarmplot(ax=ax[1], y=variable, color="k", size=3, data=df)
+    fig.suptitle(title)
+    plt.tight_layout()
+    fig_path = os.path.join(out_dir, '{0}_distribution.png'.format(variable))
+    plt.savefig(fig_path)
+    plt.close()
+
+    fig, ax = plt.subplots(2, 2, figsize=(12, 8))
+
+    labels = {'mRS90d': ['mRS 0', 'mRS 1', 'mRS 2', 'mRS 3', 'mRS 4', 'mRS 5', 'mRS 6'],
+              'dmRS': ['Good outcome', 'Bad outcome (>2mRS)'],
+              'shift_mRS': ['shift of 0', 'shift of 1', 'shift of 2', 'shift of 3',
+                            'shift of 4', 'shift of 5', 'shift of 6'],
+              'mortality': ['Alive after 90d', 'Dead after 90 d']}
+
+    for count, output in enumerate(['mRS90d', 'shift_mRS', 'dmRS', 'mortality']):
+        x = int(np.floor(count / 2))
+        y = int(count % 2)
+        sns.boxplot(ax=ax[x, y], x=output, y=variable, data=df, palette="pastel")
+        sns.swarmplot(ax=ax[x, y], x=output, y=variable, color="k", size=3, data=df)
+        # ax[x,y].legend(title=variable, labels= variables_names)
+        ax[x, y].set_xticklabels(labels[output])
+        # ax[x, y].get_legend().remove()
+        if count > 0:
+            p = p_values[count - 2]
+            color = 'k' if p > 0.05 else 'r'
+            ax[x, y].text(0.2, 0.95, 'P value: {:.6f}'.format(p_values[count - 2]),
+                          ha='center', va='center', transform=ax[x, y].transAxes, fontsize=9, color=color)
+
+    fig.suptitle(title, size=20)
+    plt.tight_layout()
+    fig_path = os.path.join(out_dir, '{0}_distribution_target.png'.format(variable))
+    plt.savefig(fig_path)
+    plt.close()
diff --git a/architectures/3D_CNN.py b/architectures/3D_CNN.py
new file mode 100644
index 0000000000000000000000000000000000000000..21a40e996f3b3b67441b0bbb9f45e23778e07698
--- /dev/null
+++ b/architectures/3D_CNN.py
@@ -0,0 +1,64 @@
+import torch.nn as nn
+import torch.nn.functional as F
+import torch
+
+
+class _3D_CNN(nn.Module):
+    def __init__(self, num_output):
+        super(_3D_CNN, self).__init__()
+        self.conv1 = nn.Conv3d(1, 8, kernel_size=3, stride=1, padding=1)
+        self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=1)
+        self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)
+        self.conv4 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
+        self.conv5 = nn.Conv3d(64, 128, kernel_size=3, stride=1, padding=1)
+
+        self.BN1 = nn.BatchNorm3d(num_features=8)
+        self.BN2 = nn.BatchNorm3d(num_features=16)
+        self.BN3 = nn.BatchNorm3d(num_features=32)
+        self.BN4 = nn.BatchNorm3d(num_features=64)
+        self.BN5 = nn.BatchNorm3d(num_features=128)
+
+        self.pool1 = nn.AdaptiveAvgPool3d((64, 64, 64))
+        self.pool2 = nn.AdaptiveAvgPool3d((32, 32, 32))
+        self.pool3 = nn.AdaptiveAvgPool3d((16, 16, 16))
+        self.pool4 = nn.AdaptiveAvgPool3d((8,8,8))
+        self.pool5 = nn.AdaptiveAvgPool3d((4,4,4))
+
+        self.fc1 = nn.Linear(10244, 1300)
+        self.fc2 = nn.Linear(1300, 50)
+        self.fc3 = nn.Linear(50, num_output)
+
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight.data)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Conv3d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out',nonlinearity='relu')
+                nn.init.constant_(m.bias, 0)
+
+
+
+    def forward(self, x):
+
+        x = F.relu(self.BN1(self.conv1(x)))
+        x = self.pool1(x)
+
+        x = F.relu(self.BN2(self.conv2(x)))
+        x = self.pool2(x)
+
+        x = F.relu(self.BN3(self.conv3(x)))
+        x = self.pool3(x)
+
+        x = F.relu(self.BN4(self.conv4(x)))
+        x = self.pool4(x)
+
+        x = F.relu(self.BN5(self.conv5(x)))
+        x = self.pool5(x)
+
+        x = x.view(x.size(0), -1)
+
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = torch.log_softmax(self.fc3(x), dim=1)
+
+        return x
diff --git a/architectures/Edge_GCN.py b/architectures/Edge_GCN.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfa90f026594c82b98f7f661100a66d010af4f73
--- /dev/null
+++ b/architectures/Edge_GCN.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import ChebConv, EdgeConv
+from torch_geometric.utils import dropout_adj
+
+class Edge_GCN(nn.Module):
+    def __init__(self, nfeat, nclass, dropout=0):
+        super(Edge_GCN, self).__init__()
+
+        self.dropout = dropout
+
+        self.gc1 = EdgeConv(nn.Sequential(nn.Linear(nfeat, int(nfeat/2)), nn.ReLU()))
+        self.gc2 = EdgeConv(nn.Sequential(nn.Linear(int(nfeat/2), nclass), nn.ReLU()))
+
+
+    def forward(self, data, edge_index, weigths):
+        print(data.shape, edge_index.shape)
+        x = self.gc1(data, edge_index)
+        edge_index_drop, _ = dropout_adj(edge_index, p= self.dropout)
+        x = self.gc2(x, edge_index_drop)
+        x2= torch.log_softmax(x, dim=1)
+
+        return x2
+
diff --git a/architectures/FCN.py b/architectures/FCN.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2de6cbb3ad50185d48c0bf0dabd1d01a62a32c1
--- /dev/null
+++ b/architectures/FCN.py
@@ -0,0 +1,52 @@
+import torch
+import torch.nn as nn
+
+
+class Basic_FCN(nn.Module):
+
+    def __init__(self, in_features, layers, out_features, dropout_rate):
+        super(Basic_FCN, self).__init__()
+
+        self.layers = []
+        self.n_layers = layers['number']
+        l = nn.Linear(in_features, layers['layer1'])
+        self.layers.extend([l])
+
+        if self.n_layers > 2:
+            for i in range(self.n_layers - 2):
+                name0 = 'layer{}'.format(i + 1)
+                name1 = 'layer{}'.format(i + 2)
+                l = nn.Linear(layers[name0], layers[name1])
+                self.layers.extend([l])
+
+        if self.n_layers >= 2:
+            name0 = 'layer{}'.format(self.n_layers - 1)
+            l = nn.Linear(layers[name0], out_features)
+            self.layers.extend([l])
+        self.layers = nn.ModuleList(self.layers)
+
+        self.dropout = nn.Dropout(p=dropout_rate)
+
+        # self.logsoftmax = nn.LogSoftmax(dim=1)
+
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight.data)
+                nn.init.constant_(m.bias.data, 0.03)
+
+    def forward(self, data):
+
+        if len(data.shape) == 1:
+            data = torch.reshape(data, (data.shape[0], 1))
+        x = torch.relu(self.layers[0](data))
+
+        if self.n_layers > 2:
+            for i in range(1, self.n_layers - 1):
+                x = torch.relu(self.layers[i](x))
+                x = self.dropout(x)
+        # x = torch.sigmoid(self.layers[self.n_layers - 1](x))
+        #
+        #x = self.layers[self.n_layers - 1](x)
+        x = self.layers[self.n_layers - 1](x)
+
+        return torch.log_softmax(x, dim=1)
diff --git a/architectures/GCN.py b/architectures/GCN.py
new file mode 100644
index 0000000000000000000000000000000000000000..239fd36784d042150387c3c5bcc49578ab3591a6
--- /dev/null
+++ b/architectures/GCN.py
@@ -0,0 +1,38 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import ChebConv, EdgeConv
+from torch_geometric.utils import dropout_adj
+
+
+class GCN(nn.Module):
+    def __init__(self, nfeat, nclass, dropout=0):
+        super(GCN, self).__init__()
+
+        self.dropout = dropout
+
+        self.gc1 = ChebConv(nfeat, 200, 3)
+        self.gc2 = ChebConv(200, 50, 3)
+        self.gc3 = ChebConv(50, nclass, 3)
+        #self.gc1 = EdgeConv(nn.Sequential(nn.Linear(nfeat, nfeat/2), nn.ReLU()))
+        #self.gc2 = EdgeConv(nn.Sequential(nn.Linear(nfeat/2, nclass), nn.ReLU()))
+
+    def forward(self, data, edge_index, weigths):
+
+        #x = self.gc1(data, edge_index, weigths)
+        x = self.gc1(data, edge_index)
+        x = F.relu(x)
+        x = F.dropout(x, p=self.dropout)
+        #edge_index_drop, _ = dropout_adj(edge_index, p= self.dropout)
+        #x = self.gc2(x, edge_index, weigths)
+        x = self.gc2(x, edge_index)
+        x = F.dropout(x, p=self.dropout)
+        #edge_index_drop, _ = dropout_adj(edge_index, p=self.dropout)
+        x = F.relu(x)
+        #x = self.gc3(x, edge_index, weigths)
+        x = self.gc3(x, edge_index)
+        #x1 = F.sigmoid(x)
+        x2= torch.log_softmax(x, dim=1)
+
+        return x2
+
diff --git a/architectures/ML_algorithms.py b/architectures/ML_algorithms.py
new file mode 100644
index 0000000000000000000000000000000000000000..075f073f6e6d694c6e9fd37998d5a91c15ad86ad
--- /dev/null
+++ b/architectures/ML_algorithms.py
@@ -0,0 +1,146 @@
+from Metrics.ClassificationMetrics import ClassificationMetrics
+
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import GridSearchCV
+from sklearn.neural_network import MLPClassifier
+import xgboost as xgb
+from scipy.stats import uniform, randint
+
+def apply_LR(x_train, x_val, x_test, y_train, y_val, y_test):
+    # Number of classes
+    c = np.unique(y_train).shape[0]
+
+    # Parameters for hyperparamenter optimization
+    solvers = ['newton-cg', 'liblinear']
+    penalty = ['l2']
+    c_values = [1000, 100, 10]
+
+    # Define grid search
+    grid = dict(solver=solvers, penalty=penalty, C=c_values)
+    scoring = 'roc_auc' if c < 3 else 'f1_micro'
+    multiclass = 'auto' if c < 3 else 'ovr'
+    try:
+        grid_search = GridSearchCV(estimator=LogisticRegression(), param_grid=grid, n_jobs=-1, cv=5,
+                                   scoring=scoring, error_score=0)
+        grid_result = grid_search.fit(x_val, y_val)
+
+        # Summarize results
+        print("Best dictionaries: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
+
+        # Use selected hyperparameters to train the model with the best dictionaries
+        clf = LogisticRegression(C=grid_result.best_params_['C'], solver=grid_result.best_params_['solver'],
+                                 random_state=0, max_iter=500, multi_class='ovr').fit(x_train, y_train)
+    except:
+        clf = LogisticRegression(random_state=0, max_iter=500, multi_class='ovr').fit(x_train, y_train)
+
+    metrics_tensor = compute_metrics(clf, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)])
+
+    return clf, metrics_tensor
+
+
+def apply_random_forest(x_train, x_val, x_test, y_train, y_val, y_test, save=False):
+    # Parameters for hyperparamenter optimization
+    # Number of trees in random forest
+    n_estimators = [2, 5, 10, 100]
+    # Number of features to consider at every split
+
+    max_features = ['log2', 'sqrt'] + [a for a in (2,5,10, 20) if a<=x_train.shape[1]]
+
+    # Maximum number of levels in tree
+    max_depth = [5, 10, None]
+    bootstrap = [True, False]
+    criterion = ['gini', 'entropy']
+
+    # Define grid search
+    random_grid = {'n_estimators': n_estimators,
+                   'max_features': max_features,
+                   'max_depth': max_depth,
+                   'criterion': criterion,
+                   'bootstrap': bootstrap}
+
+    rf = RandomForestClassifier(random_state=True)
+    grid_search = GridSearchCV(estimator=rf, param_grid=random_grid, n_jobs=-1, cv=2,
+                               scoring='roc_auc', error_score=0)
+    grid_result = grid_search.fit(x_val, y_val)
+
+    # Summarize results
+    print("Best dictionaries: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
+
+    # Use selected hyperparameters to train the model with the best dictionaries
+    clf = rf.fit(x_train, y_train)
+    metrics_tensor = compute_metrics(clf, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)])
+
+    return clf, metrics_tensor
+
+
+def apply_mlp(x_train, x_val, x_test, y_train, y_val, y_test):
+    # Number of classes
+
+    mlp_gs = MLPClassifier(max_iter=300, verbose=False)
+    parameter_space = {
+        #'hidden_layer_sizes': [(10, 30, 10), (20,), (50, 20), (20, 50, 20)],
+        'hidden_layer_sizes': [ (20,), (64, 32, 16), (128, 32, 16, 8), (40, 20)],
+        'activation': ['relu'],
+        'solver': ['sgd', 'adam'],
+        'alpha': [0.0001, 0.05],
+        'learning_rate': ['constant', 'adaptive']
+    }
+
+    clf = GridSearchCV(mlp_gs, param_grid=parameter_space, n_jobs=-1, cv=2,
+                       scoring='roc_auc', error_score=0)
+    clf.fit(x_val, y_val)  # X is train samples and y is the corresponding labels
+    print('Best %f using %s '% (clf.best_score_, clf.best_params_))
+
+    # Use selected hyperparameters to train the model with the best dictionaries
+    clf = mlp_gs.fit(x_train, y_train)
+    metrics_tensor = compute_metrics(clf, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)])
+
+    return clf, metrics_tensor
+
+def apply_xgbBoost(x_train, x_val, x_test, y_train, y_val, y_test):
+
+    params = {
+        'gamma': [0.5, 1, 1.5, 2,5],
+        'subsample': [0.6, 0.8, 1.0],
+        'colsample_bytree': [0.4, 0.6, 0.8, 1.0],
+        'max_depth': [2, 4, 6],
+
+
+    }
+    xgb_model = xgb.XGBClassifier( random_state=4, use_label_encoder=False, eval_metric='mlogloss')
+    grid_search = GridSearchCV(estimator=xgb_model, param_grid=params, n_jobs=-1, cv=2,
+                               scoring='roc_auc', error_score=0)
+    clf = grid_search.fit(x_val, y_val)
+    print('Best %f using %s ' % (clf.best_score_, clf.best_params_))
+    best_model = xgb_model.fit(x_train, y_train)
+
+    metrics_tensor = []
+    metrics_tensor = compute_metrics(best_model, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)] )
+    splits = [(x_train, y_train), (x_val, y_val), (x_test, y_test)]
+    """for x, y in splits:
+        prob = xgb_model.predict_proba(x)
+        output_one_hot = np.zeros((y.shape[0], len(np.unique(y))))
+        for i in range(output_one_hot.shape[0]):
+            output_one_hot[i, y[i]] = 1
+        CM = ClassificationMetrics(classes=2)
+        metrics = CM.compute_metrics(prob.squeeze(), output_one_hot)
+        metrics_tensor.extend([metrics])"""
+
+    return best_model, metrics_tensor
+
+
+
+def compute_metrics(clf, splits):
+    metrics_tensor = []
+    for x, y in splits:
+        prob = clf.predict_proba(x)
+        output_one_hot = np.zeros((y.shape[0], len(np.unique(y))))
+        for i in range(output_one_hot.shape[0]):
+            output_one_hot[i, y[i]] = 1
+        CM = ClassificationMetrics(classes=2)
+        metrics = CM.compute_metrics(prob.squeeze(), output_one_hot)
+        metrics_tensor.extend([metrics])
+
+    return metrics_tensor
diff --git a/architectures/__pycache__/Edge_GCN.cpython-37.pyc b/architectures/__pycache__/Edge_GCN.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e84e8b4313f41fe71b4b9ef5bca63c59c83a4320
Binary files /dev/null and b/architectures/__pycache__/Edge_GCN.cpython-37.pyc differ
diff --git a/architectures/__pycache__/FCN.cpython-37.pyc b/architectures/__pycache__/FCN.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..48d5168787abc3f17c28ddf1a6e4d22b47176aad
Binary files /dev/null and b/architectures/__pycache__/FCN.cpython-37.pyc differ
diff --git a/architectures/__pycache__/GCN.cpython-37.pyc b/architectures/__pycache__/GCN.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fcd4d2fcac2fd81b246c8c8cdd10b29546684431
Binary files /dev/null and b/architectures/__pycache__/GCN.cpython-37.pyc differ
diff --git a/architectures/__pycache__/ML_algorithms.cpython-37.pyc b/architectures/__pycache__/ML_algorithms.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fbb131a9cdf60971f8ccc94d907cf2f5a60ecd32
Binary files /dev/null and b/architectures/__pycache__/ML_algorithms.cpython-37.pyc differ
diff --git a/dictionaries/dictionary_modalities.yml b/dictionaries/dictionary_modalities.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b22e741092c66e6db082bf5dbf036bf3ef39f168
--- /dev/null
+++ b/dictionaries/dictionary_modalities.yml
@@ -0,0 +1,764 @@
+#### Data divided into
+# 0. Output
+# 1.Meta data
+# 2.Clinical data
+# 3.NCCT data
+# 4.CTP data
+# 5.CTA data
+# 6.Treatment
+# 7.Treatment output
+# 7.Control CT data
+# 8.Temporal
+
+# Before-Imaging: Metadata, NIHSS, Unkown_Onset, Time_Onset_to_Admission
+# Pre-treatment: Metadata, NCCT, CTP, CTA (+NIHSS, Time_Onset_to_Admission, Time_CT_to_Angio)
+# Post-treatment: Metadata, NCCT, CTP, CTA, Treatment, Treatment out (+NIHSS, Time_Onset_to_Admission, Time_CT_to_Angio, Time_Puncture_to_Recan )
+# Post-treatment 24 h: Metadata, NCCT, CTP, CTA, Treatment, Treatment out, Control CT (+NIHSS)
+
+Ouput:
+
+  mRS90d:
+    type: 'ord'
+    info: 'Functional outcome at 90 days'
+    categories: 7
+    description:
+      0: 'mRS 0'
+      1: 'mRS 1'
+      2: 'mRS 2'
+      3: 'mRS 3'
+      4: 'mRS 4'
+      5: 'mRS 5'
+      6: 'mRS 6'
+
+  dmRS:
+    type: 'cat'
+    info: 'Binary functional outcome at 90 days'
+    categories: 2
+    description:
+        0: 'Good outcome'
+        1: 'Bad outcome'
+
+  mortality:
+    type: 'cat'
+    info: 'Mortality at 90 days'
+    categories: 2
+    description:
+        0: 'No'
+        1: 'Yes'
+
+  shift_mRS:
+    type: 'ord'
+    info: 'Shift in mRS'
+    categories: 7
+    description:
+
+        0: 'Shift of 0'
+        1: 'Shift of 1'
+        2: 'Shift of 2'
+        3: 'Shift of 3'
+        4: 'Shift of 4'
+        5: 'Shift of 5'
+        6: 'Shift of 6'
+
+Metadata:
+
+  Sex:
+    type: 'cat'
+    info: 'Patient sex'
+    categories: 2
+    description:
+      0: 'man'
+      1: 'woman'
+
+  Age:
+    type: 'int'
+    info: 'Patient age'
+
+Clinical:
+
+  NIHSS:
+    type: 'int'
+    info: 'National Institute of Health Stroke Scale'
+    categories: 43
+    description: None
+
+  pre-mRS:
+    type: 'ord'
+    info: 'mRS previous to stroke'
+    categories: 6
+    description:
+        0: 'mRS 0'
+        1: 'mRS 1'
+        2: 'mRS 2'
+        3: 'mRS 3'
+        4: 'mRS 4'
+        5: 'mRS 5'
+
+  aHT:
+    type: 'cat'
+    info: 'arterial hypertension'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  HLP:
+    type: 'cat'
+    info: 'hyperlipidemia'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+  DM:
+    type: 'cat'
+    info: 'Diabetes Mellitus'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  aFib:
+    type: 'cat'
+    info: 'atrial fibrillation'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  s.p. stroke:
+    type: 'cat'
+    info: 'previous stroke'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH:
+    type: 'cat'
+    info: 'anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH mono:
+    type: 'cat'
+    info: ' mono anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH duo:
+    type: 'cat'
+    info: 'dual anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  OAK:
+    type: 'cat'
+    info: 'oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  VKA:
+    type: 'cat'
+    info: 'vitamin K antagonist'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+  DOAC:
+    type: 'cat'
+    info: 'direct oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+NCCT:
+  C_br:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br:
+    type: 'cat'
+    info: 'Intenal capsule region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  INS_br:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  L_br:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostal to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+  
+  e-ASPECTS:
+    type: 'ord'
+    info: 'electronic ASPECTS'
+    categories: 11
+    description:
+        0: 'ASPECTS score of 0'
+        1: 'ASPECTS score of 1'
+        2: 'ASPECTS score of 2'
+        3: 'ASPECTS score of 3'
+        4: 'ASPECTS score of 4'
+        5: 'ASPECTS score of 5'
+        6: 'ASPECTS score of 6'
+        7: 'ASPECTS score of 7'
+        8: 'ASPECTS score of 8'
+        9: 'ASPECTS score of 9'
+        10: 'ASPECTS score of 10'
+
+
+  ASPECTS oberfl_tief:
+    type: 'cat'
+    info: 'depth of regions affected'
+    categories: 4
+    description:
+      0: 'superficial'
+      1: 'deep'
+      2: 'both'
+      3: 'ASPECTS 10'
+
+  pc-ASPECTS:
+    type: 'int'
+    info: 'posterior ASPECTS'
+
+
+  Volume e-ASPECTS:
+    type: 'float'
+    info: 'automatic volume on e-ASPECTS'
+
+  ICV krank:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, ipsilateral (HU)'
+
+  IVC gesund:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, contralateral (HU)'
+
+  ICV Index:
+    type: 'float'
+    info: 'ICV krank/ICV gesund'
+
+  Vessel Occlusion Location Admission:
+    type: 'cat'
+    info: 'Location of vessel occlusion at admission'
+    categories: 11
+    description:
+      0: 'No occlussion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+  Vessel Occlusion Side Admission:
+    type: 'cat'
+    info: 'side of occlussion at admission'
+    categories: 3
+    description:
+      1: 'left'
+      2: 'right'
+      3: 'both'
+
+  Tan Score:
+    type: 'ord'
+    info: 'collaterals score'
+    categories: 4
+    description:
+        0: '0%'
+        1: '0-50%'
+        2: '50-100%'
+        3: '100%'
+
+  Coves Score:
+    type: 'ord'
+    info: 'cortical vein opacification score (score of 0(absence), 1 (moderate) or 2(full) opacification
+                  assigned to three veins in the affected hemisphere'
+    categories: 7
+    description:
+        0: 'Coves Score 0'
+        1: 'Coves Score 1'
+        2: 'Coves Score 2'
+        3: 'Coves Score 3'
+        4: 'Coves Score 4'
+        5: 'Coves Score 5'
+        6: 'Coves Score 6'
+
+
+  BATMAN:
+    type: 'int'
+    info: 'Basilar artery on CTA score'
+
+
+  Clot Burden Score:
+    type: 'ord'
+    info: 'Evaluates the extent of ipsilateral thrombus'
+    categories: 11
+    description:
+        0: 'Clot Burden Score 0'
+        1: 'Clot Burden Score 1'
+        2: 'Clot Burden Score 2'
+        3: 'Clot Burden Score 3'
+        4: 'Clot Burden Score 4'
+        5: 'Clot Burden Score 5'
+        6: 'Clot Burden Score 6'
+        7: 'Clot Burden Score 7'
+        8: 'Clot Burden Score 8'
+        9: 'Clot Burden Score 9'
+        10: 'Clot Burden Score 10'
+
+  Vessel Stenosis:
+    type: 'int'
+    info: 'Presence of arterial stenosis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Stenosis Location:
+    type: 'cat'
+    info: 'Location of arterial stenosis'
+    categories: 8
+    description:
+      0: 'No stenosis'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'MCA'
+      4: 'PCA'
+      5: 'VA'
+      6: 'BA'
+      7: 'ACA'
+
+
+  Arterial Dissection:
+    type: 'cat'
+    info: 'Presence of arterial dissection'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Arterial Dissection Location:
+    type: 'cat'
+    info: 'Location of arterial dissection'
+    categories: 4
+    description:
+
+      0: 'No dissection'
+      1: 'ACI'
+      2: 'VA'
+      3: 'other'
+
+CTP:
+  'CBF_lower30_volume':
+    type: 'int'
+    info: 'Volume in which the CBF is lower that 30% of the CBF of the contraleteral side -- core --'
+  'Tmax_greater6s_volume':
+    type: 'int'
+    info: 'Volume in which the Tmax is greater than 6s -- penumbra + core --'
+  Mismatch Volume:
+    type: 'int'
+    info: 'penumbra volume'
+  Inverse Mismatch Ratio:
+    type: 'float'
+    info: 'volume core/total volume'
+  Hypoperfusion Index:
+    type: 'float'
+    info: 'Volume Tmax>10s/volume Tmax>6s'
+  CBV Index:
+    type: 'float'
+    info: 'Unknown'
+
+CTA:
+
+  Vessel Occlusion CTA:
+
+    type: 'cat'
+    info: 'Location of vessel occlusion on CTA'
+    categories: 11
+    description:
+      0: 'No occlussion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+Treatment:
+
+ Stenting:
+  type: 'cat'
+  info: 'Use of stenting'
+  categories: 2
+  description:
+      0: 'no'
+      1: 'yes'
+
+ Thrombectomy:
+   type: 'cat'
+   info: 'Use of thrombectomy'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Device:
+  type: 'cat'
+  info: 'Device employed for thrombectomy'
+  categories: 3
+  description:
+
+      1: 'Stent retriever'
+      2: 'Aspiration'
+      3: 'Both'
+
+ PTA:
+   type: 'cat'
+   info: 'Use of percutaneous transluminal angioplasty'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Number Maneuver:
+   type: 'int'
+   info: 'number of maneuver that were necessary'
+
+ Lysis i.a.:
+   type: 'cat'
+   info: 'Use of intraarterial lysis'
+   categories: 2
+   description:
+     0: 'no'
+     1: 'yes'
+
+ Lysis quantity:
+   type: 'int'
+   info: 'quantity of lysis used'
+
+Treatment_out:
+
+ Frustrated Recanalization:
+
+   type: 'cat'
+   info: 'Whether the recanalization was frustrated'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Vessel Occlusion after Recan.:
+   type: 'cat'
+   info: 'Location of vessel occlusion after recanalization'
+   categories: 11
+   description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+ TICI:
+   type: 'cat'
+   info: 'Reperfusion score'
+   categories: 5
+   description:
+       0: '0, No reperfusion'
+       1: '1, Minimal reperfusion'
+       2: '2a, Partial reperfusion (<50%)'
+       3: '2b Partial reperfusion(>50%)'
+       4: '3, Total reperfusion'
+
+
+ SAE:
+   type: 'cat'
+   info: 'Subarachnoid hemorrhage encephalitis'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Vessel Occlusion new SupplyArea:
+   type: 'cat'
+   info: 'Vessel occlusion in a new Supply Area'
+   categories: 2
+   description:
+       0: 'no'
+       1: 'yes'
+
+ Vessel Occlusion new SupplyArea Location:
+   type: 'cat'
+   info: 'Location of vessel occlusion after recanalization in a new supply area'
+   categories: 11
+   description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+ Vessel Occlusion new SupplyArea Treatment:
+   type: 'cat'
+   info: 'Whether new vessel occlusion was treated'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Infarct new SupplyArea:
+   type: 'cat'
+   info: 'Whether there is an infarct in the new supply area'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+Control CT:
+  Lacunar Infarct:
+    type: 'cat'
+    info: 'Presence of lacunar infarct'
+    categories: 3
+    description:
+        0: 'no infarct'
+        1: 'Lacunar infarct'
+        2: 'not a lacunar infarct'
+
+  Infarct Volume ControlCT:
+      type: 'int'
+      info: 'Volume in ml/cm³ of the infarct in the Control CT'
+
+  Hyperdense Media Sign:
+    type: 'cat'
+    info: 'Presence of hyperdense Media Sign'
+    categories: 2
+    description:
+        0: 'no'
+        1: 'yes'
+  pc-Aspect ControlCT:
+    type: 'int'
+    info: 'posterior ASPECTS in Control CT'
+
+
+  Aspect ControlCT:
+    type: 'ord'
+    info: ' ASPECTS in Control CT'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  C_br2:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br2:
+    type: 'cat'
+    info: 'Intenal capsule region affected'
+    categories: 2
+    description:
+        0: 'no'
+        1: 'yes'
+
+  INS_br2:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+        0: 'no'
+        1: 'yes'
+
+  L_br2:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br2:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br2:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br2:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br2:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostal to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br2:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br2:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+Times:
+  Unknown Onset:
+    type: 'cat'
+    info: 'Whether onset time of symptoms is known'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Time_Onset_to_Admission:
+    type: 'int'
+    info: 'Time from symptom onset to admission in minutes'
+
+  Time_CT_to_Angio.:
+    type: 'int'
+    info: 'Time from CT imaging to CTA imaging in minutes'
+
+  Time_Puncture_to_Recan.:
+    type: 'int'
+    info: 'Time from CTA to Recanalization in minutes'
+
+  Time_Recan_to_Control:
+    type: 'int'
+    info: 'Time from Recanalization to Control CT imaging in minutes'
+
+
+
+
+
+
+
+
+
+
diff --git a/dictionaries/dictionary_timepoints.yml b/dictionaries/dictionary_timepoints.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f8b8e9f74053cf23b59c45bd481597cf3903cdb2
--- /dev/null
+++ b/dictionaries/dictionary_timepoints.yml
@@ -0,0 +1,808 @@
+#### Data divided into
+# 1.Admission: Metadata, Clinical, Unkonwn onset, Time_Onset_Admission
+# 2.PostImaging: NECT, CTP, CTA
+# 3.PostEVT: Treatment and Treatment output
+# 4.After24h: CCT
+# 5.Output
+
+Admission:
+
+  Sex:
+    type: 'cat'
+    info: 'Patient sex'
+    categories: 2
+    description:
+      0: 'man'
+      1: 'woman'
+
+  Age:
+    type: 'int'
+    info: 'Patient age'
+
+  NIHSS:
+    type: 'int'
+    info: 'National Institute of Health Stroke Scale'
+    categories: 43
+    description: None
+
+  s.p. stroke:
+    type: 'cat'
+    info: 'previous stroke'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  Unknown Onset:
+    type: 'cat'
+    info: 'Whether onset time of symptoms is known'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Time_Onset_to_Admission:
+    type: 'int'
+    info: 'Time from symptom onset to admission in minutes'
+
+  #pre-mRS:
+    #type: 'int'
+    #info: 'mRS previous to stroke'
+
+  pre-mRS:
+    type: 'ord'
+    info: 'mRS previous to stroke'
+    categories: 6
+    description:
+      0: 'mRS 0'
+      1: 'mRS 1'
+      2: 'mRS 2'
+      3: 'mRS 3'
+      4: 'mRS 4'
+      5: 'mRS 5'
+
+
+  aHT:
+    type: 'cat'
+    info: 'arterial hypertension'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  HLP:
+    type: 'cat'
+    info: 'hyperlipidemia'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  DM:
+    type: 'cat'
+    info: 'Diabetes Mellitus'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  aFib:
+    type: 'cat'
+    info: 'atrial fibrillation'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH:
+    type: 'cat'
+    info: 'anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH mono:
+    type: 'cat'
+    info: ' mono anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH duo:
+    type: 'cat'
+    info: 'dual anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  OAK:
+    type: 'cat'
+    info: 'oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  VKA:
+    type: 'cat'
+    info: 'vitamin K antagonist'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  DOAC:
+    type: 'cat'
+    info: 'direct oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+Pre-EVT:
+  C_br:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br:
+    type: 'cat'
+    info: 'Intenal capsule region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  INS_br:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  L_br:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostal to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  #e-ASPECTS:
+  #  type: 'int'
+  #  info: 'electronic ASPECTS'
+  e-ASPECTS:
+    type: 'ord'
+    info: 'electronic ASPECTS'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  ASPECTS oberfl_tief:
+    type: 'cat'
+    info: 'depth of regions affected'
+    categories: 4
+    description:
+      0: 'superficial'
+      1: 'deep'
+      2: 'both'
+      3: 'ASPECTS 10'
+
+  pc-ASPECTS:
+    type: 'cat'
+    info: 'posterior ASPECTS'
+    categories: 11
+    description:
+        0: 'ASPECTS score of 0'
+        1: 'ASPECTS score of 1'
+        2: 'ASPECTS score of 2'
+        3: 'ASPECTS score of 3'
+        4: 'ASPECTS score of 4'
+        5: 'ASPECTS score of 5'
+        6: 'ASPECTS score of 6'
+        7: 'ASPECTS score of 7'
+        8: 'ASPECTS score of 8'
+        9: 'ASPECTS score of 9'
+        10: 'ASPECTS score of 10'
+
+  Volume e-ASPECTS:
+    type: 'float'
+    info: 'automatic volume on e-ASPECTS'
+
+  ICV krank:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, ipsilateral (HU)'
+
+  IVC gesund:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, contralateral (HU)'
+
+  ICV Index:
+    type: 'float'
+    info: 'ICV krank/ICV gesund'
+
+  Vessel Occlusion Location Admission:
+    type: 'cat'
+    info: 'Location of vessel occlusion at admission'
+    categories: 11
+    description:
+      0: 'No occlussion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  Vessel Occlusion Side Admission:
+    type: 'cat'
+    info: 'side of occlussion at admission'
+    categories: 3
+    description:
+      1: 'left'
+      2: 'right'
+      3: 'both'
+
+  #Tan Score:
+  #  type: 'int'
+  #  info: 'collaterals score'
+
+  Tan Score:
+    type: 'ord'
+    info: 'collaterals score'
+    categories: 4
+    description:
+      0: '0%'
+      1: '0-50%'
+      2: '50-100%'
+      3: '100%'
+
+  #Coves Score:
+  #  type: 'int'
+  #  info: 'cortical vein opacification score (score of 0(absence), 1 (moderate) or 2(full) opacification
+  #                      assigned to three veins in the affected hemisphere'
+
+  Coves Score:
+   type: 'ord'
+   info: 'cortical vein opacification score (score of 0(absence), 1 (moderate) or 2(full) opacification
+              assigned to three veins in the affected hemisphere'
+   categories: 7
+   description:
+      0: 'Coves Score 0'
+      1: 'Coves Score 1'
+      2: 'Coves Score 2'
+      3: 'Coves Score 3'
+      4: 'Coves Score 4'
+      5: 'Coves Score 5'
+      6: 'Coves Score 6'
+
+  BATMAN:
+    type: 'cat'
+    info: 'Basilar artery on CTA score'
+    categories: 11
+    description:
+      0: 'BATMAN Score 0'
+      1: 'BATMAN Score 1'
+      2: 'BATMAN Score 2'
+      3: 'BATMAN Score 3'
+      4: 'BATMAN Score 4'
+      5: 'BATMAN Score 5'
+      6: 'BATMAN Score 6'
+      7: 'BATMAN Score 7'
+      8: 'BATMAN Score 8'
+      9: 'BATMAN Score 9'
+      10: 'BATMAN Score 10'
+
+  #Clot Burden Score:
+  #  type: 'int'
+  #  info: 'Evaluates the extent of ipsilateral thrombus'
+
+  Clot Burden Score:
+    type: 'ord'
+    info: 'Evaluates the extent of ipsilateral thrombus'
+    categories: 11
+    description:
+      0: 'Clot Burden Score 0'
+      1: 'Clot Burden Score 1'
+      2: 'Clot Burden Score 2'
+      3: 'Clot Burden Score 3'
+      4: 'Clot Burden Score 4'
+      5: 'Clot Burden Score 5'
+      6: 'Clot Burden Score 6'
+      7: 'Clot Burden Score 7'
+      8: 'Clot Burden Score 8'
+      9: 'Clot Burden Score 9'
+      10: 'Clot Burden Score 10'
+
+  Vessel Stenosis:
+    type: 'cat'
+    info: 'Presence of arterial stenosis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Stenosis Location:
+    type: 'cat'
+    info: 'Location of arterial stenosis'
+    categories: 8
+    description:
+      0: 'No stenosis'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'MCA'
+      4: 'PCA'
+      5: 'VA'
+      6: 'BA'
+      7: 'ACA'
+
+  Arterial Dissection:
+    type: 'cat'
+    info: 'Presence of arterial dissection'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Arterial Dissection Location:
+    type: 'cat'
+    info: 'Location of arterial dissection'
+    categories: 4
+    description:
+
+      0: 'No dissection'
+      1: 'ACI'
+      2: 'VA'
+      3: 'other'
+
+  'CBF_lower30_volume':
+    type: 'int'
+    info: 'Volume in which the CBF is lower that 30% of the CBF of the contraleteral side -- core --'
+
+  'Tmax_greater6s_volume':
+    type: 'int'
+    info: 'Volume in which the Tmax is greater than 6s -- penumbra + core --'
+
+  Mismatch Volume:
+    type: 'int'
+    info: 'penumbra volume'
+
+  Inverse Mismatch Ratio:
+    type: 'float'
+    info: 'volume core/total volume'
+
+  Hypoperfusion Index:
+    type: 'float'
+    info: 'Volume Tmax>10s/volume Tmax>6s'
+
+  CBV Index:
+    type: 'float'
+    info: 'Unknown'
+
+  Vessel Occlusion CTA:
+
+    type: 'cat'
+    info: 'Location of vessel occlusion on CTA'
+    categories: 11
+    description:
+      0: 'No occlussion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  Time_CT_to_Angio.:
+     type: 'int'
+     info: 'Time from CT imaging to CTA imaging in minutes'
+
+Post-EVT:
+
+ Stenting:
+   type: 'cat'
+   info: 'Use of stenting'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Thrombectomy:
+   type: 'cat'
+   info: 'Use of thrombectomy'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Device:
+  type: 'cat'
+  info: 'Device employed for thrombectomy'
+  categories: 4
+  description:
+      0: 'No device - no thrombectomy'
+      1: 'Stent retriever'
+      2: 'Aspiration'
+      3: 'Both'
+
+ PTA:
+   type: 'cat'
+   info: 'Use of percutaneous transluminal angioplasty'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Number Maneuver:
+   type: 'int'
+   info: 'number of maneuver that were necessary'
+
+ Lysis i.a.:
+   type: 'cat'
+   info: 'Use of intraarterial lysis'
+   categories: 2
+   description:
+     0: 'no'
+     1: 'yes'
+
+ Lysis quantity:
+   type: 'int'
+   info: 'quantity of lysis used'
+
+ Frustrated Recanalization:
+
+   type: 'cat'
+   info: 'Whether the recanalization was frustrated'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Vessel Occlusion after Recan.:
+   type: 'cat'
+   info: 'Location of vessel occlusion after recanalization'
+   categories: 11
+   description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+#
+ #TICI:
+ #  type: 'int'
+ #  info: 'Reperfusion score'
+ TICI:
+   type: 'cat'
+   info: 'Reperfusion score'
+   categories: 5
+   description:
+      0: '0, No reperfusion'
+      1: '1, Minimal reperfusion'
+      2: '2a, Partial reperfusion (<50%)'
+      3: '2b Partial reperfusion(>50%)'
+      4: '3, Total reperfusion'
+
+ SAE:
+   type: 'cat'
+   info: 'Subarachnoid hemorrhage encephalitis'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Vessel Occlusion new SupplyArea:
+   type: 'cat'
+   info: 'Vessel occlusion in a new Supply Area'
+   categories: 2
+   description:
+       0: 'no'
+       1: 'yes'
+
+ Vessel Occlusion new SupplyArea Location:
+   type: 'cat'
+   info: 'Location of vessel occlusion after recanalization in a new supply area'
+   categories: 11
+   description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+ Vessel Occlusion new SupplyArea Treatment:
+   type: 'cat'
+   info: 'Whether new vessel occlusion was treated'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Infarct new SupplyArea:
+   type: 'cat'
+   info: 'Whether there is an infarct in the new supply area'
+   categories: 2
+   description:
+      0: 'no'
+      1: 'yes'
+
+ Time_Puncture_to_Recan.:
+    type: 'int'
+    info: 'Time from CTA to Recanalization in minutes'
+
+After24h:
+  Lacunar Infarct:
+    type: 'cat'
+    info: 'Presence of lacunar infarct'
+    categories: 3
+    description:
+        0: 'no infarct'
+        1: 'Lacunar infarct'
+        2: 'not a lacunar infarct'
+
+  Infarct Volume ControlCT:
+      type: 'int'
+      info: 'Volume in ml/cm³ of the infarct in the Control CT'
+
+  Hyperdense Media Sign:
+    type: 'cat'
+    info: 'Presence of hyperdense Media Sign'
+    categories: 2
+    description:
+        0: 'no'
+        1: 'yes'
+
+  pc-Aspect ControlCT:
+    type: 'cat'
+    info: 'posterior ASPECTS in Control CT'
+    categories: 11
+    description:
+        0: 'ASPECTS score of 0'
+        1: 'ASPECTS score of 1'
+        2: 'ASPECTS score of 2'
+        3: 'ASPECTS score of 3'
+        4: 'ASPECTS score of 4'
+        5: 'ASPECTS score of 5'
+        6: 'ASPECTS score of 6'
+        7: 'ASPECTS score of 7'
+        8: 'ASPECTS score of 8'
+        9: 'ASPECTS score of 9'
+        10: 'ASPECTS score of 10'
+
+  #Aspect ControlCT:
+  #  type: 'int'
+  #  info: ' ASPECTS in Control CT'
+
+  Aspect ControlCT:
+    type: 'ord'
+    info: ' ASPECTS in Control CT'
+    categories: 11
+    description:
+        0: 'ASPECTS score of 0'
+        1: 'ASPECTS score of 1'
+        2: 'ASPECTS score of 2'
+        3: 'ASPECTS score of 3'
+        4: 'ASPECTS score of 4'
+        5: 'ASPECTS score of 5'
+        6: 'ASPECTS score of 6'
+        7: 'ASPECTS score of 7'
+        8: 'ASPECTS score of 8'
+        9: 'ASPECTS score of 9'
+        10: 'ASPECTS score of 10'
+
+  C_br2:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br2:
+    type: 'cat'
+    info: 'Intenal capsule region affected'
+    categories: 2
+    description:
+        0: 'no'
+        1: 'yes'
+
+  INS_br2:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+        0: 'no'
+        1: 'yes'
+
+  L_br2:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br2:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br2:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br2:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br2:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostal to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br2:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br2:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostal to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Time_Recan_to_Control:
+    type: 'int'
+    info: 'Time from Recanalization to Control CT imaging in minutes'
+
+Output:
+
+  mRS90d:
+    type: 'ord'
+    info: 'Functional outcome at 90 days'
+    categories: 7
+    description:
+      0: 'mRS 0'
+      1: 'mRS 1'
+      2: 'mRS 2'
+      3: 'mRS 3'
+      4: 'mRS 4'
+      5: 'mRS 5'
+      6: 'mRS 6'
+
+  dmRS:
+    type: 'cat'
+    info: 'Binary functional outcome at 90 days'
+    categories: 2
+    description:
+      0: 'Good outcome'
+      1: 'Bad outcome'
+
+  mortality:
+    type: 'cat'
+    info: 'Mortality at 90 days'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  shift_mRS:
+    type: 'ord'
+    info: 'Shift in mRS'
+    categories: 7
+    description:
+
+      0: 'Shift of 0'
+      1: 'Shift of 1'
+      2: 'Shift of 2'
+      3: 'Shift of 3'
+      4: 'Shift of 4'
+      5: 'Shift of 5'
+      6: 'Shift of 6'
+
+
+
diff --git a/evaluate_model.py b/evaluate_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ae1de8975a354db8770dd01b7973a476e3079d6
--- /dev/null
+++ b/evaluate_model.py
@@ -0,0 +1,433 @@
+from architectures.FCN import Basic_FCN
+from architectures.GCN import GCN
+from Metrics.RegressionMetrics import ClassificationMetrics
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.clean_table import clean_table
+from IO_utils.split_utils import split_data_cv
+from IO_utils.Dataloader import MyDataLoader
+from IO_utils.List_Reader import TableReader
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+import pandas as pd
+import torch
+import pprint
+
+
+def get_metrics_unc(uncertainty_array, mean_preds_array, label_array, p):
+
+    #order = np.argsort(uncertainty_array)
+
+    metrics = ClassificationMetrics(classes=2)
+
+    indices= uncertainty_array<p
+    m = metrics.compute_metrics(mean_preds_array[indices], label_array[indices])
+    print('####')
+    print('Metrics using >{}'.format(p))
+    print(np.count_nonzero(indices))
+    pprint.pprint(m)
+    print('####')
+
+    return m
+
+def plot_boxplots(p, y, uncertainty ):
+
+    order = np.argsort(uncertainty)
+    samples = []
+    auc = []
+    ths = []
+    n_samples = uncertainty.shape[0]
+
+    # Minimum 10% of samples
+    metrics = ClassificationMetrics(classes=2)
+
+    for i in range(order.shape[0] - int(0.2 * n_samples)):
+        number_samples = order.shape[0] - i
+        metrics.clear()
+        kn = order[:(number_samples - 1)]
+        m = metrics.compute_metrics(p[kn], y[kn])
+
+        samples.extend([number_samples])
+        auc.extend([m['auc']])
+        ths.extend([uncertainty[kn[-1]]])
+
+    true_positive= []
+    true_negative = []
+    false_positive = []
+    false_negative = []
+
+    for i in range(p.shape[0]):
+        pred = np.argmax(p[i, :])
+        label = np.argmax(y[i, :])
+        if pred == label:
+            if label == 0:
+                true_positive.extend([uncertainty[i]])
+            else:
+                true_negative.extend([uncertainty[i]])
+        else:
+            if label == 0:
+                false_negative.extend([uncertainty[i]])
+            else:
+                false_positive.extend([uncertainty[i]])
+
+    data= [true_positive, true_negative, false_positive, false_negative]
+    data2 = [true_positive+true_negative, false_positive+false_negative]
+    palette = ['lightgreen', 'darkgreen', 'salmon', 'darkred']
+    labels = ['True Positive', 'True Negative', 'False Positive', 'False Negative']
+    xs = []
+    print(np.mean(data2[0]), np.mean(data2[1]))
+    print(np.std(data2[0]), np.std(data2[1]))
+    for i, col in enumerate(data):
+        xs.append(np.random.normal(np.round((i+1.1)/2) , 0.04, len(data[i])))
+
+    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(13,6))
+    bplot1 = ax1.boxplot(data2, labels = ['Correct predictions', 'Missclassifications'], showfliers=False)
+    for x, val, c, l in zip(xs, data, palette,labels):
+       sct1 = ax1.scatter(x, val, alpha=0.4, color=c, label=l)
+
+    ax1.legend()
+
+    ax2.set_xlabel('Uncertainty threshold')
+    ax2.set_ylabel('AUC')
+    lns1 = ax2.plot(ths, auc, 'k-', markersize=3, label="AUC")
+    ax2.invert_xaxis()
+
+    ax3 = ax2.twinx()
+    ax3.set_ylabel('# of Samples')
+    lns2 = ax3.plot(ths, samples, 'k--', markersize=3, label="# Samples")
+    ax3.invert_xaxis()
+
+    # added these three lines
+    lns = lns1 + lns2
+    labs = [l.get_label() for l in lns]
+    ax2.legend(lns, labs, loc=0)
+
+    #plt.xlabel('{} uncertainty'.format(uncertainty))
+
+
+
+
+
+
+def plot_selectedsamples_metrics(uncertainty_array, mean_preds_array, label_array, uncertainty=""):
+    order = np.argsort(uncertainty_array)
+
+    samples = []
+    auc = []
+    acc = []
+    b_acc = []
+    th_entr = []
+
+    n_samples = uncertainty_array.shape[0]
+
+    # Minimum 10% of samples
+    metrics = ClassificationMetrics(classes=2)
+
+    for i in range(order.shape[0] - int(0.2 * n_samples)):
+        number_samples = order.shape[0] - i
+        metrics.clear()
+        kn = order[:(number_samples - 1)]
+        m = metrics.compute_metrics(mean_preds_array[kn], label_array[kn])
+
+        # print('###### ')
+        # print('Number of samples {}'.format(number_samples))
+        # print(m['cm'])
+        # print(m['auc'])
+        # print('###### ')
+
+        samples.extend([number_samples])
+        auc.extend([m['auc']])
+        acc.extend([m['accuracy']])
+        b_acc.extend([m['balanced_accuracy']])
+        th_entr.extend([uncertainty_array[kn[-1]]])
+
+    plt.figure()
+
+    plt.plot(th_entr, acc, 'bo-', markersize=3, label="Accuracy")
+    plt.plot(th_entr, b_acc, 'gx-', markersize=3, label='Balanced accuracy')
+    plt.plot(th_entr, auc, 'rx-', markersize=3, label='AUC')
+    plt.xlabel('{} uncertainty'.format(uncertainty))
+
+    plt.figure()
+    plt.plot(samples, acc, 'bo-', markersize=3, label='Accuracy')
+    plt.plot(samples, b_acc, 'gx-', markersize=3, label='Balanced accuracy')
+    plt.plot(samples, auc, 'rx-', markersize=3, label='AUC')
+    plt.text(26, plt.gca().get_ylim()[0] + 0.05,
+             'AUC acc {} \n AUC bacc {} \n AUC auc {} '.format(np.round(np.trapz(acc) / len(acc), 2),
+                                                               np.round(np.trapz(b_acc) / len(b_acc), 2),
+                                                               np.round(np.trapz(auc) / len(auc), 2)))
+    plt.xlabel("Number of samples")
+    plt.ylabel("Performance")
+    plt.ylim(0.7, 1)
+    plt.legend()
+    plt.title("Metrics ")
+    # plt.show()
+
+
+def plot_uncetainties(p, y, c, e):
+    true_pred = []
+    for i in range(p.shape[0]):
+        pred = np.argmax(p[i, :])
+        label = np.argmax(y[i, :])
+        if pred == label:
+            if label == 0:
+                true_pred.extend(['lightgreen'])
+            else:
+                true_pred.extend(['darkgreen'])
+        else:
+            if label == 0:
+                true_pred.extend(['salmon'])
+            else:
+                true_pred.extend(['darkred'])
+
+    plt.figure()
+    for g in ['lightgreen', 'darkgreen', 'salmon', 'darkred']:
+        i = [i for i in range(len(true_pred)) if true_pred[i] == g]
+        plt.scatter(c[i], e[i], c=g, label='test', s=10)
+    # sc =plt.scatter(c, e, c=true_pred,)
+    plt.legend(['TP', 'TN', 'FN', 'FP'])
+    plt.xlabel('Predictive uncertainty')
+    plt.ylabel('Epistemic uncertainty')
+
+def plot_age_uncertainty(p, y, c, age):
+    true_pred = []
+    for i in range(p.shape[0]):
+        pred = np.argmax(p[i, :])
+        label = np.argmax(y[i, :])
+        if pred == label:
+            if label == 0:
+                true_pred.extend(['lightgreen'])
+            else:
+                true_pred.extend(['darkgreen'])
+        else:
+            if label == 0:
+                true_pred.extend(['salmon'])
+            else:
+                true_pred.extend(['darkred'])
+
+    plt.figure()
+    for g in ['lightgreen', 'darkgreen', 'salmon', 'darkred']:
+        i = [i for i in range(len(true_pred)) if true_pred[i] == g]
+        plt.scatter(age[i], c[i], c=g, label='test', s=10)
+    # sc =plt.scatter(c, e, c=true_pred,)
+    plt.legend(['TP', 'TN', 'FN', 'FP'])
+    plt.xlabel('Age')
+    plt.ylabel('Uncertainty')
+
+
+def test_graph(config, loader, test_indices, state_dicts):
+    models = []
+
+    for m in state_dicts:
+        model = GCN(nfeat=loader.dataset[0].num_features, nclass=2, dropout=config['dropout'])
+
+        model.load_state_dict(torch.load(m))
+        models.append(model)
+
+    n_models = 10
+    n_droput = 100
+
+
+    for i, batch in enumerate(loader):
+
+        samples = len(test_indices)
+        predictions_y = np.zeros([samples, 2, n_models, n_droput])
+
+        x = batch['x']
+        edge_index = batch['edge_index']
+        edge_weight = batch['weights']
+        y = batch['y'][test_indices]
+
+        for m in range(n_models):
+            for k in range(n_droput):
+                model = models[m]
+                model.train()
+                test_out = models[m].forward(x, edge_index, edge_weight)
+                f = torch.softmax(test_out, dim=1)
+                f_cloned = f.clone()
+                f_cloned[f_cloned == 0] = 1e-30
+
+                predictions_y[:, :, m, k] = f_cloned[test_indices].detach().cpu().numpy()
+
+        mean_preds = np.mean(np.mean(predictions_y, axis=2), axis=2)
+        predictive_uncertainty = np.sum(-np.multiply(np.log(mean_preds), mean_preds), axis=1)
+
+        expect_data_uncertainty = np.mean(
+            np.mean(np.sum(-np.multiply(np.log(predictions_y), predictions_y), axis=1), axis=1), axis=1)
+        epistemic_uncertainty = predictive_uncertainty - expect_data_uncertainty
+
+        """for sample in range(samples):
+            print('#####')
+            print('Sample {}'.format(sample))
+            print(' True value: {}, Prediction  mean: {}'.format(y[sample], mean_preds[sample]))
+            print(' Predictive {}, Epistemic {}'.format(predictive_uncertainty[sample], epistemic_uncertainty[sample]))
+            print('#####')"""
+
+        print('Mean predictive uncertainty ', np.mean(predictive_uncertainty))
+        print('Mean epistemic uncertainty', np.mean(epistemic_uncertainty))
+
+        return mean_preds, predictive_uncertainty, epistemic_uncertainty, y.detach().cpu().numpy()
+
+
+def test(config, test_loader, state_dict):
+    models = []
+
+    for m in state_dict:
+        model = Basic_FCN(in_features=test_loader.dataset.features.shape[1], layers=config['layers']
+                          , out_features=2,
+                          dropout_rate=config['dropout'])
+
+        model.load_state_dict(torch.load(m))
+        models.append(model)
+
+    n_models = 5
+    n_droput = 1
+
+    for batch in test_loader:
+
+        samples = batch['x'].shape[0]
+        predictions_y = np.zeros([samples, 2, n_models, n_droput])
+
+        x = batch['x']
+        y = batch['y']
+
+        for m in range(n_models):
+            for k in range(n_droput):
+                model = models[m]
+                model.train()
+                test_out = models[m].forward(x)
+                f = torch.softmax(test_out, dim=1)
+
+                predictions_y[:, :, m, k] = f.detach().cpu().numpy()
+
+        mean_preds = np.mean(np.mean(predictions_y, axis=2), axis=2)
+        predictive_uncertainty = np.sum(-np.multiply(np.log(mean_preds), mean_preds), axis=1)
+
+        expect_data_uncertainty = np.mean(
+            np.mean(np.sum(-np.multiply(np.log(predictions_y), predictions_y), axis=1), axis=1), axis=1)
+        epistemic_uncertainty = predictive_uncertainty - expect_data_uncertainty
+
+        """for sample in range(samples):
+            print('#####')
+            print('Sample {}'.format(sample))
+            print(' True value: {}, Prediction  mean: {}'.format(y[sample], mean_preds[sample]))
+            print(' Predictive {}, Epistemic {}'.format(predictive_uncertainty[sample], epistemic_uncertainty[sample]))
+            print('#####')"""
+
+        print('Mean predictive uncertainty ', np.mean(predictive_uncertainty))
+        print('Mean epistemic uncertainty', np.mean(epistemic_uncertainty))
+
+        return mean_preds, predictive_uncertainty, epistemic_uncertainty, y.detach().cpu().numpy()
+
+
+if __name__ == "__main__":
+
+    # %% DATALOADIND
+
+    ## Clean original table
+    excel_dir = "../data/TheList_anonymous_mv.xlsx"
+    clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+    # Given a clean table get features and labels
+    table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints',
+                        mv_strategy='median',
+                        output_feature=['dmRS'])
+
+    output_vector = table.output_vector
+
+    fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+
+    results_pred = {}
+    results_epis = {}
+    for p in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]:
+        results_pred[p] = {}
+        results_epis[p] = {}
+
+    # for f in [5, 10, 15, 20, 25, 30, 40, 50]:
+    for f in [15]:
+        features = table.select_features(method='mrmr', k=f, fold_indices=fold_indices)
+
+        feature_vector = table.final_df[features]
+        FP = FeaturePreprocessing(feature_vector, table.selected_d)
+        feature_vector = FP.create_features(feature_vector)
+        config = {
+            'layers': {'number': 3,
+                       'layer1': 40,
+                       'layer2': 20,
+
+                       },
+
+            'dropout': 0,
+            'out_classes': 2}
+
+        ########
+        mean_preds = []
+        combined = []
+        epistemic = []
+        cls = []
+
+        for k in range(5):
+            dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[k], table.selected_d,
+                                           one_hot=True)
+            dl = dataloader_fold.get_loaders()
+            state_dict_paths = ["C:/Users/martinca1/PhD/Projects/AI_Stroke/out/models/features_{}/"
+                                "model_{}_fold_{}.pt".format(f, i, k) for i in range(5)]
+            pred, unc, epistemic_unc, y = test(config, dl[2], state_dict_paths)
+
+            # _, _, _, _ = test(config, dl[0], models)
+
+            mean_preds.extend(pred.tolist())
+            combined.extend(unc.tolist())
+            epistemic.extend(epistemic_unc.tolist())
+            cls.extend(y.tolist())
+
+        p = np.array(mean_preds)
+        y = np.array(cls)
+        c = np.array(combined)
+        e = np.array(epistemic)
+
+        # with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/predictive_uncertainty.xlsx") as writer:
+        for per in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]:
+            results_pred[per][f] = get_metrics_unc(c, p, y, per)
+            # pd_per = pd.DataFrame(results_pred[per]).T
+            # pd_per.to_excel(writer, sheet_name=str(per))
+
+        # with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/epistimic_uncertainty.xlsx") as writer:
+        #    for per in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2,0.1]:
+        #        results_epis[per][f] = get_metrics_unc(e, p, y, per)
+        #        pd_per = pd.DataFrame(results_epis[per]).T
+        #        pd_per.to_excel(writer, sheet_name=str(per))
+
+        true_pred = []
+        for i in range(p.shape[0]):
+            pred = np.argmax(p[i, :])
+            label = np.argmax(y[i, :])
+            if pred == label:
+                if label == 0:
+                    true_pred.extend(['lightgreen'])
+                else:
+                    true_pred.extend(['darkgreen'])
+            else:
+                if label == 0:
+                    true_pred.extend(['salmon'])
+                else:
+                    true_pred.extend(['darkred'])
+
+        metrics = ClassificationMetrics(classes=2)
+        m = metrics.compute_metrics(p, y)
+        print('Combined metrics')
+        print(m)
+
+        plot_selectedsamples_metrics(c, p, y, uncertainty='Predictive')
+        plot_selectedsamples_metrics(e, p, y, uncertainty='Epistemic')
+
+        plt.figure()
+        for g in ['lightgreen', 'darkgreen', 'salmon', 'darkred']:
+            i = [i for i in range(len(true_pred)) if true_pred[i] == g]
+            plt.scatter(c[i], e[i], c=g, label='test')
+        # sc =plt.scatter(c, e, c=true_pred,)
+        plt.legend(['TP', 'TN', 'FN', 'FP'])
+        plt.xlabel('Predictive uncertainty')
+        plt.ylabel('Epistemic uncertainty')
+        plt.show()
diff --git a/test.py b/test.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/train.py b/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..056072ababd351a39c31531f26029a4728e64d66
--- /dev/null
+++ b/train.py
@@ -0,0 +1,204 @@
+from architectures.FCN import Basic_FCN
+from Metrics.RegressionMetrics import ClassificationMetrics
+from Loss.Loss_uncertainty import loss_uncertainty
+
+import copy
+from hyperopt import STATUS_OK
+from ignite.engine import Engine, Events
+import neptune
+import pprint
+import time
+import tqdm
+import torch
+
+
+def train_model(config, loaders, save_metrics=False):
+    # torch.manual_seed(0)
+
+    train_loader, val_loader, test_loader = loaders
+    device = torch.device('cuda' if torch.cuda.is_available() else False)
+    model = Basic_FCN(in_features=train_loader.dataset.features.shape[1], layers=config['layers']
+                      , out_features=2,
+                      dropout_rate=config['dropout'])
+
+    p = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    print('Model parameters', p)
+    model.to(device)
+
+    # METRICS
+    train_metrics = ClassificationMetrics(classes=2)
+    val_metrics = ClassificationMetrics(classes=2)
+    test_metrics = ClassificationMetrics(classes=2)
+
+    # OPTIMIZER
+    optimizer = torch.optim.Adam(model.parameters(),
+                                lr=config['lr'],
+                                #momentum=config['momentum'],
+                                weight_decay=config['weight_decay'])
+
+    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
+
+    # LOSS
+    import numpy as np
+    values, counts = np.unique(train_loader.dataset.labels, return_counts=True)
+    weights = torch.FloatTensor(1- counts / np.sum(counts)).to('cuda')
+    #weights = torch.FloatTensor((0.5, 0.5)).to('cuda')
+    loss_train = loss_uncertainty(weights=weights)
+    loss_val = loss_uncertainty(weights=weights)
+    loss_test = loss_uncertainty(weights=weights)
+
+    def train(Engine, batch):
+
+        model.train()
+        optimizer.zero_grad()
+        data = batch['x'].clone().to(device)
+        y_train = batch['y'].clone().to(device)
+
+        out = model.forward(data)
+
+        Loss = loss_train.get_loss(out, y_train.unsqueeze(1))
+        m_train = train_metrics.compute_metrics(out.detach().cpu().numpy(), batch['y'].detach().cpu().numpy())
+
+        Loss.backward()
+        optimizer.step()
+
+        Engine.state.loss = loss_train.get_total_loss()
+        Engine.state.metrics = train_metrics
+        Engine.state.m = m_train
+        Engine.state.out = out
+        Engine.state.y = y_train.detach().cpu().numpy()
+
+    def validate(Engine, batch):
+        model.eval()
+        with torch.no_grad():
+            data = batch['x'].clone().to(device)
+            y_val = batch['y'].clone().to(device)
+
+            out = model.forward(data)
+
+            lv = loss_val.get_loss(out, y_val.unsqueeze(1))
+            scheduler.step(lv)
+            #print("Epoch {}, lr {}".format(trainer_engine.state.epoch, optimizer.param_groups[0]['lr']))
+            m_val = val_metrics.compute_metrics(out.detach().cpu().numpy(), batch['y'].detach().cpu().numpy())
+
+            Engine.state.loss = loss_val.get_total_loss()
+            Engine.state.metrics = val_metrics
+            Engine.state.m = m_val
+
+    def test(Engine, batch):
+        model.eval()
+        with torch.no_grad():
+            data = batch['x'].clone().to(device)
+            y_test = batch['y'].clone().to(device)
+
+            out = model.forward(data)
+            f = torch.softmax(out, dim=1)
+            _ = loss_test.get_loss(f, y_test.unsqueeze(1))
+            m_test = test_metrics.compute_metrics(f.detach().cpu().numpy(), batch['y'].detach().cpu().numpy())
+
+            Engine.state.loss = loss_test.get_total_loss()
+            Engine.state.metrics = test_metrics
+            Engine.state.m = m_test
+
+    trainer_engine = Engine(train)
+    validator_engine = Engine(validate)
+    test_engine = Engine(test)
+
+    @trainer_engine.on(Events.STARTED)
+    def training_bootup(engine):
+
+        # print("Training started")
+        # print("Train_loader,: iterations/epoch: ", len(train_loader), ", total number of samples",
+        #      len(train_loader.sampler))
+        # print("Validation_loader,: iterations/epoch: ", len(val_loader), ", total number of samples",
+        #      len(val_loader.sampler))
+        # print("Test_loader,: iterations/epoch: ", len(test_loader), ", total number of samples",
+        #      len(test_loader.sampler))
+
+        time.sleep(0.001)
+        engine.pbar = tqdm.tqdm(total=300, desc='Training progress')
+        validator_engine.state.loss = 0
+        engine.state.best_epoch = 0
+        engine.state.min_loss = 1000
+        engine.state.best_train_metrics = {}
+        engine.state.best_val_metrics = {}
+        engine.state.best_test_metrics = {}
+
+        engine.count = 0
+
+    @trainer_engine.on(Events.EPOCH_COMPLETED)
+    def run_validation(engine):
+        validator_engine.run(val_loader, max_epochs=1)
+        engine.pbar.update(1)
+        engine.pbar.set_postfix({'loss': validator_engine.state.loss,
+                                 'loss_train': trainer_engine.state.loss,
+                                 'accuracy_train': trainer_engine.state.m['accuracy']}, refresh=True)
+
+        test_engine.run(test_loader, max_epochs=1)
+
+        if validator_engine.state.loss < engine.state.min_loss:
+            engine.count = 0
+            engine.state.min_loss = validator_engine.state.loss
+            #
+        else:
+            engine.count += 1
+            # print('Strike ', engine.count)
+            if engine.count > 40:
+                trainer_engine.terminate()
+        if True:
+            engine.state.best_epoch = engine.state.epoch
+            engine.state.best_train_metrics = trainer_engine.state.m
+            engine.state.best_val_metrics = validator_engine.state.m
+            engine.state.best_test_metrics = test_engine.state.m
+            engine.state.best_model = model.state_dict()
+
+        train_metrics.clear()
+        val_metrics.clear()
+        test_metrics.clear()
+
+        loss_train.clear()
+        loss_val.clear()
+        loss_test.clear()
+
+    @trainer_engine.on(Events.EPOCH_COMPLETED)
+    def write_neptune():
+
+        if save_metrics:
+            neptune.log_metric('loss_train', trainer_engine.state.loss.detach().cpu().numpy())
+            neptune.log_metric('loss_val', validator_engine.state.loss.detach().cpu().numpy())
+            neptune.log_metric('loss_test', test_engine.state.loss.detach().cpu().numpy())
+            neptune.log_metric('train_AUC', trainer_engine.state.m['auc'])
+            neptune.log_metric('train_accuracy', trainer_engine.state.m['accuracy'])
+            neptune.log_metric('val_AUC', validator_engine.state.m['auc'])
+            neptune.log_metric('val_accuracy', validator_engine.state.m['accuracy'])
+            neptune.log_metric('test_AUC', test_engine.state.m['auc'])
+            neptune.log_metric('test_accuracy', test_engine.state.m['accuracy'])
+
+    @trainer_engine.on(Events.COMPLETED)
+    def run_testing(engine):
+
+        # print(time.ctime(), "Running testing")
+        # test_engine.run(test_loader, max_epochs=1)
+
+        engine.pbar.close()
+        # time.sleep(0.001)
+
+    trainer_engine.run(train_loader, max_epochs=300)
+    print(
+        "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f}".format(trainer_engine.state.m['auc'],
+                                                                                         validator_engine.state.m[
+                                                                                             'auc'],
+                                                                                         test_engine.state.m['auc']))
+    # visualize(h=trainer_engine.state.best_out, color=trainer_engine.state.best_y)
+    result = {'loss': trainer_engine.state.min_loss,
+              'epoch': trainer_engine.state.best_epoch,
+              'train_metrics': trainer_engine.state.best_train_metrics,
+              'val_metrics': trainer_engine.state.best_val_metrics,
+              'test_metric': trainer_engine.state.best_test_metrics,
+              'status': STATUS_OK}
+
+    #pprint.pprint(trainer_engine.state.best_train_metrics)
+    #pprint.pprint(trainer_engine.state.best_val_metrics)
+    #pprint.pprint(trainer_engine.state.best_test_metrics)
+
+    return result, trainer_engine.state.best_model
diff --git a/train_graph.py b/train_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..7346ef0fed266344f5f5b0454dfeafbeff96e14b
--- /dev/null
+++ b/train_graph.py
@@ -0,0 +1,223 @@
+from architectures.GCN import GCN
+
+from Metrics.RegressionMetrics import ClassificationMetrics
+from Loss.Loss_uncertainty import loss_uncertainty
+
+import copy
+from hyperopt import STATUS_OK
+from ignite.engine import Engine, Events
+import neptune
+import pprint
+import time
+import tqdm
+import torch
+
+
+def train_model_graph(config, loaders, indices, save_metrics=False):
+    # torch.manual_seed(0)
+
+    train_indices, val_indices, test_indices = indices
+
+    device = torch.device('cuda' if torch.cuda.is_available() else False)
+    model = GCN(nfeat=loaders.dataset[0].num_features, nclass=2, dropout=config['dropout'])
+
+    p = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    print('Model parameters', p)
+    model.to(device)
+
+    # METRICS
+    train_metrics = ClassificationMetrics(classes=2)
+    val_metrics = ClassificationMetrics(classes=2)
+    test_metrics = ClassificationMetrics(classes=2)
+
+    # OPTIMIZER
+    optimizer = torch.optim.Adam(model.parameters(),
+                                lr=config['lr'],
+                                #momentum=config['momentum'],
+                                weight_decay=config['weight_decay'])
+
+    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.7)
+
+    # LOSS
+    import numpy as np
+    values, counts = np.unique(loaders.dataset[0].labels.data, return_counts=True)
+    weights = torch.FloatTensor(1- counts / np.sum(counts)).to('cuda')
+    #weights = torch.FloatTensor((0.5, 0.5)).to('cuda')
+    loss_train = loss_uncertainty(weights=weights)
+    loss_val = loss_uncertainty(weights=weights)
+    loss_test = loss_uncertainty(weights=weights)
+
+    def train(Engine, batch):
+
+        model.train()
+        optimizer.zero_grad()
+        data = batch['x'].clone().to(device)
+        edge_index = batch['edge_index'].clone().to(device)
+        edge_weight = batch['weights'].clone().to(device)
+        y_train = batch['y'].clone().to(device)
+
+        out = model.forward(data, edge_index, edge_weight)
+        # f = torch.softmax(out, dim=1)
+        f_cloned = out.clone()
+        #f_cloned[f_cloned == 0] = 1e-30
+
+
+        Loss = loss_train.get_loss(f_cloned[train_indices], y_train[train_indices].unsqueeze(1))
+
+        m_train = train_metrics.compute_metrics(f_cloned[train_indices].detach().cpu().numpy(),
+                                                batch['y'][train_indices].detach().cpu().numpy())
+
+        Loss.backward()
+        optimizer.step()
+
+        Engine.state.loss = loss_train.get_total_loss()
+        Engine.state.metrics = train_metrics
+        Engine.state.m = m_train
+        Engine.state.out = out
+        Engine.state.y = y_train.detach().cpu().numpy()
+
+    def validate(Engine, batch):
+        model.eval()
+        with torch.no_grad():
+            data = batch['x'].clone().to(device)
+            edge_index = batch['edge_index'].clone().to(device)
+            edge_weight = batch['weights'].clone().to(device)
+            y_val = batch['y'].clone().to(device)
+
+            out = model.forward(data, edge_index, edge_weight)
+            #f = torch.softmax(out, dim=1)
+            f_cloned = out.clone()
+            f_cloned[f_cloned == 0] = 1e-30
+
+            lv = loss_val.get_loss(f_cloned[val_indices], y_val[val_indices].unsqueeze(1))
+            scheduler.step(lv)
+            #print("Epoch {}, lr {}".format(trainer_engine.state.epoch, optimizer.param_groups[0]['lr']))
+            m_val = val_metrics.compute_metrics(f_cloned[val_indices].detach().cpu().numpy(),
+                                                batch['y'][val_indices].detach().cpu().numpy())
+
+            Engine.state.loss = loss_val.get_total_loss()
+            Engine.state.metrics = val_metrics
+            Engine.state.m = m_val
+
+    def test(Engine, batch):
+        model.eval()
+        with torch.no_grad():
+            data = batch['x'].clone().to(device)
+            edge_index = batch['edge_index'].clone().to(device)
+            edge_weight = batch['weights'].clone().to(device)
+            y_test = batch['y'].clone().to(device)
+
+            out = model.forward(data, edge_index, edge_weight)
+            # f = torch.softmax(out, dim=1)
+            f_cloned = out.clone()
+            f_cloned[f_cloned == 0] = 1e-30
+
+            _ = loss_test.get_loss(f_cloned[test_indices], y_test[test_indices].unsqueeze(1))
+            m_test = test_metrics.compute_metrics(f_cloned[test_indices].detach().cpu().numpy(),
+                                                  batch['y'][test_indices].detach().cpu().numpy())
+
+            Engine.state.loss = loss_test.get_total_loss()
+            Engine.state.metrics = test_metrics
+            Engine.state.m = m_test
+
+    trainer_engine = Engine(train)
+    validator_engine = Engine(validate)
+    test_engine = Engine(test)
+
+    @trainer_engine.on(Events.STARTED)
+    def training_bootup(engine):
+
+        # print("Training started")
+        # print("Train_loader,: iterations/epoch: ", len(train_loader), ", total number of samples",
+        #      len(train_loader.sampler))
+        # print("Validation_loader,: iterations/epoch: ", len(val_loader), ", total number of samples",
+        #      len(val_loader.sampler))
+        # print("Test_loader,: iterations/epoch: ", len(test_loader), ", total number of samples",
+        #      len(test_loader.sampler))
+
+        time.sleep(0.001)
+        engine.pbar = tqdm.tqdm(total=300, desc='Training progress')
+        validator_engine.state.loss = 0
+        engine.state.best_epoch = 0
+        engine.state.min_loss = 1000
+        engine.state.best_train_metrics = {}
+        engine.state.best_val_metrics = {}
+        engine.state.best_test_metrics = {}
+
+        engine.count = 0
+
+    @trainer_engine.on(Events.EPOCH_COMPLETED)
+    def run_validation(engine):
+        validator_engine.run(loaders, max_epochs=1)
+        engine.pbar.update(1)
+        engine.pbar.set_postfix({'loss': validator_engine.state.loss,
+                                 'loss_train': trainer_engine.state.loss,
+                                 'accuracy_train': trainer_engine.state.m['accuracy']}, refresh=True)
+
+        test_engine.run(loaders, max_epochs=1)
+
+        if validator_engine.state.loss < engine.state.min_loss:
+            engine.count = 0
+            engine.state.min_loss = validator_engine.state.loss
+            #
+        else:
+            engine.count += 1
+            if engine.count > 20:
+                trainer_engine.terminate()
+        if True:
+            engine.state.best_epoch = engine.state.epoch
+            engine.state.best_train_metrics = trainer_engine.state.m
+            engine.state.best_val_metrics = validator_engine.state.m
+            engine.state.best_test_metrics = test_engine.state.m
+            engine.state.best_model = model.state_dict()
+
+        train_metrics.clear()
+        val_metrics.clear()
+        test_metrics.clear()
+
+        loss_train.clear()
+        loss_val.clear()
+        loss_test.clear()
+
+    @trainer_engine.on(Events.EPOCH_COMPLETED)
+    def write_neptune():
+
+        if save_metrics:
+            neptune.log_metric('loss_train', trainer_engine.state.loss.detach().cpu().numpy())
+            neptune.log_metric('loss_val', validator_engine.state.loss.detach().cpu().numpy())
+            neptune.log_metric('loss_test', test_engine.state.loss.detach().cpu().numpy())
+            neptune.log_metric('train_AUC', trainer_engine.state.m['auc'])
+            neptune.log_metric('train_accuracy', trainer_engine.state.m['accuracy'])
+            neptune.log_metric('val_AUC', validator_engine.state.m['auc'])
+            neptune.log_metric('val_accuracy', validator_engine.state.m['accuracy'])
+            neptune.log_metric('test_AUC', test_engine.state.m['auc'])
+            neptune.log_metric('test_accuracy', test_engine.state.m['accuracy'])
+
+    @trainer_engine.on(Events.COMPLETED)
+    def run_testing(engine):
+
+        # print(time.ctime(), "Running testing")
+        # test_engine.run(test_loader, max_epochs=1)
+
+        engine.pbar.close()
+        # time.sleep(0.001)
+
+    trainer_engine.run(loaders, max_epochs=300)
+    print(
+        "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f}".format(trainer_engine.state.m['auc'],
+                                                                                         validator_engine.state.m[
+                                                                                             'auc'],
+                                                                                         test_engine.state.m['auc']))
+    # visualize(h=trainer_engine.state.best_out, color=trainer_engine.state.best_y)
+    result = {'loss': trainer_engine.state.min_loss,
+              'epoch': trainer_engine.state.best_epoch,
+              'train_metrics': trainer_engine.state.best_train_metrics,
+              'val_metrics': trainer_engine.state.best_val_metrics,
+              'test_metric': trainer_engine.state.best_test_metrics,
+              'status': STATUS_OK}
+
+    #pprint.pprint(trainer_engine.state.best_train_metrics)
+    #pprint.pprint(trainer_engine.state.best_val_metrics)
+    #pprint.pprint(trainer_engine.state.best_test_metrics)
+
+    return result, trainer_engine.state.best_model