From ccfbd1a93258168d55348abebd8b830f04174a03 Mon Sep 17 00:00:00 2001 From: Celia Martin Vicario <celia.martin@fau.de> Date: Thu, 19 Jan 2023 10:55:15 +0100 Subject: [PATCH] Initial commit --- .idea/AIS_Regress.iml | 2 +- .idea/misc.xml | 5 +- Experiments/1-VariablesEvaluation.py | 200 +++++ Experiments/2-VariableSelection.py | 162 ++++ Experiments/3-MVStrategy.py | 150 ++++ Experiments/4-Timepoints.py | 164 ++++ Experiments/5-Uncertainty.py | 100 +++ Experiments/6-Graph_constr.py | 116 +++ Experiments/7-Graph_model.py | 82 ++ Experiments/8-Uncertainty_graph.py | 145 ++++ Experiments/9-Images.py | 55 ++ Experiments/_t_test.py | 62 ++ IO_utils/Dataloader.py | 63 ++ IO_utils/Datasets.py | 140 +++ IO_utils/FeaturePreprocessing.py | 109 +++ IO_utils/List_Reader.py | 125 +++ IO_utils/List_reader_utils.py | 108 +++ .../__pycache__/Dataloader.cpython-37.pyc | Bin 0 -> 1850 bytes IO_utils/__pycache__/Datasets.cpython-37.pyc | Bin 0 -> 4138 bytes .../FeaturePreprocessing.cpython-37.pyc | Bin 0 -> 3546 bytes .../__pycache__/List_Reader.cpython-37.pyc | Bin 0 -> 3883 bytes .../List_reader_utils.cpython-37.pyc | Bin 0 -> 3532 bytes .../__pycache__/clean_table.cpython-37.pyc | Bin 0 -> 9996 bytes .../__pycache__/mv_strategies.cpython-37.pyc | Bin 0 -> 6790 bytes .../__pycache__/split_utils.cpython-37.pyc | Bin 0 -> 1944 bytes .../statistics_utils.cpython-37.pyc | Bin 0 -> 3419 bytes IO_utils/clean_table.py | 321 +++++++ IO_utils/mv_strategies.py | 258 ++++++ IO_utils/split_utils.py | 63 ++ IO_utils/statistics_utils.py | 159 ++++ Loss/Loss_uncertainty.py | 67 ++ .../Loss_uncertainty.cpython-37.pyc | Bin 0 -> 1917 bytes Metrics/ClassificationMetrics.py | 126 +++ Metrics/RegressionMetrics.py | 55 ++ .../ClassificationMetrics.cpython-37.pyc | Bin 0 -> 4067 bytes .../RegressionMetrics.cpython-37.pyc | Bin 0 -> 1563 bytes Metrics/__pycache__/_utils.cpython-37.pyc | Bin 0 -> 518 bytes Metrics/_utils.py | 7 + README.md | 2 - __pycache__/evaluate_model.cpython-37.pyc | Bin 0 -> 10401 bytes __pycache__/train.cpython-37.pyc | Bin 0 -> 5576 bytes __pycache__/train_graph.cpython-37.pyc | Bin 0 -> 5978 bytes _utils/Result_container.py | 54 ++ .../Result_container.cpython-37.pyc | Bin 0 -> 1794 bytes _utils/__pycache__/plot_utils.cpython-37.pyc | Bin 0 -> 7536 bytes _utils/plot_utils.py | 203 +++++ architectures/3D_CNN.py | 64 ++ architectures/Edge_GCN.py | 25 + architectures/FCN.py | 52 ++ architectures/GCN.py | 38 + architectures/ML_algorithms.py | 146 ++++ .../__pycache__/Edge_GCN.cpython-37.pyc | Bin 0 -> 1235 bytes architectures/__pycache__/FCN.cpython-37.pyc | Bin 0 -> 1531 bytes architectures/__pycache__/GCN.cpython-37.pyc | Bin 0 -> 1173 bytes .../__pycache__/ML_algorithms.cpython-37.pyc | Bin 0 -> 4090 bytes dictionaries/dictionary_modalities.yml | 764 +++++++++++++++++ dictionaries/dictionary_timepoints.yml | 808 ++++++++++++++++++ evaluate_model.py | 433 ++++++++++ test.py | 0 train.py | 204 +++++ train_graph.py | 223 +++++ 61 files changed, 5856 insertions(+), 4 deletions(-) create mode 100644 Experiments/1-VariablesEvaluation.py create mode 100644 Experiments/2-VariableSelection.py create mode 100644 Experiments/3-MVStrategy.py create mode 100644 Experiments/4-Timepoints.py create mode 100644 Experiments/5-Uncertainty.py create mode 100644 Experiments/6-Graph_constr.py create mode 100644 Experiments/7-Graph_model.py create mode 100644 Experiments/8-Uncertainty_graph.py create mode 100644 Experiments/9-Images.py create mode 100644 Experiments/_t_test.py create mode 100644 IO_utils/Dataloader.py create 
mode 100644 IO_utils/Datasets.py create mode 100644 IO_utils/FeaturePreprocessing.py create mode 100644 IO_utils/List_Reader.py create mode 100644 IO_utils/List_reader_utils.py create mode 100644 IO_utils/__pycache__/Dataloader.cpython-37.pyc create mode 100644 IO_utils/__pycache__/Datasets.cpython-37.pyc create mode 100644 IO_utils/__pycache__/FeaturePreprocessing.cpython-37.pyc create mode 100644 IO_utils/__pycache__/List_Reader.cpython-37.pyc create mode 100644 IO_utils/__pycache__/List_reader_utils.cpython-37.pyc create mode 100644 IO_utils/__pycache__/clean_table.cpython-37.pyc create mode 100644 IO_utils/__pycache__/mv_strategies.cpython-37.pyc create mode 100644 IO_utils/__pycache__/split_utils.cpython-37.pyc create mode 100644 IO_utils/__pycache__/statistics_utils.cpython-37.pyc create mode 100644 IO_utils/clean_table.py create mode 100644 IO_utils/mv_strategies.py create mode 100644 IO_utils/split_utils.py create mode 100644 IO_utils/statistics_utils.py create mode 100644 Loss/Loss_uncertainty.py create mode 100644 Loss/__pycache__/Loss_uncertainty.cpython-37.pyc create mode 100644 Metrics/ClassificationMetrics.py create mode 100644 Metrics/RegressionMetrics.py create mode 100644 Metrics/__pycache__/ClassificationMetrics.cpython-37.pyc create mode 100644 Metrics/__pycache__/RegressionMetrics.cpython-37.pyc create mode 100644 Metrics/__pycache__/_utils.cpython-37.pyc create mode 100644 Metrics/_utils.py delete mode 100644 README.md create mode 100644 __pycache__/evaluate_model.cpython-37.pyc create mode 100644 __pycache__/train.cpython-37.pyc create mode 100644 __pycache__/train_graph.cpython-37.pyc create mode 100644 _utils/Result_container.py create mode 100644 _utils/__pycache__/Result_container.cpython-37.pyc create mode 100644 _utils/__pycache__/plot_utils.cpython-37.pyc create mode 100644 _utils/plot_utils.py create mode 100644 architectures/3D_CNN.py create mode 100644 architectures/Edge_GCN.py create mode 100644 architectures/FCN.py create mode 100644 architectures/GCN.py create mode 100644 architectures/ML_algorithms.py create mode 100644 architectures/__pycache__/Edge_GCN.cpython-37.pyc create mode 100644 architectures/__pycache__/FCN.cpython-37.pyc create mode 100644 architectures/__pycache__/GCN.cpython-37.pyc create mode 100644 architectures/__pycache__/ML_algorithms.cpython-37.pyc create mode 100644 dictionaries/dictionary_modalities.yml create mode 100644 dictionaries/dictionary_timepoints.yml create mode 100644 evaluate_model.py delete mode 100644 test.py create mode 100644 train.py create mode 100644 train_graph.py diff --git a/.idea/AIS_Regress.iml b/.idea/AIS_Regress.iml index 8dc09e5..74e2033 100644 --- a/.idea/AIS_Regress.iml +++ b/.idea/AIS_Regress.iml @@ -2,7 +2,7 @@ <module type="PYTHON_MODULE" version="4"> <component name="NewModuleRootManager"> <content url="file://$MODULE_DIR$" /> - <orderEntry type="inheritedJdk" /> + <orderEntry type="jdk" jdkName="Python 3.7 (BaseEnv)" jdkType="Python SDK" /> <orderEntry type="sourceFolder" forTests="false" /> </component> <component name="TestRunnerService"> diff --git a/.idea/misc.xml b/.idea/misc.xml index d1e22ec..e5f7f46 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,7 @@ <?xml version="1.0" encoding="UTF-8"?> <project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" /> + <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (BaseEnv)" project-jdk-type="Python SDK" /> + <component 
name="PyCharmProfessionalAdvertiser"> + <option name="shown" value="true" /> + </component> </project> \ No newline at end of file diff --git a/Experiments/1-VariablesEvaluation.py b/Experiments/1-VariablesEvaluation.py new file mode 100644 index 0000000..4a57dcd --- /dev/null +++ b/Experiments/1-VariablesEvaluation.py @@ -0,0 +1,200 @@ +from IO_utils.clean_table import clean_table +from IO_utils.statistics_utils import get_pvalue, compute_basic_statistics, compute_bivariate_statistics +from IO_utils.List_reader_utils import cross_check_dictionary, remove_features, get_all_dict_types, treat_missing_values +from _utils.plot_utils import plot_mv, plot_distribution_categorical, plot_distribution_numerical, \ + plot_significant_values +from IO_utils.FeaturePreprocessing import FeaturePreprocessing + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import os +import yaml +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import GridSearchCV + + +# %% DATALOADIND +def get_statistics(input_df, data_dictionary, output_dir='../../out/data_exploration/statistics'): + tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h'] + + d, all_indices = cross_check_dictionary(input_df, data_dictionary, tables, output=[]) + + # Reorder dataframe according to data dictionary + reordered_df = input_df.reindex(columns=all_indices).drop(columns=['Id']) + + types = [] + + all_df, all_missing_values, clean_keys = remove_features(reordered_df, p=1, exclude=[]) + + for t in tables: + types.extend(get_all_dict_types(data_dictionary[t], types=[])) + + # Compute statistics of the selected tables + statistics = compute_basic_statistics(all_df) + statistics_bivariate = compute_bivariate_statistics(all_df, input_df['dmRS'], input_df['mortality'], types) + + p_dmRS, methods = get_pvalue(all_df, input_df['dmRS'], types) + p_mortality, _ = get_pvalue(all_df, input_df['mortality'], types) + # p_shiftmRS, _ = get_pvalue(all_df, input_df['shift_mRS'], types) + + statistics['p_dmRS'] = p_dmRS + statistics['p_mortality'] = p_mortality + # statistics['shift_mRS'] = p_shiftmRS + statistics['method'] = methods + + statistics['missing_values'] = all_missing_values + statistics['Percentage (%)'] = (statistics['missing_values'] * 100. 
/ input_df.shape[0]).to_list() + statistics['types'] = types + + statistics_bivariate['p_dmRS'] = p_dmRS + statistics_bivariate['p_mortality'] = p_mortality + + #plot_significant_values(statistics, value='p_mortality', th=0.01, out_dir=output_dir) + #plot_significant_values(statistics, value='p_dmRS', th=0.01, out_dir=output_dir) + + #plot_significant_values(statistics, value='p_mortality', th=0.05, out_dir=output_dir) + #plot_significant_values(statistics, value='p_dmRS', th=0.05, out_dir=output_dir) + + # Save file + file_path = os.path.join(output_dir, 'output.xlsx') + # assert os.path.isfile(file_path), 'File already exists' + with pd.ExcelWriter(file_path) as writer: + statistics.to_excel(writer, sheet_name='Sheet1') + + # Save file + file_path_bivariate = os.path.join(output_dir, 'output_bivariate.xlsx') + # assert os.path.isfile(file_path), 'File already exists' + with pd.ExcelWriter(file_path_bivariate) as writer: + statistics_bivariate.to_excel(writer, sheet_name='Sheet1') + + return statistics + + +## Clean original table +excel_dir = "../../data/TheList_anonymous_mv.xlsx" +clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) +output_features = ['dmRS', 'mRS90d', 'shift_mRS', 'mortality'] +tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h'] +data_dicts = yaml.load(open("../dictionaries/dictionary_timepoints.yml"), Loader=yaml.Loader) +dir_out = "C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results/data_exploration" + +# Output paths +missing_values_path = os.path.join(dir_out, 'missing_values') +statistics_path = os.path.join(dir_out, 'statistics') +features_path = os.path.join(dir_out, 'features') + +# Get statistics without removing missing values etc +statistics = get_statistics(clean_df, data_dicts, + output_dir=statistics_path) + +#### Steps from List Reader +selected_d, all_keys = cross_check_dictionary(clean_df, data_dicts, tables, output_features) +reordered_df = clean_df.reindex(columns=all_keys) +clean_df, _, clean_keys = remove_features(reordered_df, p=0.1, exclude=output_features) +keys = [i for c, i in enumerate(all_keys) if not clean_keys[c]] +for k in keys: + selected_d.pop(k) +final_df = treat_missing_values(clean_df, method='median') +FP = FeaturePreprocessing(final_df.drop(columns=['dmRS', 'mRS90d', 'shift_mRS', 'mortality']), selected_d) + +# Remove data dictionaries of removed features +# Get more important features from lr and random forest + +for output_feature in ['dmRS', 'mortality']: + output_vector = final_df[output_feature].to_numpy(dtype=int).squeeze() + feature_vector = FP.create_features(final_df.drop(columns=['dmRS', 'mRS90d', 'shift_mRS', 'mortality'])) + names = FP.get_feature_names() + + rf = RandomForestClassifier(random_state=True, max_depth=5, n_estimators=100) + f = rf.fit(feature_vector, output_vector) + from sklearn.inspection import permutation_importance + result = permutation_importance( + f, feature_vector, output_vector, n_repeats=10, random_state=42, n_jobs=2 + ) + importance = result.importances_mean + importance_indices = np.argsort(importance)[::-1] + + plt.figure(figsize=(15, 6)) + x =[names[x] for x in importance_indices[0:20]] + plt.bar(x, importance[importance_indices[0:20]], yerr=result.importances_std[importance_indices[0:20]]) + plt.xticks(rotation=15, ha='right') + + plt.show() + + +## Missing values +missing_values = statistics['missing_values'] +plot_mv(missing_values, th=0.1 * clean_df.shape[0], out_dir=missing_values_path) + +with pd.ExcelWriter(os.path.join(missing_values_path, 
'missing_values.xlsx')) as writer: + statistics[['missing_values', 'Percentage (%)']].to_excel(writer, float_format="%0.1f") + +if not os.path.isdir(os.path.join(features_path, 'Target')): + os.mkdir(os.path.join(features_path, 'Target')) +## Target +plot_distribution_categorical(clean_df, 'mRS90d', table=data_dicts['Output'], + title='Distribution of mRS at 90 days', + out=True, + out_dir=os.path.join(features_path, 'Target')) + +plot_distribution_categorical(clean_df, 'dmRS', table=data_dicts['Output'], + title='Distribution of functional outcome at 90 days', + out=True, + out_dir=os.path.join(features_path, 'Target')) + +plot_distribution_categorical(clean_df, 'shift_mRS', table=data_dicts['Output'], + title='Distribution shift in mRS at 90 days', + out=True, + out_dir=os.path.join(features_path, 'Target')) + +plot_distribution_categorical(clean_df, 'mortality', table=data_dicts['Output'], + title='Distribution of mortality at 90 days', + out=True, + out_dir=os.path.join(features_path, 'Target')) + +#### Table details +tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h'] +for t in tables: + keys_table = list(data_dicts[t].keys()) + if not os.path.isdir(os.path.join(features_path, t)): + os.mkdir(os.path.join(features_path, t)) + + # Save individual statistics of each table + # with pd.ExcelWriter(os.path.join(statistics_path, 'statistics_{}.xlsx'.format(t))) as writer: + # statistics_table = statistics.loc[keys_table, :] + # statistics_table.to_excel(writer, float_format="%0.5f") + + # Add target values to k for visualization and check that they are on the clean table + keys_table = keys_table + ['mRS90d', 'dmRS', 'shift_mRS', 'mortality'] + keys_table = list(set(keys_table)) + keys_table = [a for a in keys_table if a in clean_df.columns] + df_table = clean_df[keys_table] + + # plot_correlation_features(df_table, os.path.join(statistics_path, + # 'Correlation_{}_table.png'.format(t))) + + for k in keys_table: + print(k) + if k in ['mRS90d', 'dmRS', 'shift_mRS', 'mortality']: + continue + p_values = [statistics.loc[k, target] for target in ['p_dmRS', 'p_mortality']] + type_k = data_dicts[t][k]['type'] + if type_k in ['cat', 'ord']: + plot_distribution_categorical(clean_df, k, + table=data_dicts[t], title='Distribution {}'.format(k), + out_dir='C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results/data_exploration/features/{}'. 
+ format(t), + p_values=p_values) + + elif type_k in ['int', 'float']: + print('int') + + plot_distribution_numerical(clean_df, k, + title='Distribution {}'.format(k), + out_dir='C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results/data_exploration/features/{}' + .format(t), + p_values=p_values) + +plt.close() diff --git a/Experiments/2-VariableSelection.py b/Experiments/2-VariableSelection.py new file mode 100644 index 0000000..1205afa --- /dev/null +++ b/Experiments/2-VariableSelection.py @@ -0,0 +1,162 @@ +from IO_utils.clean_table import clean_table +from IO_utils.List_Reader import TableReader +from IO_utils.split_utils import split_data_cv +from IO_utils.FeaturePreprocessing import FeaturePreprocessing +from IO_utils.Dataloader import MyDataLoader +from _utils.Result_container import Result_container +from train import train_model +from train_graph import train_model_graph +from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost +# %% DATALOADIND +import torch +import os + +## Clean original table +excel_dir = "../../data/TheList_anonymous_mv.xlsx" +clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) + +# Given a clean table get features and labels +table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median', + output_feature=['dmRS']) + +output_vector = table.output_vector +meta_vector = table.meta_df + +fold_indices = split_data_cv(output_vector, seed=5, cv=5) + +methods = ['all', 'p_value', 'random_forest', 'mrmr'] +#methods = ['mrmr'] +Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'], + output=['FCN', 'LR', 'RF', 'MLP', 'XGB', 'Graph']) +#### From all variables evaluate different variable selection methods + +for method in methods: + + for k in [3, 5, 10, 15, 20, 30, 50]: + #for k in [5,10]: + if method == 'all' and k > 3: + continue + + features = table.select_features(method=method, k=k, fold_indices=fold_indices) + + feature_vector = table.final_df[features] + FP = FeaturePreprocessing(feature_vector, table.selected_d) + feature_vector = FP.create_features(feature_vector) + + config = {'lr': 0.001, + 'momentum': 0, + 'weight_decay': 0.001, + 'layers': {'number': 3, + 'layer1': 80, + 'layer2': 20 + + }, + 'dropout': 0, + 'classification': True, + 'out_classes': 2} + + config_graph = { + 'Age': True, + 'beta_Age': 2, + 'Sex': False, + 'pre-mRS': True, + 'beta_mRS': 1, + 'NIHSS': False, + 'beta_NIHSS': 1 + + } + metrics_FCN = {} + metrics_LR = {} + metrics_RF = {} + metrics_MLP = {} + metrics_XGB = {} + metrics_Graph = {} + for f in range(5): + # FCN + ("Training FCN of fold {}".format(f)) + dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f], + table.selected_d,load_images=False, meta=meta_vector, one_hot=True) + + dl = dataloader_fold.get_loaders() + dl_graph = dataloader_fold.build_graph(config_graph) + + torch.manual_seed(0) + print('-----------------Training FCN--------------------- ') + + result, model = train_model(config, loaders=dl) + metrics_FCN['Fold {0}'.format(f)] = result['val_metrics'] + print('-------------------------------------- ') + + # LR + print('-----------------Training LR--------------------- ') + _, result_LR = apply_LR(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_LR['Fold {0}'.format(f)] = result_LR[1] + print("AUC of 
training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_LR[0]['auc'], + result_LR[1]['auc'], + result_LR[2][ + 'auc'])) + print('-------------------------------------- ') + + # RF + print('-----------------Training RF--------------------- ') + + _, result_RF = apply_random_forest(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_RF['Fold {0}'.format(f)] = result_RF[1] + print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_RF[0]['auc'], + result_RF[1]['auc'], + result_RF[2][ + 'auc'])) + print('-------------------------------------- ') + # MLP + print('-----------------Training MLP--------------------- ') + + _, result_MLP = apply_mlp(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_MLP['Fold {0}'.format(f)] = result_MLP[1] + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_MLP[0]['auc'], + result_MLP[1]['auc'], + result_MLP[2]['auc'])) + print('-------------------------------------- ') + # XGB Boost + print('-----------------Training XGB Boost--------------------- ') + + _, result_XGB = apply_xgbBoost(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_XGB['Fold {0}'.format(f)] = result_XGB[1] + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_XGB[0]['auc'], + result_XGB[1]['auc'], + result_XGB[2]['auc'])) + print('-------------------------------------- ') + print('-----------------Training Graph--------------------- ') + + result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f]) + metrics_Graph['Fold {0}'.format(f)] = result['val_metrics'] + print('-------------------------------------- ') + + if method == 'all': + Result_c.update('FCN', method, metrics_FCN) + Result_c.update('LR', method, metrics_LR) + Result_c.update('RF', method, metrics_RF) + Result_c.update('MLP', method, metrics_MLP) + Result_c.update('XGB', method, metrics_XGB) + Result_c.update('Graph', method, metrics_Graph) + + else: + Result_c.update('FCN', method + '_' + str(k), metrics_FCN) + Result_c.update('LR', method + '_' + str(k), metrics_LR) + Result_c.update('RF', method + '_' + str(k), metrics_RF) + Result_c.update('MLP', method + '_' + str(k), metrics_MLP) + Result_c.update('XGB', method + '_' + str(k), metrics_XGB) + Result_c.update('Graph', method + '_' + str(k), metrics_Graph) + + #Saves results on validation set + output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results' + Result_c.save(output_dir=output_dir, name='Variable_selection_val') diff --git a/Experiments/3-MVStrategy.py b/Experiments/3-MVStrategy.py new file mode 100644 index 0000000..9b960ee --- /dev/null +++ b/Experiments/3-MVStrategy.py @@ -0,0 +1,150 @@ +from IO_utils.clean_table import clean_table +from IO_utils.List_Reader import TableReader +from IO_utils.split_utils import split_data_cv +from IO_utils.FeaturePreprocessing import FeaturePreprocessing +from IO_utils.Dataloader import MyDataLoader +from _utils.Result_container import Result_container +from train import train_model +from train_graph import 
train_model_graph +from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost +# %% DATALOADIND +import torch +import os + +## Clean original table +excel_dir = "../../data/TheList_anonymous_mv.xlsx" +clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) + +# Given a clean table get features and labels +strategies = [ 'median', 'knn', 'mice'] +Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'], + output=['FCN', 'LR', 'RF', 'MLP', 'XGB', 'Graph']) +#### From all variables evaluate different variable selection methods + +for s in strategies: + + table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', + mv_strategy=s, + output_feature=['dmRS']) + + output_vector = table.output_vector + meta_vector = table.meta_df + + fold_indices = split_data_cv(output_vector, seed=5, cv=5) + features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices) + + feature_vector = table.final_df[features] + FP = FeaturePreprocessing(feature_vector, table.selected_d) + feature_vector = FP.create_features(feature_vector) + + config = {'lr': 0.001, + 'momentum': 0, + 'weight_decay': 0.001, + 'layers': {'number': 3, + 'layer1': 40, + 'layer2': 20 + + }, + 'dropout': 0, + 'classification': True, + 'out_classes': 2} + + metrics_FCN = {} + metrics_LR = {} + metrics_RF = {} + metrics_MLP = {} + metrics_XGB = {} + metrics_Graph = {} + + config_graph = { + 'Age': True, + 'beta_Age': 3, + 'Sex': False, + 'pre-mRS': True, + 'beta_mRS': 1, + 'NIHSS': False, + 'beta_NIHSS': 1 + + } + for f in range(5): + ("Training fold {}".format(f)) + + # GCN + print('-----------------Training GCN--------------------- ') + + dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f], + table.selected_d, load_images=False,meta =meta_vector, one_hot=True) + dl_graph = dataloader_fold.build_graph(config_graph) + torch.manual_seed(0) + + result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f]) + metrics_Graph['Fold {0}'.format(f)] = result['test_metric'] + + # FCN + print('-----------------Training LR--------------------- ') + + + dl = dataloader_fold.get_loaders() + torch.manual_seed(0) + + result, model = train_model(config, loaders=dl) + metrics_FCN['Fold {0}'.format(f)] = result['test_metric'] + # LR + print('-----------------Training LR--------------------- ') + _, result_LR = apply_LR(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_LR['Fold {0}'.format(f)] = result_LR[2] + print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_LR[0]['auc'], + result_LR[1]['auc'], + result_LR[2][ + 'auc'])) + print('-------------------------------------- ') + + # RF + print('-----------------Training RF--------------------- ') + + _, result_RF = apply_random_forest(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_RF['Fold {0}'.format(f)] = result_RF[2] + print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_RF[0]['auc'], + result_RF[1]['auc'], + result_RF[2][ + 'auc'])) + print('-------------------------------------- ') + # MLP + print('-----------------Training MLP--------------------- ') + + _, result_MLP = 
apply_mlp(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_MLP['Fold {0}'.format(f)] = result_MLP[2] + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_MLP[0]['auc'], + result_MLP[1]['auc'], + result_MLP[2]['auc'])) + print('-------------------------------------- ') + # XGB Boost + print('-----------------Training XGB Boost--------------------- ') + + _, result_XGB = apply_xgbBoost(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_XGB['Fold {0}'.format(f)] = result_XGB[2] + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_XGB[0]['auc'], + result_XGB[1]['auc'], + result_XGB[2]['auc'])) + print('-------------------------------------- ') + + Result_c.update('FCN', s, metrics_FCN) + Result_c.update('LR', s, metrics_LR) + Result_c.update('RF', s, metrics_RF) + Result_c.update('MLP', s, metrics_MLP) + Result_c.update('XGB', s, metrics_XGB) + + Result_c.update('Graph', s, metrics_Graph) + +output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results' +Result_c.save(output_dir=output_dir, name='MVStrategy_selection_test_mrmr10') diff --git a/Experiments/4-Timepoints.py b/Experiments/4-Timepoints.py new file mode 100644 index 0000000..c6d15f9 --- /dev/null +++ b/Experiments/4-Timepoints.py @@ -0,0 +1,164 @@ +from IO_utils.clean_table import clean_table +from IO_utils.List_Reader import TableReader +from IO_utils.split_utils import split_data_cv +from IO_utils.FeaturePreprocessing import FeaturePreprocessing +from IO_utils.Dataloader import MyDataLoader +from _utils.Result_container import Result_container +from train import train_model +from train_graph import train_model_graph +from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost +# %% DATALOADIND +import torch +import os + +## Clean original table +excel_dir = "../../data/TheList_anonymous_mv.xlsx" +clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) + +# Given a clean table get features and labels +tables_modalities = [ + #['NCCT', 'CTP', 'CTA'], + #['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out'], + #['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Control CT'], + + #['Metadata', 'NCCT', 'CTP', 'CTA'], + #['Metadata', 'NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out'], + #['Metadata', 'NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Control CT'], + + #['Metadata'], + #['Clinical'], + #['Metadata', 'Clinical'], + #['Treatment', 'Treatment_out', 'Metadata', 'Clinical']] + ['NCCT', 'CTP', 'CTA', 'Metadata', 'Clinical'], + ['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Metadata', 'Clinical'], + ['NCCT', 'CTP', 'CTA', 'Treatment', 'Treatment_out', 'Control CT', 'Metadata', 'Clinical']] + + +Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'], + output=['FCN', 'LR', 'RF', 'MLP', 'XGB', 'Graph']) +#### From all variables evaluate different variable selection methods + +for t in tables_modalities: + + table = TableReader(input_df=clean_df, tables=t, data_dictionaries='modalities', + mv_strategy='median', + output_feature=['mortality']) + + output_vector = table.output_vector + meta_vector = table.meta_df + + fold_indices = 
split_data_cv(output_vector, seed=5, cv=5) + print(t) + features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices) + + feature_vector = table.final_df[features] + FP = FeaturePreprocessing(feature_vector, table.selected_d) + feature_vector = FP.create_features(feature_vector) + + config = {'lr': 0.001, + 'momentum': 0, + 'weight_decay': 0.001, + 'layers': {'number': 3, + 'layer1': 40, + 'layer2': 20 + + }, + 'dropout': 0, + 'classification': True, + 'out_classes': 2} + + config_graph = { + 'Age': True, + 'beta_Age': 4, + 'Sex': True, + 'pre-mRS': False, + 'beta_mRS': 1, + 'NIHSS': False, + 'beta_NIHSS': 1 + + } + metrics_FCN = {} + metrics_LR = {} + metrics_RF = {} + metrics_MLP = {} + metrics_XGB = {} + metrics_Graph = {} + for f in range(5): + # GCN + + dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f], + table.selected_d, load_images=False, meta=meta_vector, one_hot=True) + dl_graph = dataloader_fold.build_graph(config_graph) + torch.manual_seed(0) + + result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f]) + metrics_Graph['Fold {0}'.format(f)] = result['test_metric'] + + + # FCN + ("Training FCN of fold {}".format(f)) + + dl = dataloader_fold.get_loaders() + torch.manual_seed(0) + + result, model = train_model(config, loaders=dl) + metrics_FCN['Fold {0}'.format(f)] = result['test_metric'] + + # LR + print('-----------------Training LR--------------------- ') + _, result_LR = apply_LR(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_LR['Fold {0}'.format(f)] = result_LR[2] + print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_LR[0]['auc'], + result_LR[1]['auc'], + result_LR[2][ + 'auc'])) + print('-------------------------------------- ') + + # RF + print('-----------------Training RF--------------------- ') + + _, result_RF = apply_random_forest(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_RF['Fold {0}'.format(f)] = result_RF[2] + print("AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_RF[0]['auc'], + result_RF[1]['auc'], + result_RF[2][ + 'auc'])) + print('-------------------------------------- ') + # MLP + print('-----------------Training MLP--------------------- ') + + _, result_MLP = apply_mlp(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_MLP['Fold {0}'.format(f)] = result_MLP[2] + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_MLP[0]['auc'], + result_MLP[1]['auc'], + result_MLP[2]['auc'])) + print('-------------------------------------- ') + # XGB Boost + print('-----------------Training XGB Boost--------------------- ') + + _, result_XGB = apply_xgbBoost(dataloader_fold.train_features, dataloader_fold.val_features, + dataloader_fold.test_features, dataloader_fold.train_output, + dataloader_fold.val_outout, dataloader_fold.test_outout) + metrics_XGB['Fold {0}'.format(f)] = result_XGB[2] + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f} ".format(result_XGB[0]['auc'], + result_XGB[1]['auc'], + 
result_XGB[2]['auc'])
+        print('-------------------------------------- ')
+
+    Result_c.update('FCN', '_'.join(t), metrics_FCN)
+    Result_c.update('LR', '_'.join(t), metrics_LR)
+    Result_c.update('RF', '_'.join(t), metrics_RF)
+    Result_c.update('MLP', '_'.join(t), metrics_MLP)
+    Result_c.update('XGB', '_'.join(t), metrics_XGB)
+    Result_c.update('Graph', '_'.join(t), metrics_Graph)
+
+output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results'
+Result_c.save(output_dir=output_dir, name='Timepoints_K10_mortality')
\ No newline at end of file
diff --git a/Experiments/5-Uncertainty.py b/Experiments/5-Uncertainty.py
new file mode 100644
index 0000000..5940be7
--- /dev/null
+++ b/Experiments/5-Uncertainty.py
@@ -0,0 +1,100 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+
+from evaluate_model import test, get_metrics_unc, plot_selectedsamples_metrics, plot_uncetainties
+from train import train_model
+import torch
+import os
+import numpy as np
+# %% DATALOADING
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2)
+
+# Given a clean table get features and labels
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median',
+                    output_feature=['dmRS'])
+
+output_vector = table.output_vector
+
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+
+features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices)
+
+feature_vector = table.final_df[features]
+FP = FeaturePreprocessing(feature_vector, table.selected_d)
+feature_vector = FP.create_features(feature_vector)
+
+config = {'lr': 0.01,
+          'momentum': 0,
+          'weight_decay': 0.001,
+          'layers': {'number': 3,
+                     'layer1': 40,
+                     'layer2': 20,
+
+                     },
+          'dropout': 0,
+          'classification': True,
+          'out_classes': 2}
+
+########
+mean_preds = []
+combined = []
+epistemic = []
+cls = []
+results_pred = {}
+results_epis = {}
+for p in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]:
+    results_pred[p] = {}
+    results_epis[p] = {}
+
+for k in range(5):
+    dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[k],
+                                   table.selected_d, one_hot=True)
+    dl = dataloader_fold.get_loaders()
+    fold_name = "C:/Users/martinca1/PhD/Projects/AI_Stroke/out/models/FCNEnsemble"
+
+    torch.manual_seed(0)
+    for i in range(10):
+
+        if not os.path.exists(fold_name):
+            os.mkdir(fold_name)
+
+        path_model = os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k))
+        if os.path.isfile(path_model):
+            continue
+        else:
+            print("Training model {} of fold {}".format(i, k))
+            _, model = train_model(config, loaders=dl)
+            torch.save(model, path_model)
+
+    state_dict_paths = [os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k)) for i in range(10)]
+
+    pred, unc, epistemic_unc, y = test(config, dl[2], state_dict_paths)
+    mean_preds.extend(pred.tolist())
+    combined.extend(unc.tolist())
+    epistemic.extend(epistemic_unc.tolist())
+    cls.extend(y.tolist())
+
+################### Test
+p = np.array(mean_preds)
+y = np.array(cls)
+c = np.array(combined)
+e = np.array(epistemic)
+
+# with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/predictive_uncertainty.xlsx") as writer:
+for per in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]:
+    results_pred[per] = get_metrics_unc(c, p,
y, per) + +plot_selectedsamples_metrics(c, p, y, uncertainty='Predictive') +plot_selectedsamples_metrics(e, p, y, uncertainty='Epistemic') +plot_uncetainties(p, y, c, e) +import matplotlib.pyplot as plt +plt.show() + + + diff --git a/Experiments/6-Graph_constr.py b/Experiments/6-Graph_constr.py new file mode 100644 index 0000000..a37da69 --- /dev/null +++ b/Experiments/6-Graph_constr.py @@ -0,0 +1,116 @@ +from IO_utils.clean_table import clean_table +from IO_utils.List_Reader import TableReader +from IO_utils.split_utils import split_data_cv +from IO_utils.FeaturePreprocessing import FeaturePreprocessing +from IO_utils.Dataloader import MyDataLoader +from _utils.Result_container import Result_container +from train_graph import train_model_graph +from sklearn.model_selection import ParameterGrid +from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost +# %% DATALOADIND +import torch +import os + +## Clean original table +excel_dir = "../../data/TheList_anonymous_mv.xlsx" +clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) + +# Given a clean table get features and labels +tables_modalities = ['all_timepoints'] +Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'], + output=['Graph']) +#### From all variables evaluate different variable selection methods + + + +table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', + mv_strategy='median', + output_feature=['dmRS']) + +output_vector = table.output_vector +meta_vector = table.meta_df +fold_indices = split_data_cv(output_vector, seed=5, cv=5) +features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices) +feature_vector = table.final_df[features] + +FP = FeaturePreprocessing(feature_vector, table.selected_d) +feature_vector = FP.create_features(feature_vector) + +config = {'lr': 0.001, + 'momentum': 0, + 'weight_decay': 0.001, + 'layers': {'number': 3, + 'layer1': 40, + 'layer2': 20 + + }, + 'dropout': 0, + 'classification': True, + 'out_classes': 2} +metrics = {} + + +grid_config_graph = [ + +{ + 'Age': [True], + 'beta_Age': [1, 2, 3, 4, 5], + 'Sex': [True, False], + 'pre-mRS': [True], + 'beta_mRS': [1, 2], + 'NIHSS': [False] +}, + +{ + 'Age': [True], + 'beta_Age': [1, 2, 3, 4, 5], + 'Sex': [True, False], + 'pre-mRS': [False], + 'NIHSS': [False]}, + +{ + 'Age': [False], + 'Sex': [True, False], + 'pre-mRS': [True], + 'beta_mRS': [1, 2], + 'NIHSS': [False]}, + { + 'Age': [False], + 'Sex': [True], + 'pre-mRS': [False], + 'NIHSS': [False]}, + +] + +"""grid_config_graph = { + 'Age': [True, False], + 'beta_Age': [1, 2, 3, 4, 5], + 'Sex': [True, False], + 'pre-mRS': [True, False], + 'beta_mRS': [1, 2], + 'NIHSS': [True, False], + 'beta_NIHSS': [1, 3, 5, 7, 10] +}""" +grid = ParameterGrid(grid_config_graph) + +for config_graph in grid: + print(config_graph) + for f in range(5): + # FCN + ("Training FCN of fold {}".format(f)) + dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f], + table.selected_d,load_images=False, meta= meta_vector, one_hot=True) + + loader = dataloader_fold.build_graph(config_graph) + + torch.manual_seed(0) + result, model = train_model_graph(config, loaders=loader, indices=fold_indices[f]) + metrics['Fold {0}'.format(f)] = result['val_metrics'] + # LR + + print('-------------------------------------- ') + import json + Result_c.update('Graph', json.dumps(config_graph), metrics) + + output_dir = 
'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results' + Result_c.save(output_dir=output_dir, name='Graph_grid_validation_new') diff --git a/Experiments/7-Graph_model.py b/Experiments/7-Graph_model.py new file mode 100644 index 0000000..a3fa88c --- /dev/null +++ b/Experiments/7-Graph_model.py @@ -0,0 +1,82 @@ +from IO_utils.clean_table import clean_table +from IO_utils.List_Reader import TableReader +from IO_utils.split_utils import split_data_cv +from IO_utils.FeaturePreprocessing import FeaturePreprocessing +from IO_utils.Dataloader import MyDataLoader +from _utils.Result_container import Result_container +from train_graph import train_model_graph +from sklearn.model_selection import ParameterGrid +from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost +# %% DATALOADIND +import torch +import os + +## Clean original table +excel_dir = "../../data/TheList_anonymous_mv.xlsx" +clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) + +# Given a clean table get features and labels +tables_modalities = ['all_timepoints'] +Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'], + output=['Graph']) +#### From all variables evaluate different variable selection methods + + +output_feature = 'dmRS' +table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', + mv_strategy='median', + output_feature=[output_feature]) + +output_vector = table.output_vector +meta_vector = table.meta_df +fold_indices = split_data_cv(output_vector, seed=5, cv=5) +features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices) +feature_vector = table.final_df[features] + +FP = FeaturePreprocessing(feature_vector, table.selected_d) +feature_vector = FP.create_features(feature_vector) + +config = {'lr': 0.001, + 'momentum': 0, + 'weight_decay': 0.001, + 'layers': {'number': 3, + 'layer1': 40, + 'layer2': 20 + + }, + 'dropout': 0, + 'classification': True, + 'out_classes': 2} +metrics = {} + +config_graph = { + 'Age': True, + 'beta_Age': 3, + 'Sex': False, + 'pre-mRS': True, + 'beta_mRS': 1, + 'NIHSS': False, + 'beta_NIHSS': 1 + + } + + +for f in range(5): + # FCN + ("Training FCN of fold {}".format(f)) + dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f], + table.selected_d,load_images=False, meta= meta_vector, + one_hot=True) + + loader = dataloader_fold.build_graph(config_graph) + + torch.manual_seed(0) + result, model = train_model_graph(config, loaders=loader, indices=fold_indices[f]) + metrics['Fold {0}'.format(f)] = result['test_metric'] + # LR + + print('-------------------------------------- ') +Result_c.update('Graph', 'Results', metrics) + +output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/SR_results' +Result_c.save(output_dir=output_dir, name='Graph_{}'.format(output_feature)) \ No newline at end of file diff --git a/Experiments/8-Uncertainty_graph.py b/Experiments/8-Uncertainty_graph.py new file mode 100644 index 0000000..4635eca --- /dev/null +++ b/Experiments/8-Uncertainty_graph.py @@ -0,0 +1,145 @@ +from IO_utils.clean_table import clean_table +from IO_utils.List_Reader import TableReader +from IO_utils.split_utils import split_data_cv +from IO_utils.FeaturePreprocessing import FeaturePreprocessing +from IO_utils.Dataloader import MyDataLoader + +from evaluate_model import test_graph, get_metrics_unc, plot_selectedsamples_metrics, plot_uncetainties, \ + plot_age_uncertainty, plot_boxplots +from train_graph 
import train_model_graph +import torch +import os +import numpy as np +import matplotlib.pyplot as plt +from sklearn.model_selection import ParameterGrid + +# %% DATALOADIND + +## Clean original table +excel_dir = "../../data/TheList_anonymous_mv.xlsx" +clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) + +# Given a clean table get features and labels +table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median', + output_feature=['mortality']) + +output_vector = table.output_vector + +fold_indices = split_data_cv(output_vector, seed=5, cv=5) + +features = table.select_features(method='mrmr', k=10, fold_indices=fold_indices) + +feature_vector = table.final_df[features] +metadata_vector = table.meta_df +FP = FeaturePreprocessing(feature_vector, table.selected_d) +feature_vector = FP.create_features(feature_vector) + +"""config = {'lr': 0.001, + 'momentum': 0, + 'weight_decay': 0.001, + 'layers': {'number': 5, + 'layer1': 96, + 'layer2': 32, + 'layer3': 8, + 'layer4': 4 + + }, + 'dropout': 0.2, + 'classification': True, + 'out_classes': 2}""" +config = {'lr': 0.001, + 'momentum': 0, + 'weight_decay': 0.001, + 'layers': {'number': 3, + 'layer1': 40, + 'layer2': 20 + + }, + 'dropout': 0, + 'classification': True, + 'out_classes': 2} +######## +mean_preds = [] +combined = [] +epistemic = [] +cls = [] +ages = [] +results_pred = {} +results_epis = {} +for p in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]: + results_pred[p] = {} + results_epis[p] = {} + +for k in range(5): + + dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[k], + table.selected_d, load_images=False, meta=metadata_vector, + one_hot=True) + ages_fold = dataloader_fold.meta['Age'].values[fold_indices[k][2]] + + """config_graph = { + 'Age': True, + 'beta_Age': 3, + 'Sex': False, + 'pre-mRS': True, + 'beta_mRS': 1, + 'NIHSS': False, + 'beta_NIHSS': 3 + + }""" + config_graph = { + 'Age': True, + 'beta_Age': 4, + 'Sex': True, + 'pre-mRS': False, + 'beta_mRS': 1, + 'NIHSS': False, + 'beta_NIHSS': 3 + + } + fold_name = "C:/Users/martinca1/PhD/Projects/AI_Stroke/out/models/EdgeDropout3_mortality" + loader = dataloader_fold.build_graph(config_graph=config_graph) + torch.manual_seed(0) + for i in range(10): + + if not os.path.exists(fold_name): + os.mkdir(fold_name) + + path_model = os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k)) + if os.path.isfile(path_model): + continue + else: + print("Training model {} of fold {}".format(i, k)) + _, model = train_model_graph(config, loaders=loader, indices=fold_indices[k]) + torch.save(model, path_model) + + state_dict_paths = [os.path.join(fold_name, "model_{}_fold_{}.pt".format(i, k)) for i in range(10)] + + pred, unc, epistemic_unc, y = test_graph(config, loader, fold_indices[k][2], state_dict_paths) + + ages.extend(ages_fold.tolist()) + mean_preds.extend(pred.tolist()) + combined.extend(unc.tolist()) + epistemic.extend(epistemic_unc.tolist()) + cls.extend(y.tolist()) + +################### Test +p = np.array(mean_preds) +y = np.array(cls) +c = np.array(combined) +e = np.array(epistemic) +a = np.array(ages) + +# with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/predictive_uncertainty.xlsx") as writer: +for th in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1,0]: + results_pred[th] = get_metrics_unc(c, p, y, th) + + + +plot_boxplots(p,y,uncertainty=c) +#plot_selectedsamples_metrics(c, p, y, uncertainty='Predictive') +#plot_selectedsamples_metrics(e, p, y, 
uncertainty='Epistemic')
+#plot_uncetainties(p, y, c, e)
+#plot_age_uncertainty(p, y, c, a)
+
+plt.show()
diff --git a/Experiments/9-Images.py b/Experiments/9-Images.py
new file mode 100644
index 0000000..677d4a6
--- /dev/null
+++ b/Experiments/9-Images.py
@@ -0,0 +1,55 @@
+from IO_utils.clean_table import clean_table
+from IO_utils.List_Reader import TableReader
+from IO_utils.split_utils import split_data_cv
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.Dataloader import MyDataLoader
+from _utils.Result_container import Result_container
+from train import train_model
+from train_graph import train_model_graph
+from architectures.ML_algorithms import apply_LR, apply_mlp, apply_random_forest, apply_xgbBoost
+# %% DATALOADING
+import torch
+import os
+
+## Clean original table
+excel_dir = "../../data/TheList_anonymous_mv.xlsx"
+clean_df = clean_table(excel_dir=excel_dir, pre_mRS=6)
+
+# Given a clean table get features and labels
+table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', mv_strategy='median',
+                    output_feature=['mortality'])
+
+output_vector = table.output_vector
+meta_vector = table.meta_df
+ids_vector = table.patient_ids
+
+
+fold_indices = split_data_cv(output_vector, seed=5, cv=5)
+## Load the images given by fold indices
+
+Result_c = Result_container(target_metrics=['auc', 'accuracy', 'balanced_accuracy', 'f1', 'cm'],
+                            output=['3DCNN'])
+
+#### From all variables evaluate different variable selection methods
+
+for f in range(5):
+    # FCN
+    print("Training FCN of fold {}".format(f))
+    """
+    #dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[f],
+    #                               table.selected_d,meta_vector, one_hot=True)
+
+    dl = dataloader_fold.get_loaders()
+    dl_graph = dataloader_fold.build_graph(config_graph)
+    torch.manual_seed(0)
+
+
+    result, _ = train_model_graph(config, loaders=dl_graph, indices=fold_indices[f])
+    print('-------------------------------------- ')
+
+
+    Result_c.update('Graph', method + '_' + str(k), metrics_Graph)
+"""
+
+output_dir = 'C:/Users/martinca1/PhD/Projects/AI_Stroke/out/results/preliminary results'
+Result_c.save(output_dir=output_dir, name='Variable_selection_Graph_mort_all')
diff --git a/Experiments/_t_test.py b/Experiments/_t_test.py
new file mode 100644
index 0000000..90bf206
--- /dev/null
+++ b/Experiments/_t_test.py
@@ -0,0 +1,62 @@
+import scipy.stats
+import numpy as np
+
+
+
+
+def compute_t_value(mean_base, sd_base, means, sds, samples):
+    # Pooled two-sample t statistic for equal group sizes:
+    # t = (mean_1 - mean_2) / sqrt((sd_1^2 + sd_2^2) / n)
+    for i in range(len(means)):
+        mean_2 = means[i]
+        sd_2 = sds[i]
+        t1 = (mean_base - mean_2)
+        t2 = (np.power(sd_base, 2) + np.power(sd_2, 2)) * (1 / samples)
+        t2 = np.sqrt(t2)
+
+        t = -t1 / t2
+
+        p = 2 * min(scipy.stats.t.cdf(t, samples), scipy.stats.t.cdf(-t, samples))
+        print('Two-sample t-test of {}+-{} against {}+-{} : p_value: {}'.format(mean_base, sd_base,
+                                                                                mean_2, sd_2,
+                                                                                p))
+
+
+
+
+samples = [160, 220, 274, 249]
+
+
+######### Calcaneus
+mean_base = 8.46
+means = [9.12, 8.69]
+sd_base = 0.63
+sds = [1.03, 1.23]
+
+print('Calcaneus')
+compute_t_value(mean_base, sd_base, means, sds, samples[0])
+
+############## Ankle
+mean_base = 6.32
+means = [9.02, 5.86]
+sd_base = 0.25
+sds = [1.59, 0.40]
+
+print('Ankle')
+compute_t_value(mean_base, sd_base, means, sds, samples[1])
+
+############ Knee
+mean_base = 6.8
+means = [7.79, 6.49]
+sd_base = 0.55
+sds = [0.53, 1.05]
+
+print('Knee')
+compute_t_value(mean_base, sd_base, means, sds, samples[2])
+
+########### Wrist
+mean_base = 7.85
+means = [9.59, 9.93]
+sd_base = 0.94
+sds = [2.15, 1.41]
+
+print('Wrist')
+compute_t_value(mean_base, sd_base, means, sds, samples[3])
\ No newline at end of file
diff --git a/IO_utils/Dataloader.py b/IO_utils/Dataloader.py
new file mode 100644
index 0000000..18339b7
--- /dev/null
+++ b/IO_utils/Dataloader.py
@@ -0,0 +1,63 @@
+from torch.utils.data import DataLoader
+from torch_geometric.loader import DataLoader as graph_Dataloader
+from IO_utils.Datasets import MyDataset, Graph_Dataset
+
+
+class MyDataLoader:
+
+    def __init__(self, feature, output, fold_indices, selected_d, load_images=False, patient_ids=None,
+                 meta=None, batch_size=None, one_hot=False):
+        train_indices, val_indices, test_indices = fold_indices
+
+        self.features = feature
+        self.output = output
+        self.meta = meta
+        self.one_hot = one_hot
+        self.patient_ids = patient_ids
+        self.load_images = load_images
+
+        # Default batch size: 80% of the data set, i.e. the whole training split in one batch
+        self.batch_size = batch_size if batch_size is not None else int(0.8 * feature.shape[0])
+
+        train_dataset = MyDataset(feature, output, train_indices,
+                                  data_dictionaries=selected_d, one_hot=one_hot)
+
+        self.train_loader = DataLoader(dataset=train_dataset,
+                                       batch_size=self.batch_size,
+                                       shuffle=True,
+                                       num_workers=0)
+
+        val_dataset = MyDataset(feature, output, val_indices,
+                                data_dictionaries=selected_d, one_hot=one_hot)
+
+        self.val_loader = DataLoader(dataset=val_dataset,
+                                     batch_size=self.batch_size,
+                                     num_workers=0)
+
+        test_dataset = MyDataset(feature, output, test_indices,
+                                 data_dictionaries=selected_d, one_hot=one_hot)
+
+        self.test_loader = DataLoader(dataset=test_dataset,
+                                      batch_size=self.batch_size,
+                                      num_workers=0)
+
+        self.train_features, self.train_output = feature[train_indices, :], output[train_indices].squeeze()
+        self.test_features, self.test_outout = feature[test_indices, :], output[test_indices].squeeze()
+        self.val_features, self.val_outout = feature[val_indices, :], output[val_indices].squeeze()
+
+    def get_loaders(self):
+        return self.train_loader, self.val_loader, self.test_loader
+
+    def build_graph(self, config_graph):
+        # print(df_feature)
+        train_dataset = Graph_Dataset(self.meta,
+                                      self.features, self.output, config_graph, self.one_hot)
+
+        train_loader = graph_Dataloader(dataset=train_dataset.get(),
+                                        batch_size=self.batch_size,
+                                        num_workers=0)
+
+        return train_loader
+
+
+
diff --git a/IO_utils/Datasets.py b/IO_utils/Datasets.py
new file mode 100644
index 0000000..29ac784
--- /dev/null
+++ b/IO_utils/Datasets.py
@@ -0,0 +1,140 @@
+import torch
+import numpy as np
+from torch_geometric.data import Data
+from scipy.spatial.distance import correlation
+
+class MyDataset:
+
+    def __init__(self, features, labels, indices, data_dictionaries, one_hot=False,
+                 images=False, patient_ids=None):
+
+        self.features = features[indices, :]
+        self.labels = labels[indices]
+        self.patient_indices = patient_ids[indices, 0] if patient_ids is not None else None
+        self.data_dictionaries = data_dictionaries
+        self.one_hot = one_hot
+        self.images = images
+        self.patients_ids = patient_ids
+
+        self.volumes = {}
+        if self.images:
+
+            self.load_images()
+
+    def load_images(self):
+        # Load images (placeholder: the volumes are initialised to None)
+        for i in range(len(self.patient_indices)):
+            # Load volume
+            self.volumes[i] = {}
+            for modality in ['NCCT', 'CTP']:
+                # Load modality
+                self.volumes[i][modality] = None
+
+
+    def __len__(self):
+        return self.features.shape[0]
+
+    def __getitem__(self, i):
+        # Get features
+        features = self.features[i, :]
+        x = torch.from_numpy(features.squeeze()).float()
+        if self.one_hot:
+            label = np.zeros(np.max(self.labels) + 1)
+            label[self.labels[i]] = 1
y = torch.from_numpy(label) + else: + y = torch.from_numpy(np.array(self.labels[i])).float() + data = {'x': x, + 'y': y + } + + if self.images: + for m in ['NCCT', 'CTP']: + data[m]= self.volumes[i][m] + + + + return data + +class Graph_Dataset: + + def __init__(self, meta, features, labels, config_graph, one_hot): + self.meta = meta + self.features = features + self.labels = labels + + x = torch.tensor(features, dtype=torch.float) + + if one_hot: + labels_onehot = np.zeros((labels.shape[0], len(np.unique(labels)))) + for i in range(labels.shape[0]): + labels_onehot[i, labels[i]] = 1 + y = torch.tensor(labels_onehot, dtype=torch.float) + else: + y = torch.tensor(labels, dtype=torch.float) + + self.edge_index, self.weights = self.create_graph(config_graph) + + self.data = Data(x=x, y=y, edge_index=self.edge_index, weights=self.weights, labels=labels) + print("Number of nodes ", self.data.num_nodes) + print("Number of features ", self.data.num_node_features) + print("Number of edges ", self.data.num_edges) + print("Graph contain isolated nodes ", self.data.contains_isolated_nodes()) + # self.plot_graph() + + def len(self): + return self.meta.shape[0] + + def get(self): + + return [self.data] + + def create_graph(self, config_graph): + + v1 = [] + v2 = [] + + nodes = self.meta.shape[0] + weights = [] + + for i in range(nodes): + for ii in range(nodes): + if i != ii: + condition_Age = np.abs(self.meta['Age'].values[i] - self.meta['Age'].values[ii]) < \ + config_graph['beta_Age'] if config_graph['Age'] else True + condition_Sex = self.meta['Sex'].values[i] == self.meta['Sex'].values[ii] \ + if config_graph['Sex'] else True + condition_mRS = (self.meta['pre-mRS'].values[i] - self.meta['pre-mRS'].values[ii]) < \ + config_graph['beta_mRS'] if config_graph['pre-mRS'] else True + condition_NIHSS = (self.meta['NIHSS'].values[i] - self.meta['NIHSS'].values[ii]) < \ + config_graph['beta_NIHSS'] if config_graph['NIHSS'] else True + + if condition_Age and condition_Sex and condition_mRS and condition_NIHSS: + v1.append(i) + v2.append(ii) + dist = correlation(self.features[i, :], self.features[ii, :]) + if np.isnan(dist): + dist = 1.0 + weights.extend([dist]) + for i in range(nodes): + if i not in v1: + #print('Node {0} not connected'.format(i)) + #print('Age {0}, Sex {1}, pre-mRS {2} and NIHSS {3} '.format(self.meta['Age'].values[i], + # self.meta['Sex'].values[i], + # self.meta['pre-mRS'].values[i], + # self.meta['NIHSS'].values[i])) + # + corr = [correlation(self.meta.iloc[i, :], self.meta.iloc[ii, :]) for ii in range(nodes)] + sorted_corr = np.argsort(corr) + for j in range(1): + v1.append(i) + v2.append(sorted_corr[j+1]) + weights.extend([corr[sorted_corr[j+1]]]) + v2.append(i) + v1.append(sorted_corr[j+1]) + weights.extend([corr[sorted_corr[j+1]]]) + + edge_index = torch.tensor([v1, v2], dtype=torch.long) + weight_vector = torch.tensor(weights, dtype=torch.float) + + return edge_index, weight_vector diff --git a/IO_utils/FeaturePreprocessing.py b/IO_utils/FeaturePreprocessing.py new file mode 100644 index 0000000..d31fbbd --- /dev/null +++ b/IO_utils/FeaturePreprocessing.py @@ -0,0 +1,109 @@ +import copy +import numpy as np + +class FeaturePreprocessing: + """ + Normalize numerical values and transform to one-hot vector encoding categorical values + :param df: Input data frame + :param data_dictionaries: dictionaries with information about the features + :param exclude: columns that have to be excluded from the feature vector + :return: + """ + + def __init__(self, df, data_dictionaries, 
diff --git a/IO_utils/FeaturePreprocessing.py b/IO_utils/FeaturePreprocessing.py
new file mode 100644
index 0000000..d31fbbd
--- /dev/null
+++ b/IO_utils/FeaturePreprocessing.py
@@ -0,0 +1,109 @@
+import copy
+import numpy as np
+
+class FeaturePreprocessing:
+    """
+    Normalize numerical features and one-hot encode categorical/ordinal features
+    :param df: input data frame
+    :param data_dictionaries: dictionaries with information about the features
+    :param exclude: columns that have to be excluded from the feature vector
+    """
+
+    def __init__(self, df, data_dictionaries, exclude=[]):
+
+        exclude_vector = copy.copy(exclude)
+        exclude_vector.extend(['Id'])
+
+        self.features = [i for i in df.columns if i not in exclude_vector]
+        # Keep min/max aligned with self.features so that self.min[count] and
+        # self.max[count] refer to the column being normalized in create_features
+        self.min = [df[column].min() for column in self.features]
+        self.max = [df[column].max() for column in self.features]
+        self.data_dictionaries = data_dictionaries
+        # print('### Reading table(s)')
+        # print('### {} Features: {}'.format(len(features), features))
+
+    def create_features(self, df):
+        # Iterate over the columns of the dataframe and process each one according to its type
+        feature_vector = []
+        self.feature_vector_names = []
+        for count, column in enumerate(self.features):
+
+            d = self.data_dictionaries[column]
+            if d['type'] in ['cat', 'ord']:
+
+                ordered = True if d['type'] == 'ord' else False
+                output = cat_to_one_hot(df[column], d, ordered=ordered)
+                for k in list(d['description'].values()):
+                    self.feature_vector_names.extend([column + '#' + k])
+            elif d['type'] in ['int', 'float']:
+
+                # Min-max normalization; constant columns are mapped to 0
+                output = df[column] * 0 if self.max[count] == self.min[count] else \
+                    (df[column] - self.min[count]) / (self.max[count] - self.min[count])
+                output = np.reshape(output.to_numpy(), (output.shape[0], 1))
+
+                self.feature_vector_names.extend([column])
+
+            else:
+                raise ValueError('Column {} has an unsupported type in the data dictionary'.format(column))
+
+            # print(feature_vector_names)
+            feature_vector.extend([output])
+
+        final_feature_vector = np.concatenate(feature_vector, axis=1)
+
+        # Save all the features to an Excel table
+        # features_df = pd.DataFrame(data=final_feature_vector, index=df['Id'], columns=feature_vector_names)
+        # new_dir = '../out/test/Features.xlsx'
+        # features_df.to_excel(new_dir, columns=feature_vector_names, index=True)
+        return final_feature_vector
+
+    def get_feature_names(self):
+        return self.feature_vector_names
+
+def cat_to_one_hot(features, dictionary, ordered=False):
+    """
+    Transform a categorical variable into a one-hot vector
+
+    :param features: Feature vector of size n x 1
+    :param dictionary: Dictionary with fields 'info', 'categories', and 'description'
+    :return: one-hot feature vector of size n x c where c is the number of categories
+    """
+    feature_vector = copy.copy(features)
+    # Number of patients
+    patients = feature_vector.shape[0]
+    # Number of categories
+    categories = int(dictionary['categories'])
+    assert isinstance(categories, int), 'Categories in data dictionary should be integer'
+    # Description of the categories
+    description = dictionary['description']
+
+    # Create the feature vector of size patients x categories
+    one_hot_vector = np.zeros((patients, categories))
+
+    # Normal case where the feature categories are given
+    if description != 'None':
+
+        # Check that the number of categories matches
+        assert categories == len(
+            description.keys()), 'Categories and their description do not match in the data dictionary'
+
+        # If the keys given in the description are not [0, 1, ..., n] replace the values in the feature vector
+        expected_keys = list(range(categories))
+        k = list(map(int, list(description.keys())))
+        if expected_keys != k:
+            for i in range(categories):
+                feature_vector = feature_vector.replace(k[i], expected_keys[i])
+
+        # Get the one-hot (or, for ordered variables, thermometer) encoding for each patient
+        for count, i in enumerate(feature_vector.index):
+            try:
+                if ordered:
+                    one_hot_vector[count, :int(feature_vector[i])] = 1
+                else:
+                    one_hot_vector[count, int(feature_vector[i])] = 1
+
+            except ValueError:
+                print('{} cannot be converted to int'.format(feature_vector[i]))
+
+    return one_hot_vector
\ No newline at end of file
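A toy run of cat_to_one_hot (hypothetical dictionary entry in the style of dictionaries/*.yml): categorical values become 0/1 indicator columns, while ordered=True produces a cumulative ("thermometer") encoding, so an ordinal value of 0 maps to all zeros.

import pandas as pd
from IO_utils.FeaturePreprocessing import cat_to_one_hot

d = {'categories': 3, 'description': {0: 'low', 1: 'mid', 2: 'high'}}
col = pd.Series([0, 2, 1])

print(cat_to_one_hot(col, d))                # [[1,0,0], [0,0,1], [0,1,0]]
print(cat_to_one_hot(col, d, ordered=True))  # [[0,0,0], [1,1,0], [1,0,0]]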
diff --git a/IO_utils/List_Reader.py b/IO_utils/List_Reader.py
new file mode 100644
index 0000000..f77e6cc
--- /dev/null
+++ b/IO_utils/List_Reader.py
@@ -0,0 +1,125 @@
+import copy
+import yaml
+import os
+
+from IO_utils.List_reader_utils import cross_check_dictionary, remove_features, treat_missing_values, \
+    get_all_dict_types, save_df
+from IO_utils.mv_strategies import select_features_pvalue, select_features_RF, select_features_RFE, \
+    combined_selection, select_features_MRMR
+from IO_utils.statistics_utils import get_pvalue, compute_basic_statistics
+
+
+class TableReader(object):
+
+    def __init__(self, input_df, tables, data_dictionaries='timepoints', mv_strategy='median',
+                 output_feature=None):
+
+        ROOT_DIR = "C:/Users/martinca1/PhD/Projects/AI_Stroke/AIS_Regress"
+        dir_data_dicts = "dictionaries/dictionary_modalities.yml" if data_dictionaries == 'modalities' else \
+            "dictionaries/dictionary_timepoints.yml"
+
+        dir_data_dicts = os.path.join(ROOT_DIR, dir_data_dicts)
+        self.data_dicts = yaml.load(open(dir_data_dicts), Loader=yaml.Loader)
+        self.output_feature = copy.copy(output_feature)
+
+        # Check that the output feature is one of the possible options
+        self.output_features = ['mRS90d', 'shift_mRS', 'dmRS', 'mortality']
+        assert self.output_feature[0] in self.output_features, 'Output feature should be one of: ' \
+                                                               'mRS90d, shift_mRS, dmRS, mortality and is' \
+                                                               ' {} '.format(self.output_feature[0])
+
+        if tables[0] == 'all_timepoints':
+            self.tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h']
+
+        else:
+            self.tables = tables
+            assert len(tables) == len(set(tables)), 'Tables list contains repeated elements'
+
+        # --------------------------------- 1 - Select the tables ----------------------------------------------
+        # Check that all elements from the selected tables are in the data frame and retrieve the corresponding
+        # data dictionaries and the indices from all the tables, including the output indices
+        # All the possible outputs are included
+        self.selected_d, all_keys = cross_check_dictionary(input_df, self.data_dicts, self.tables,
+                                                           self.output_features)
+
+        # Select columns given by tables and reorder dataframe to match data dictionaries order
+        self.reordered_df = input_df.reindex(columns=all_keys)
+
+        # ----------------------- 2 - Remove features with more than 10% missing values -----------------------
+        clean_df, missing_values, clean_keys = remove_features(self.reordered_df, p=0.1, exclude=self.output_features)
+
+        # Remove data dictionaries of removed features
+        keys = [i for c, i in enumerate(all_keys) if not clean_keys[c]]
+        for k in keys:
+            self.selected_d.pop(k)
+
+        # ----------------------------- 3 - Handle missing values ------------------------------------------------
+        # Apply the missing value strategy
+        self.final_df = treat_missing_values(clean_df, method=mv_strategy)
+        self.meta_df = input_df.loc[self.final_df.index, ['Age', 'Sex', 'pre-mRS', 'NIHSS']]
+
+        self.patient_ids = input_df.loc[self.final_df.index, ['Id']]
+        # ---------------------------- 4 - Set output --------------------------------------
+        # This is used for the fold split
+        self.output_vector = self.final_df[self.output_feature[0]].to_numpy(dtype=int).squeeze()
+
+    def select_features(self, method='all', k=10, fold_indices=None):
+
+        # ----------------------------- Remove output features ------------------------------------------------
+        features_df = copy.copy(self.final_df)
+        features_df = features_df.drop(columns=self.output_features)
+        output_vector = copy.copy(self.final_df[self.output_feature[0]])
+
+        if method == 'all':
+            selected_features = features_df.columns.tolist()
+
+        elif method == 'p_value':
+            selected_features = select_features_pvalue(features_df, self.selected_d, output_vector, fold_indices,
+                                                       k=k)
+        elif method == 'random_forest':
+            selected_features = select_features_RF(features_df, self.selected_d, output_vector, fold_indices, k=k)
+
+        elif method == 'rfe':
+            selected_features = select_features_RFE(features_df, self.selected_d, output_vector, fold_indices, k=k)
+
+        elif method == 'combined':
+            selected_features = combined_selection(features_df, self.selected_d, output_vector, fold_indices, k=k)
+
+        elif method == 'mrmr':
+            selected_features = select_features_MRMR(features_df, self.selected_d, output_vector, fold_indices, k=k)
+        else:
+            raise ValueError("Feature selection method {} not implemented".format(method))
+
+        return selected_features
+
+    def get_statistics(self, output_dir='../out/data_exploration/statistics', output_name='all_statistics.xlsx'):
+
+        # Reorder dataframe according to data dictionary
+        reordered_df = self.reordered_df.drop(columns=['Id'])
+        types = []
+        all_df, all_missing_values, clean_keys = remove_features(reordered_df, p=1, exclude=[])
+
+        tables = ['Admission', 'Pre-EVT', 'Post-EVT', 'After24h']
+        for t in tables:
+            types.extend(get_all_dict_types(self.data_dicts[t], types=[]))
+
+        # Compute statistics of the selected tables
+        statistics = compute_basic_statistics(all_df)
+        p_dmRS, methods = get_pvalue(all_df, all_df['dmRS'], types)
+        p_mortality, _ = get_pvalue(all_df, all_df['mortality'], types)
+        # p_shiftmRS, _ = get_pvalue(all_df, input_df['shift_mRS'], types)
+
+        statistics['p_dmRS'] = p_dmRS
+        statistics['p_mortality'] = p_mortality
+        # statistics['shift_mRS'] = p_shiftmRS
+        statistics['method'] = methods
+
+        statistics['missing_values'] = all_missing_values
+        statistics['Percentage (%)'] = (statistics['missing_values'] * 100. / all_df.shape[0]).to_list()
+        statistics['types'] = types
+
+        save_df(statistics, output_dir=output_dir, name=output_name, overwrite=True)
+
+        return statistics
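Putting the pieces together, a usage sketch (hypothetical path; table names and strategy strings follow the conventions above, and 'Admission' is one of the four timepoint tables):

from IO_utils.clean_table import clean_table
from IO_utils.List_Reader import TableReader

input_df = clean_table('path/to/patient_list.xlsx')  # placeholder path
reader = TableReader(input_df, tables=['Admission'], mv_strategy='median',
                     output_feature=['dmRS'])
selected = reader.select_features(method='all')      # 'p_value', 'mrmr', ... need fold_indices
stats = reader.get_statistics()                      # also writes an Excel summary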
diff --git a/IO_utils/List_reader_utils.py b/IO_utils/List_reader_utils.py
new file mode 100644
index 0000000..57599c4
--- /dev/null
+++ b/IO_utils/List_reader_utils.py
@@ -0,0 +1,108 @@
+import numpy as np
+import os
+import pandas as pd
+import sklearn.tree
+from sklearn.experimental import enable_iterative_imputer  # noqa: enables IterativeImputer
+from sklearn.impute import KNNImputer, IterativeImputer
+def cross_check_dictionary(input_df, data_dictionary, tables, output):
+    d = {'Id': {}}
+    all_indices = ['Id']
+    for t in tables:
+        indices = list(data_dictionary[t].keys())
+        for i in indices:
+            d[i] = data_dictionary[t][i]
+        all_indices.extend(indices)
+
+    # Check that all the features from the data dictionary are in the table
+    columns_df = input_df.columns
+    for index in all_indices:
+        if index not in columns_df and index != 'Id':
+            raise ValueError('Feature {} from data dictionary is not in data frame '.format(index))
+
+    for o in output:
+        if o not in all_indices:
+            all_indices.extend([o])
+
+    return d, all_indices
+
+def treat_missing_values(df, method):
+    if method == 'drop':
+
+        final_df = df.dropna()
+    elif method == 'median':
+        indices = df.loc[df['mRS90d'].isnull()].index
+        final_df = df.drop(indices)
+        final_df = final_df.fillna(df.median())
+
+    elif method == 'knn':
+        imputer = KNNImputer(n_neighbors=5, weights="uniform")
+        indices = df.loc[df['mRS90d'].isnull()].index
+        final_df = df.drop(indices)
+        imputer.fit(final_df)
+        final_df[final_df.columns] = imputer.transform(final_df.to_numpy())
+
+    elif method == 'mice':
+        imputer = IterativeImputer(random_state=0,
+                                   estimator=sklearn.tree.DecisionTreeRegressor(max_features='sqrt',
+                                                                                random_state=0))
+        indices = df.loc[df['mRS90d'].isnull()].index
+        final_df = df.drop(indices)
+        imputer.fit(final_df)
+        final_df[final_df.columns] = imputer.transform(final_df.to_numpy())
+    else:
+        raise ValueError("{} mv strategy not implemented".format(method))
+
+    return final_df
+
+def remove_features(df, p, exclude):
+    """
+    Remove columns from a dataframe with more than p*100% missing values
+    :param df: Pandas dataframe
+    :param p: Fraction (0-1) of missing values allowed
+    :param exclude: Columns to exclude
+    :return: Clean dataframe, number of missing values of the original df and the indices of the selected columns
+    """
+
+    # print('### Removing features with more than {}% of missing values'.format(p * 100))
+    patients = df.shape[0]
+
+    # Get indices of features with more than p*100% of missing values
+    missing_values = np.array(df.isnull().sum(axis=0))
+    selected_features_idx = (missing_values <= (p * patients))
+    # Get indices of features to exclude
+    idx_exclude = [df.columns.get_loc(i) for i in exclude]
+    selected_features_idx[idx_exclude] = True
+    # Select the features from the original df
+    final_df = df[df.columns[selected_features_idx]]
+
+    # Print information about the removed features
+    removed_columns = df.columns[[not i for i in selected_features_idx]]
+    removed_number = missing_values[[not i for i in selected_features_idx]]
+    for i in range(removed_columns.shape[0]):
+        print("### {} - Feature {} removed with {} missing ({:.2f} %)".format(i, removed_columns[i],
+                                                                              removed_number[i],
+                                                                              (removed_number[i] / patients) * 100))
+    return final_df, missing_values, selected_features_idx
+
+
+def get_all_dict_types(nested_dictionary, types=[]):
+    for key, value in nested_dictionary.items():
+
+        if type(value) is dict:
+            get_all_dict_types(value, types=types)
+        if key == 'type':
+            types.extend([value])
+        else:
+            continue
+
+    return types
+
+def save_df(df, output_dir, name='output.xlsx', overwrite=False, file_format='Excel'):
+    file_path = os.path.join(output_dir, name)
+    if not overwrite:
+        assert not os.path.isfile(file_path), 'File already exists'
+
+    if file_format == 'Excel':
+        with pd.ExcelWriter(file_path) as writer:
+            df.to_excel(writer, sheet_name='Sheet1')
+
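A small toy run of the strategies above (invented values): rows with a missing 'mRS90d' endpoint are always dropped first, then the chosen imputer fills the remaining gaps.

import numpy as np
import pandas as pd
from IO_utils.List_reader_utils import treat_missing_values

df = pd.DataFrame({'mRS90d': [0, 3, np.nan, 6, 2, 1, 4, 5],
                   'Age': [71, np.nan, 80, 65, 77, 59, 83, 70],
                   'NIHSS': [4, 12, 9, np.nan, 16, 2, 20, 7]})

print(treat_missing_values(df, method='median'))  # NaN Age/NIHSS -> column medians
print(treat_missing_values(df, method='knn'))     # NaN Age/NIHSS -> 5-NN estimates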
diff --git a/IO_utils/__pycache__/Dataloader.cpython-37.pyc b/IO_utils/__pycache__/Dataloader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1e09a70de9bc92e145ca06daa4644ac277fbf28f
Binary files /dev/null and b/IO_utils/__pycache__/Dataloader.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/Datasets.cpython-37.pyc b/IO_utils/__pycache__/Datasets.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..00eb6a5eee63c0a4b4d6db939d63add7517a2947
Binary files /dev/null and b/IO_utils/__pycache__/Datasets.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/FeaturePreprocessing.cpython-37.pyc b/IO_utils/__pycache__/FeaturePreprocessing.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..89e3f62800b5a6305c10bb2c37cf7c1b8079f7b8
Binary files /dev/null and b/IO_utils/__pycache__/FeaturePreprocessing.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/List_Reader.cpython-37.pyc b/IO_utils/__pycache__/List_Reader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..163df555b090f376793b1f44a6ed072895a93f63
Binary files /dev/null and b/IO_utils/__pycache__/List_Reader.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/List_reader_utils.cpython-37.pyc b/IO_utils/__pycache__/List_reader_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e354aae7e8356f0dd5e32343905b3449e000d779
Binary files /dev/null and b/IO_utils/__pycache__/List_reader_utils.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/clean_table.cpython-37.pyc b/IO_utils/__pycache__/clean_table.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a0b33d4af8f0f7e35599a5f2b3e483631a1cb92
Binary files /dev/null and b/IO_utils/__pycache__/clean_table.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/mv_strategies.cpython-37.pyc b/IO_utils/__pycache__/mv_strategies.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c082ebb7d9f04c45bba16f774987e8536198cb6f
Binary files /dev/null and b/IO_utils/__pycache__/mv_strategies.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/split_utils.cpython-37.pyc b/IO_utils/__pycache__/split_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a492db74b2ce1f97daca711e86495299dd7ca9f
Binary files /dev/null and b/IO_utils/__pycache__/split_utils.cpython-37.pyc differ
diff --git a/IO_utils/__pycache__/statistics_utils.cpython-37.pyc b/IO_utils/__pycache__/statistics_utils.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4e069e116665318b8fc45e73187d04699eba9aa
Binary files /dev/null and b/IO_utils/__pycache__/statistics_utils.cpython-37.pyc differ
znX#6?hIIj34L>At*KjUiOJRnvp?X-b=n<%4TfkN$08v-Ng{FrIWIBdDf;H?5*lGl! zdblz4V6#W4hGPL+jSy514?<6&T?zXF4w#=L!G2o6nZgP#G=#xE5{Bq1Vb9{irY?mg zg{_yVRs`hZ8WEUUsF^I;OlW3`f_*FsQwKE@O&!!sF;Ixqh{4o?%>;)DnwjF@5D|x| z1DlDV4r-=E4fg`J8VQ(MsF`T?FJ!Eh1c#U;OdZrrG<8rjrD}K<u+>OG)E3%>gNiYh z6m}bi8nzmi6b>7P8ul8N6iyq48jczka52YO!vZemxN2Cy#T<7HOA4<ILk&+2O9~&T zI1Fac<oDBLEMjJ0V7SGcoLB-bx{5$mM-eLn1A``G5jz6|Llh?{cgN=z6&oO!hEe<w z(fp#4#GK5MN&|EO!zhm8k`#zG7|Sq<4=jezgvvM6<Sya?6&$=Ef)7N1DzhR1unTii z^NPekV!|Lo1Vo5}2nmo|rHf=iida%|gMx2y=E7{a#h;v5lA4}hl$lx_4=P8a1R*6` zd{Sl^sMtv?NsVFyd8OD8N*h6GV<>GB#R@9)3?Y;egffOuCPfM$7bt>)i7h$5G_S-k z3PKw}X=5mD0x3!j!6m36gkuEe7(qD3V2&|_V*=)wfD1|pp$zh!47_*)IX*M7Bo(QM z(*PB537~R{2~os>3pz#?CMhO9CK*N^Mji~3j|C*I0F}X{1>l7txJYE=U@FoA6^fu# z1F}qzfq?;3x`JC9J&X(tu(rhn#)^s(h7v|t%VGj!Y+4LcEo&`X4MPo^B*Oxxg$xUr z7cvz2)G#k#NnwN}?HZN^EGe+sTcU=kh80o(7JUJ=NFX|E7*kk!nQGa=#R_{C=R(F5 z)@+8NTQ%%iT%a~f3R5;q(FHJ%4P30U6kRCc&f-a72eoHPco*<3WME_{;V)rdAW*|m z!@fXpAwvpB3TH1<3LB`610tIlYdKT6YPkwOr7+cUrZCoW<w=wXWeG13sbOEp7{gM_ zUCUX^Q_GveUCWhM0(B=(HroWoA}6>j*+4~8EgziE1m^RD%@M8PTOhWOp@ttsE@X;f zsuif^OyRBN%2TUhOW^}o1z{;{P`ian#B116_@VX)G_y1@f<g_}+5&}I4JRZ73njuq zZ5`$`reFq5LBCrpe))N+kX)?_$|+3X3||DwT`xh|9GvT`xYII=Gg6E6N>YnU(n0as za(%g@CSw$5d45rLaY15oY8AH|Scw8iNfozqMy76YVQFGfYKkWFE#}O;l3T3c*3vEJ z{Gya9wh&#Ax?4=1DODoDC5a{AsyZ<TtXm;1zo?2oFTX?~Gq)foH8(Y{BsE1-q6m@M z*)ogsN^^2<F(>BaKr%h3T)4%NT$Ep2T#}en1S$h=G3RFH-C`?C%qdMRzQtOckywyg z1WJ!ZLZDm;YJK10NK7utEK4j&Edtg4w^$R4iV`btag^l8gWJtTk|32*p!_S4oRMi1 zpPZjpl9`vDnwMM|#aROKLVRXk$}P5(qWprq#9LfpAj@2fit>vz1tCQaTS;P3dTL3L zCMbup=cblq<fjzh;wXrRxC^O-5JWOOz9cazCp8KhrXi`tB|*0s3qTD+kp3tZkYjE! zr{t&I;z+G1NKGzDO}WLGc#ARV77M6V11S_h?WiJf$-tGKS`uFXwlNn}4%mUB5Y)_J zl4247<$FdZCLTs1W&uVS7A{5(MkOXTCKg6EW)Vg<CO#$sP{W6jjhTy)g^`U(h>3?$ zfT_q0lvucLu|mQL9NugNiFqlB#o#hJinpks(nt^1DuskQD99l}3Tilk3kt9~NX|vL hnZqVGKczG$)eh9SE(WDXPzl4s$iv9N%%Q;H3IKP3VR!%l literal 0 HcmV?d00001 diff --git a/IO_utils/clean_table.py b/IO_utils/clean_table.py new file mode 100644 index 0000000..cf39a30 --- /dev/null +++ b/IO_utils/clean_table.py @@ -0,0 +1,321 @@ +import pandas as pd +import numpy as np + + +def clean_table(excel_dir, pre_mRS=2): + """ + This method creates a clean list processing parameter by parameter in the original list to create + a list that can be directly processed and save it + :param excel_dir: path of the excel list + :return: + """ + print('###############################################################') + print('##############Cleaning table ##################################') + print('###############################################################') + + input_df = pd.read_excel(excel_dir) + # Remove first row (info about notation) and last row (empty) + input_df = input_df.drop([0, input_df.shape[0] - 1], axis='rows') + print('######## Initial table with {} patients and {} feature'.format( + input_df.shape[0], input_df.shape[1])) + # Middle cerebral stroke selection + Territoium = input_df['Territoium'] + list_territoium = [Territoium[Territoium == i].count() for i in [0, 1, 2]] + print( + '######## Anterior circulation: {}, Middle circulation {}, Posterior circulation: {}'.format(*list_territoium)) + mc_df = input_df[input_df['Territoium'] == 1] + mc_df = mc_df.drop(['Territoium'], axis=1) + + print('######## Select {} patients with middle circulation stroke'.format(mc_df.shape[0])) + + mc_df = mc_df[mc_df['mRS90d'].notna()] + print('######## Select {} patients with valid output'.format(mc_df.shape[0])) + # Remove patients with shift in mRS <0 + shift = mc_df['mRS90d'] - mc_df['pre-mRS'] + mc_df = mc_df[shift 
+
+
+def remove_dates(mc_df):
+    mc_df = mc_df.drop(['Geburtsdatum', 'Aufnahmedatum UKER', 'Beginn Angio', 'Zeitpunkt Rekanalisation',
+                        'Puncture-to-Rekan', 'Zeitpunkt Verlaufs-CT \n(post 24h)', 'Zeit von Rekan bis Kontrolle',
+                        'Zeit von Bildgebung bis Rekan', 'Zeitpunkt 1. Bildgebung',
+                        'Symptom onset'], axis=1)
+    return mc_df
+
+
+def clean_meta_data(mc_df):
+    # Rename column names
+    mc_df = mc_df.rename(columns={'Nummer': 'Id', 'Sex (0=männlich, 1=weiblich': 'Sex', 'Alter': 'Age'})
+
+    # Check that ages are consistent with birth and admission dates
+    assert np.floor(mc_df['Age']).equals(
+        np.floor((mc_df['Aufnahmedatum UKER'] - mc_df['Geburtsdatum']) / np.timedelta64(1, 'Y')))
+
+    # Replace float Age by int Age
+    print('#### 1.1 - Convert Age to int')
+    mc_df['Age'] = np.floor(mc_df['Age'])
+
+    # Remove modality of first image (all are CT)
+    mc_df = mc_df.drop(['1. Bildgebung UKER', '1. Bildgebung CT'], axis=1)
+
+    return mc_df
+
+
+def clean_ncct_data(mc_df):
+    mc_df = mc_df.rename(columns={'e-ASPECTS bzw pc-ASPECTS': 'e-ASPECTS',
+                                  'Volumen e-ASPECTS': 'Volume e-ASPECTS',
+                                  'IVCgesund': 'IVC gesund',
+                                  'ICV-Index': 'ICV Index',
+                                  'Ort Gefäßverschluss \nbei Aufnahme (proximalster)': 'Vessel Occlusion Location Admission',
+                                  'Seite Gefäßverschluss \nbei Aufnahme': 'Vessel Occlusion Side Admission',
+                                  'Symptomatische/vorgeschaltete Gefäßstenose bei Aufnahme': 'Vessel Stenosis',
+                                  'Ort symptomatische/vorgeschaltete Gefäßstenose bei Aufnahme': 'Vessel Stenosis Location',
+                                  'Gefäßdissektion bei Aufnahme': 'Arterial Dissection',
+                                  'Ort Gefäßdissektion': 'Arterial Dissection Location',
+                                  'ASPECTS oberfl/tief': 'ASPECTS oberfl_tief'
+                                  })
+
+    ASPECT_areas = ['C_br', 'IC_br', 'INS_br', 'L_br', 'M1_br', 'M2_br', 'M3_br', 'M4_br', 'M5_br', 'M6_br']
+    for area in ASPECT_areas:
+        mc_df[area] = replace_values(mc_df[area], [9, 'n/a'], [0, np.nan])
+
+    # Encode whether the infarct involves deep regions, superficial regions, both or none
+    deep_areas = ['C_br', 'IC_br', 'L_br']
+    superficial_areas = ['INS_br', 'M1_br', 'M2_br', 'M3_br', 'M4_br', 'M5_br', 'M6_br']
+    for index, patient in mc_df.iterrows():
+        deep_stroke = patient[deep_areas].values
+        superficial_stroke = patient[superficial_areas].values
+        if any(deep_stroke) and any(superficial_stroke):
+            a = 2
+        elif any(deep_stroke):
+            a = 1
+        elif any(superficial_stroke):
+            a = 0
+        else:
+            a = 3
+
+        mc_df.loc[index, 'ASPECTS oberfl_tief'] = a
+
+    mc_df['e-ASPECTS'] = replace_values(mc_df['e-ASPECTS'], [11], [np.nan])
+
+    mc_df['pc-ASPECTS'] = replace_values(mc_df['pc-ASPECTS'], [0], [np.nan])
+    mc_df['Volume e-ASPECTS'] = replace_values(mc_df['Volume e-ASPECTS'], ['n/a'], [np.nan])
+
+    return mc_df
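The resulting 'ASPECTS oberfl_tief' code therefore takes four values: 0 = superficial regions only, 1 = deep regions only, 2 = both, 3 = none. A minimal restatement of the mapping:

def aspects_code(deep_hit, superficial_hit):
    # Same mapping as the loop in clean_ncct_data
    if deep_hit and superficial_hit:
        return 2
    if deep_hit:
        return 1
    if superficial_hit:
        return 0
    return 3

assert aspects_code(True, False) == 1   # deep infarct only
assert aspects_code(False, True) == 0   # superficial infarct only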
+
+
+def clean_ctp_data(mc_df):
+    # Two patients with missing CTP:
+    # one is the only patient with a missing image (plus missing mRS and many other values; posterior circulation),
+    # the second also has a posterior circulation stroke
+    # mc_df['1. Bildgebung CT-P'] = replace_values(mc_df['1. Bildgebung CT-P'], ['n/a', 0], [np.nan, np.nan])
+    # mc_df = mc_df.drop(mc_df[mc_df['1. Bildgebung CT-P'] == np.nan].index)
+    # Remove CTP; this has to be modified if posterior circulation is used
+    mc_df = mc_df.drop(['1. Bildgebung CT-P'], axis=1)
+
+    mc_df = mc_df.rename(columns={'Mismatch Volumen nach RAPID': 'Mismatch Volume',
+                                  'Mismatch Ratio nach RAPID': 'Mismatch Ratio',
+                                  'HypoperfusionIndex (Tmax10s/&max6.5s': 'Hypoperfusion Index',
+                                  'CBV Index (rCBV in Tmax>6': 'CBV Index',
+                                  'CBF <30% lesion volume': 'CBF_lower30_volume',
+                                  'Tmax >6s lesion volume': 'Tmax_greater6s_volume'
+                                  })
+
+    list_pct_features = ['CBF_lower30_volume', 'Tmax_greater6s_volume', 'Mismatch Volume',
+                         'Hypoperfusion Index', 'CBV Index']
+
+    for feature in list_pct_features:
+        mc_df[feature] = replace_values(mc_df[feature], ['n/a'], [np.nan])
+
+    # Infinite value is not replaced
+    # mc_df['Mismatch Ratio'] = replace_values(mc_df['Mismatch Ratio'], ['n/a', 'none'], [np.nan, np.nan])
+    print('#### 3.1 - Calculate inverse mismatch ratio - avoid infinite')
+    mc_df['Inverse Mismatch Ratio'] = mc_df['CBF_lower30_volume'] / replace_values(mc_df['Tmax_greater6s_volume'],
+                                                                                   [0], [0.001])
+
+    print('#### 3.2 - Remove Mismatch Ratio ')
+    mc_df = mc_df.drop(['Mismatch Ratio'], axis=1)
+    # Delete values from Brainomix, only in patients from 2018
+    print('#### 3.3 - Remove features obtained with Brainomix (only in patients from 2018) ')
+    mc_df = mc_df.drop(['Braino CBF<30%', 'Braino Tmax >6s', 'Braino Mismatch vol', 'Braino Hypoperfindex'], axis=1)
+
+    return mc_df
+
+
+def clean_cta_data(mc_df):
+    # All patients have CTA - remove
+    mc_df = mc_df.drop(['Akute DSA '], axis=1)
+
+    mc_df = mc_df.rename(columns={'Gefäßverschluss DSA ': 'Vessel Occlusion CTA'
+                                  })
+
+    # Remove empty column
+    mc_df = mc_df.drop(['TTP lesion volume'], axis=1)
+    print('#### 4.1 - Remove tandem stenosis feature - no patient with it')
+    # Remove empty column
+    mc_df = mc_df.drop(['Tandemstenose'], axis=1)
+
+    # Replace n/a
+    for feature in ['Tan Score', 'Coves Score', 'BATMAN', 'Clot Burden Score']:
+        mc_df[feature] = replace_values(mc_df[feature], ['n/a'], [np.nan])
+
+    return mc_df
+
+
+def clean_treatment_data(mc_df):
+    mc_df = mc_df.rename(columns={'Thrombektomie': 'Thrombectomy',
+                                  'ggf. weiteres Device': 'PTA',
+                                  'Anzahl der Manöver': 'Number Maneuver',
+                                  'Puncture-to-Rekan in min.': 'Time_Puncture_to_Recan.',
+                                  'frustrane Rekanalisation': 'Frustrated Recanalization',
+                                  'Gefäßverschluss nach Rekanalisation': 'Vessel Occlusion after Recan.',
+                                  'TICI ': 'TICI',
+                                  'Lyse i.a.': 'Lysis i.a.',
+                                  'Lysemenge': 'Lysis quantity',
+                                  'Gefäßverschluss in neuem Versorgungsgebiet \n(während/nach Intervention)': 'Vessel Occlusion new SupplyArea',
+                                  'Lokalisation Gefäßverschluss': 'Vessel Occlusion new SupplyArea Location',
+                                  'Behandlung (des neuen Gefäßverschlusses)': 'Vessel Occlusion new SupplyArea Treatment',
+                                  'Infarkt in neuem Versorgungsgebiet': 'Infarct new SupplyArea',
+                                  'Stenting ': 'Stenting'
+                                  })
+
+    mc_df['Device'] = replace_values(mc_df['Device'], [0], [np.nan])
+    mc_df['PTA'] = replace_values(mc_df['PTA'], [np.nan], [0])
+    mc_df['Time_Puncture_to_Recan.'] = replace_values(mc_df['Time_Puncture_to_Recan.'], ['#ZAHL!'], [np.nan])
+    mc_df['Lysis i.a.'] = replace_values(mc_df['Lysis i.a.'], [9], [0])
+    mc_df['Lysis quantity'] = replace_values(mc_df['Lysis quantity'], [np.nan, 'n/a'], [0, 0])
+    mc_df['Infarct new SupplyArea'] = replace_values(mc_df['Infarct new SupplyArea'], ['n/a'], [np.nan])
+
+    return mc_df
+
+
+def clean_ControlCT_data(mc_df):
+    # Missing in 5 patients: 4 died during recanalization, 1 because a new
+    # recanalization was performed
+    # Remove empty column
+
+    mc_df = mc_df.rename(columns={'Zeit von Rekan bis Kontrolle in min.': 'Time_Recan_to_Control',
+                                  'Zeit von Bildgebung bis Rekan in min': 'Time_CT_to_Angio.',
+                                  'lakuärer Infarkt (keine Kortexbeteiligung und Infarkt ≤1,5cm '
+                                  '(≤2,0cm falls mittels DWI in MRT gemessen) im größten Durchmesser f'
+                                  'alls ausschließlich subkortikal)': 'Lacunar Infarct',
+                                  'Infarktvolumen Verlaufs-CT': 'Infarct Volume ControlCT',
+                                  'Hyperdense Mediazeichen': 'Hyperdense Media Sign',
+                                  'pc-Aspect verlaufss-CT': 'pc-Aspect ControlCT',
+                                  'Aspect Verlaufs-CT': 'Aspect ControlCT',
+                                  })
+
+    mc_df = mc_df.drop(['Dauer der Rekan'], axis=1)
+    mc_df['Time_CT_to_Angio.'] = replace_values(mc_df['Time_CT_to_Angio.'], ['#WERT!'], [np.nan])
+
+    mc_df['Infarct Volume ControlCT'] = replace_values(mc_df['Infarct Volume ControlCT'], ['n/a'], [np.nan])
+    mc_df['pc-Aspect ControlCT'] = replace_values(mc_df['pc-Aspect ControlCT'], ['n/a', 0], [np.nan, np.nan])
+    mc_df['Aspect ControlCT'] = replace_values(mc_df['Aspect ControlCT'], ['n/a'], [np.nan])
+
+    return mc_df
+
+
+def clean_clinical_data(mc_df):
+    mc_df = mc_df.rename(columns={'Symptom onset / LSN': 'Symptom onset',
+                                  'unknown onset(1=onset unbekannt, LSN angegeben)': 'Unknown Onset'
+                                  })
+
+    # Create the dichotomized functional outcome
+    print('#### 7.1 - Add dichotomized mRS')
+    mc_df['dmRS'] = [1 if i > 2 else (np.nan if np.isnan(i) else 0) for i in mc_df['mRS90d']]
+    print('#### 7.2 - Add mRS shift')
+    mc_df['shift_mRS'] = mc_df['mRS90d'] - mc_df['pre-mRS']
+    print('#### 7.3 - Add mortality')
+    mc_df['mortality'] = [1 if i == 6 else (np.nan if np.isnan(i) else 0) for i in mc_df['mRS90d']]
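+
+    # Worked example of the derived outcomes (values are illustrative): a
+    # patient with pre-mRS 1 and mRS90d 4 gets dmRS = 1 (poor outcome, mRS > 2),
+    # shift_mRS = 3 and mortality = 0; mRS90d = 6 would give mortality = 1.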
+
+    mc_df['Unknown Onset'] = replace_values(mc_df['Unknown Onset'], [np.nan], [1])
+    mc_df['Zeitpunkt 1. Bildgebung'] = pd.to_datetime(mc_df['Zeitpunkt 1. Bildgebung'])
+    # total_seconds() instead of .seconds, so the interval does not wrap around
+    # at day boundaries
+    mc_df['Time_Onset_to_Admission'] = (mc_df['Zeitpunkt 1. Bildgebung']
+                                        - mc_df['Symptom onset']).dt.total_seconds() / 60
+    # mc_df.loc[mc_df['Unknown Onset'] == 1, 'Time_Onset_to_Admission'] = -1
+
+    # Handle unknown times
+    # u = (mc_df['Unknown Onset'] == 1).values
+    # mean = np.nanmedian(mc_df['Time_Onset_to_Admission'][u])
+    # mc_df['Time_Onset_to_Admission'] = replace_values(mc_df['Time_Onset_to_Admission'], [np.nan], [mean])
+
+    return mc_df
+
+
+def replace_values(column, original_values=(), target_values=()):
+    assert len(original_values) == len(target_values), 'Both lists have to have the same length'
+
+    for (i, j) in zip(original_values, target_values):
+        column.replace(i, j, inplace=True)
+
+    return column
+
+
+def get_values(column):
+    unique_values = column.unique()
+    values = []
+    number = []
+
+    for i in unique_values:
+        values.extend([i])
+        if pd.isna(i):
+            number.extend([column.isna().sum()])
+        else:
+            number.extend([column[column == i].count()])
+
+    print(values, number)
diff --git a/IO_utils/mv_strategies.py b/IO_utils/mv_strategies.py
new file mode 100644
index 0000000..ac266c8
--- /dev/null
+++ b/IO_utils/mv_strategies.py
@@ -0,0 +1,258 @@
+import copy
+from mrmr import mrmr_classif
+import numpy as np
+import pandas as pd
+from sklearn.feature_selection import RFE
+from sklearn.ensemble import RandomForestClassifier
+
+from IO_utils.statistics_utils import get_pvalue
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+
+
+def select_features_pvalue(df, data_dictionaries, output_vector, fold_indices, k=10):
+    # Get the types of the features -- they determine the method used to
+    # compute the p-value
+    types = []
+    for t in data_dictionaries.keys():
+        if t not in ['Id']:
+            types.extend([data_dictionaries[t]['type']])
+    all_p_values = np.zeros((df.shape[1] - 1))
+    for indices in fold_indices:
+        _, _, test_indices = indices
+        drop_indices = df.index[test_indices]
+        df_copy = copy.copy(df.drop(drop_indices))
+
+        p_values, _ = get_pvalue(df_copy.drop(columns=['Id']), output_vector, types)
+        all_p_values += np.array(p_values)
+
+    smaller_indices = np.argsort(all_p_values)[:k]
+    indices = [a in smaller_indices for a in range(df.shape[1] - 1)]
+    indices = [False] + indices
+    selected_features = df.columns[indices]
+
+    print("Features selected (p-value): {} ".format(selected_features.values))
+
+    return selected_features
+
+
+def select_features_RF(df, data_dictionaries, output_vector, fold_indices, k=10):
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_imp = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        clf = RandomForestClassifier(n_estimators=100, random_state=0)
+        clf.fit(features, output)
+        all_features_imp = all_features_imp + clf.feature_importances_
+
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_imp, index=names)
+
+    # Map the one-hot encoded columns back to the original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=False)
+    for next_feature in sorted_value.index:
+        feature = next_feature.split('#')[0]
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    print("Features selected (Random forest): {} ".format(features))
+
+    return features
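+
+
+# The selectors above and below share one convention: FeaturePreprocessing
+# expands categorical variables into one-hot columns named '<feature>#<level>'
+# (assumed naming scheme), so split('#')[0] collapses the per-level scores
+# back to the original variable. Hypothetical illustration:
+#   scores {'Sex#0': 0.02, 'Sex#1': 0.03, 'Age': 0.10}
+#   -> ranked and deduplicated: ['Age', 'Sex']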
+def select_features_RFE(df, data_dictionaries, output_vector, fold_indices, k=10):
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_ranking = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        clf = RandomForestClassifier(n_estimators=100, random_state=0)
+        rfe = RFE(estimator=clf, n_features_to_select=k, step=1)
+        rfe.fit(features, output)
+        all_features_ranking = all_features_ranking + rfe.ranking_
+
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_ranking, index=names)
+
+    # Map the one-hot encoded columns back to the original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=True)
+    for next_feature in sorted_value.index:
+        feature = next_feature.split('#')[0]
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    # Restore the original column order of the dataframe
+    features = [f for f in df.columns if f in features]
+
+    print("Features selected (RFE): {} ".format(features))
+
+    return features
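+
+
+# combined_selection below chains the two ideas: a cheap univariate p-value
+# filter first shrinks the candidate set to 2*k features, then the (more
+# expensive) RFE wrapper refines that set down to k. The small forest used
+# inside the RFE (5 trees, depth 5) keeps the repeated fits affordable.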
+def combined_selection(df, data_dictionaries, output_vector, fold_indices, k=10):
+    # Get the types of the features -- they determine the method used to
+    # compute the p-value
+
+    # 1 - Select k*2 features by p-value
+    types = []
+    for t in data_dictionaries.keys():
+        if t not in ['Id']:
+            types.extend([data_dictionaries[t]['type']])
+    all_p_values = np.zeros((df.shape[1] - 1))
+    for indices in fold_indices:
+        _, _, test_indices = indices
+        drop_indices = df.index[test_indices]
+        df_copy = copy.copy(df.drop(drop_indices))
+
+        p_values, _ = get_pvalue(df_copy.drop(columns=['Id']), output_vector, types)
+        all_p_values += np.array(p_values)
+
+    sorted_indices = np.argsort(all_p_values)
+    sorted_values = np.sort(all_p_values)
+    for i, v in zip(sorted_indices, sorted_values):
+        print(df.columns[i + 1], 'P_value: {} '.format(v))
+
+    smaller_indices = np.argsort(all_p_values)[:k * 2]
+    indices = [a in smaller_indices for a in range(df.shape[1] - 1)]
+    indices = [False] + indices
+    selected_features = df.columns[indices]
+
+    print("Features selected (p-value): {} ".format(selected_features.values))
+
+    # 2 - Refine the preselection with RFE; the features are built from the
+    # reduced frame that FP was fitted on
+    df_reduced = df[selected_features]
+    FP = FeaturePreprocessing(df_reduced, data_dictionaries)
+    all_feature = FP.create_features(df_reduced)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_ranking = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        clf = RandomForestClassifier(n_estimators=5, max_depth=5, random_state=0)
+        rfe = RFE(estimator=clf, n_features_to_select=k, step=1)
+        rfe.fit(features, output)
+        all_features_ranking = all_features_ranking + rfe.ranking_
+
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_ranking, index=names)
+
+    # Map the one-hot encoded columns back to the original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=True)
+    for next_feature in sorted_value.index:
+        feature = next_feature.split('#')[0]
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    # Restore the original column order of the dataframe
+    features = [f for f in df.columns if f in features]
+
+    print("Features selected (RFE): {} ".format(features))
+
+    return features
+
+
+def select_features_MRMR(df, data_dictionaries, output_vector, fold_indices, k=10):
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+
+    all_features_ranking = np.zeros((all_feature.shape[1]))
+
+    for indices in fold_indices:
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        # mrmr_classif returns the selected columns ordered by relevance; turn
+        # that order into scores (k for the best, 1 for the last) and sum them
+        # over the folds
+        selected_features = mrmr_classif(features, output, K=k)
+        for counter, i in enumerate(selected_features):
+            all_features_ranking[i] += len(selected_features) - counter
+
+    names = FP.get_feature_names()
+    feat_imp = pd.Series(all_features_ranking, index=names)
+
+    # Map the one-hot encoded columns back to the original features
+    features = []
+    sorted_value = feat_imp.sort_values(ascending=False)
+    for next_feature, next_value in zip(sorted_value.index, sorted_value.values):
+        feature = next_feature.split('#')[0]
+        if feature not in features:
+            features.extend([feature])
+        if len(features) == k:
+            break
+
+    # Restore the original column order of the dataframe
+    features = [f for f in df.columns if f in features]
+    print("Features selected (MRMR): {} ".format(features))
+
+    return features
+
+
+def select_features_MRMR_fold(df, data_dictionaries, output_vector, fold_indices, k=10):
+    FP = FeaturePreprocessing(df, data_dictionaries)
+    all_feature = FP.create_features(df)
+    all_output = output_vector.to_numpy(dtype=int).squeeze()
+    all_features = []
+
+    for i, indices in enumerate(fold_indices):
+        train_indices, val_indices, test_indices = indices
+
+        features = copy.copy(all_feature)[train_indices + val_indices, :]
+        output = copy.copy(all_output)[train_indices + val_indices]
+
+        selected_features = mrmr_classif(features, output, K=k)
+        names = FP.get_feature_names()
+        selected_features_names = [names[j].split('#')[0] for j in selected_features]
+        s = [f for f in df.columns if f in selected_features_names]
+        all_features.append(s)
+
+        print("Features selected in fold {} (MRMR): {} ".format(i, s))
+
+    return all_features
diff --git a/IO_utils/split_utils.py b/IO_utils/split_utils.py
new file mode 100644
index 0000000..bcd345d
--- /dev/null
+++ b/IO_utils/split_utils.py
@@ -0,0 +1,63 @@
+import copy
+import random
+import numpy as np
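+
+# split_data_cv below builds a stratified cross-validation: for each of the cv
+# folds, one cv-th of every class becomes the test set and the remaining
+# samples are split 80/20 (again per class) into training and validation.
+# Hypothetical shape: with 100 samples and cv=5, each fold holds roughly
+# 20 test, 64 train and 16 validation samples, with class ratios preserved.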
+
+def split_data_cv(output, seed, cv=5, p=1):
+    np.random.seed(seed)
+    random.seed(seed)
+    classes = np.unique(output)
+    indices_per_class = [np.where(output == c)[0] for c in classes]
+
+    # Make train and test splits with the same class proportions
+    p_split = np.ones(cv) * 1 / cv
+    all_indices = split_stratified(p_split, classes, indices_per_class)
+    folds_indices = []
+
+    for fold in range(cv):
+        cv_test_indices = copy.copy(all_indices[fold])
+        cv_train_folds = copy.copy(all_indices)
+        cv_train_folds.pop(fold)
+
+        cv_train_val_indices = [item for sublist in cv_train_folds for item in sublist]
+        cv_train_val_indices_array = np.array(cv_train_val_indices)
+
+        output_train_val = output[cv_train_val_indices]
+        indices_train_val_per_class = [cv_train_val_indices_array[np.where(output_train_val == c)[0].tolist()]
+                                       for c in classes]
+        train_val_all_indices = split_stratified((0.8, 0.2), classes, indices_train_val_per_class)
+
+        cv_train_indices = copy.copy(train_val_all_indices[0])
+        cv_val_indices = copy.copy(train_val_all_indices[1])
+
+        folds_indices.append([cv_train_indices, cv_val_indices, cv_test_indices])
+
+    return folds_indices
+
+
+def split_stratified(p_split, classes, indices_per_class):
+    t = [0] * len(classes)
+    indices_split = []
+
+    for c in range(len(classes)):
+        random.shuffle(indices_per_class[c])
+
+    for count, sp in enumerate(p_split):
+        indices = []
+
+        for c in range(len(classes)):
+            indices_c = indices_per_class[c]
+            t0 = t[c]
+            # int() instead of the deprecated np.int; the last chunk takes the rest
+            t1 = int(t0 + (indices_c.shape[0] * sp) + 0.5) if count != len(p_split) - 1 else indices_c.shape[0]
+            a = list(indices_c[t0:t1])
+            indices.extend(a)
+            t[c] = t1
+        random.shuffle(indices)
+        indices_split.append(indices)
+
+    return indices_split
diff --git a/IO_utils/statistics_utils.py b/IO_utils/statistics_utils.py
new file mode 100644
index 0000000..e7e58a9
--- /dev/null
+++ b/IO_utils/statistics_utils.py
@@ -0,0 +1,159 @@
+import numpy as np
+import pandas as pd
+import rpy2.robjects as robjects
+from rpy2.robjects import numpy2ri
+import scipy.stats
+
+
+def compute_basic_statistics(df):
+    output = pd.DataFrame(index=df.columns)
+
+    means = []
+    medians = []
+    stds = []
+    ci = []
+
+    for i, column in enumerate(list(df)):
+        mean = np.nanmean(df[column].astype('float32'))
+        means.extend([mean])
+        median = np.nanmedian(df[column].astype('float32'))
+        medians.extend([median])
+        std = np.nanstd(df[column].astype('float32'), ddof=1)
+        stds.extend([std])
+        if mean != 0 and std != 0:
+            ci_low, ci_high = scipy.stats.norm.interval(0.95, loc=mean, scale=std)
+        else:
+            ci_low, ci_high = 0, 0
+        ci.extend([[ci_low, ci_high]])
+
+    output['mean'] = means
+    output['std'] = stds
+    output['0.95 confidence interval'] = ci
+    output['median'] = medians
+
+    return output
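+
+# The '0.95 confidence interval' above is mean +/- 1.96*std of the raw values,
+# i.e. a normal reference range for individual observations. A 95% CI of the
+# mean itself would shrink with the sample size:
+#   CI_mean = mean +/- 1.96 * std / sqrt(n)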
+
+
+def compute_bivariate_statistics(df, dmRS, mortality, categories_list):
+    output_bivariate = pd.DataFrame(index=df.columns)
+
+    means1 = []
+    means2 = []
+    means3 = []
+    means4 = []
+
+    stds1 = []
+    stds2 = []
+    stds3 = []
+    stds4 = []
+
+    for i, column in enumerate(list(df)):
+
+        if categories_list[i] == 'cat':
+            count1 = len(df[(dmRS == 0) & (df[column] == 0)])
+            means1.extend([count1])
+            stds1.extend([0])
+
+            count2 = len(df[(dmRS == 1) & (df[column] == 0)])
+            means2.extend([count2])
+            stds2.extend([0])
+
+            count3 = len(df[(mortality == 0) & (df[column] == 0)])
+            means3.extend([count3])
+            stds3.extend([0])
+
+            count4 = len(df[(mortality == 1) & (df[column] == 0)])
+            means4.extend([count4])
+            stds4.extend([0])
+
+        else:
+            mean1 = np.nanmean(df[dmRS == 0][column].astype('float32'))
+            means1.extend([mean1])
+            std1 = np.nanstd(df[dmRS == 0][column].astype('float32'), ddof=1)
+            stds1.extend([std1])
+
+            mean2 = np.nanmean(df[dmRS == 1][column].astype('float32'))
+            means2.extend([mean2])
+            std2 = np.nanstd(df[dmRS == 1][column].astype('float32'), ddof=1)
+            stds2.extend([std2])
+
+            mean3 = np.nanmean(df[mortality == 0][column].astype('float32'))
+            means3.extend([mean3])
+            std3 = np.nanstd(df[mortality == 0][column].astype('float32'), ddof=1)
+            stds3.extend([std3])
+
+            mean4 = np.nanmean(df[mortality == 1][column].astype('float32'))
+            means4.extend([mean4])
+            std4 = np.nanstd(df[mortality == 1][column].astype('float32'), ddof=1)
+            stds4.extend([std4])
+
+    output_bivariate['mean_mrs0'] = means1
+    output_bivariate['mean_mrs1'] = means2
+    output_bivariate['mean_mortality0'] = means3
+    output_bivariate['mean_mortality1'] = means4
+
+    output_bivariate['std_mrs0'] = stds1
+    output_bivariate['std_mrs1'] = stds2
+    output_bivariate['std_mortality0'] = stds3
+    output_bivariate['std_mortality1'] = stds4
+
+    return output_bivariate
+
+
+def get_pvalue(df, target, categories_list):
+    methods = []
+    p_values = []
+
+    for i, column in enumerate(list(df)):
+        if df[column].isnull().all():
+            methods.extend(['None'])
+            p_values.extend([1])
+            continue
+
+        if categories_list[i] == 'cat':
+            contingency_table = pd.crosstab(index=df[column], columns=target)
+            # The chi-squared test should only be used if the observed and
+            # expected frequencies in each cell are at least 5; otherwise fall
+            # back to Fisher's exact test (via R, which also handles tables
+            # larger than 2x2)
+            if np.min(contingency_table.values) < 5:
+                if contingency_table.shape[1] > 1 and contingency_table.shape[0] > 1:
+                    FisherTestR = robjects.r['fisher.test']
+                    numpy2ri.activate()
+                    p = float(np.array(FisherTestR(contingency_table.to_numpy(), workspace=2e8)[0])[0])
+
+                    method = 'Fisher test'
+                else:
+                    p = 1
+                    method = 'None'
+
+            else:
+                chi2, p, dof, expected = scipy.stats.chi2_contingency(contingency_table)
+                method = 'Chi-squared'
+
+        elif categories_list[i] in ['int', 'float', 'ord']:
+            a = df[column][target == 0]
+            b = df[column][target == 1]
+            stat, p = scipy.stats.ttest_ind(a.dropna(), b.dropna())
+            method = 'T-test'
+
+        else:
+            if column == 'Id':
+                p = 0
+                method = 'None'
+            else:
+                raise ValueError(column, 'Statistical test for', categories_list[i], 'not implemented')
+
+        methods.extend([method])
+        p_values.extend([p])
+
+    return p_values, methods
diff --git a/Loss/Loss_uncertainty.py b/Loss/Loss_uncertainty.py
new file mode 100644
index 0000000..5c749b7
--- /dev/null
+++ b/Loss/Loss_uncertainty.py
@@ -0,0 +1,67 @@
+import torch
+
+
+def old_loss(y, f, s):
+    y = torch.squeeze(y)
+
+    # Previous formulation, kept for reference:
+    #   a1 = torch.exp(-sigma)
+    #   a2 = torch.log(f)
+    #   a3 = torch.mul(y, a2)
+    #   a = torch.mul(a1, a3)
+    #   b = torch.mul(sigma, y)
+    #   loss = torch.div((-a + b), 2)
+    #   loss = torch.sum(loss, dim=1)
+    ce1 = torch.log(f)
+    ce2 = torch.mul(ce1, y)
+    cross_entropy_error = torch.mean(ce2, dim=1)
+    sigma = torch.exp(-s)
+    term1 = -torch.mul(cross_entropy_error, sigma)
+    term2 = s / 2
+
+    loss = torch.div(term1 + term2, 2)
+
+    return loss
+
+
+class loss_uncertainty(object):
+
+    def __init__(self, weights):
+        self.total_loss = torch.zeros([1])
+        self.sum = torch.zeros([1])
+        self.elements = 0
+        self.weights = weights
+
+    def get_loss(self, f, y):
+        y = torch.squeeze(y)
+        assert y.shape[1] == self.weights.shape[0]
+
+        weights_vector = self.weights.expand([f.shape[0], -1])
+        weights_vector = torch.mul(weights_vector, y)
+        weights_vector = torch.sum(weights_vector, dim=1)
+
+        a = torch.mul(f, y)
+        ce2 = -torch.sum(a, dim=1)
+        class_weighted_ce2 = torch.mul(weights_vector, ce2)
+
+        self.update_total_loss(class_weighted_ce2, f.shape[0])
+
+        total_loss = torch.mean(class_weighted_ce2)
+
+        return total_loss
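+
+    # get_loss() computes a class-weighted cross-entropy: per sample,
+    #   L_i = -w_{c(i)} * sum_c y_{i,c} * f_{i,c}
+    # with w_{c(i)} the weight of the true class. This is the usual weighted
+    # NLL under the assumption (not enforced here) that f already holds
+    # log-probabilities, e.g. log_softmax outputs.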
+    def update_total_loss(self, loss, elements):
+        # Accumulate across batches so that get_total_loss() returns the mean
+        # over everything seen since the last clear(); a plain assignment here
+        # would keep only the most recent batch in the numerator
+        self.sum = self.sum + torch.sum(loss).detach().cpu().numpy()
+        self.elements += elements
+        self.total_loss = self.sum / self.elements
+
+    def get_total_loss(self):
+        return self.total_loss
+
+    def clear(self):
+        self.total_loss = torch.zeros([1])
+        self.sum = torch.zeros([1])
+        self.elements = 0
diff --git a/Loss/__pycache__/Loss_uncertainty.cpython-37.pyc b/Loss/__pycache__/Loss_uncertainty.cpython-37.pyc
new file mode 100644
Binary files /dev/null and b/Loss/__pycache__/Loss_uncertainty.cpython-37.pyc differ
diff --git a/Metrics/ClassificationMetrics.py b/Metrics/ClassificationMetrics.py
new file mode 100644
index 0000000..7e40387
--- /dev/null
+++ b/Metrics/ClassificationMetrics.py
@@ -0,0 +1,126 @@
+import numpy as np
+from sklearn.metrics import roc_auc_score
+import torch
+
+
+class ClassificationMetrics(object):
+
+    def __init__(self, classes):
+        self.classes = classes
+        self.cm = np.zeros((self.classes, self.classes))
+        self.accuracy = 0
+        self.balanced_accuracy = 0
+        self.recall = 0
+        self.precision = 0
+        self.f1 = 0
+        self.auc = 0
+        self.nll = 0
+        self.pred = []
+        self.y = []
+
+    def compute_metrics(self, pred, y):
+        """
+        :param pred: predicted class probabilities, shape (n_samples, n_classes)
+        :param y: one-hot encoded (or integer) ground-truth labels
+        :return: dictionary with the confusion matrix, accuracy, precision,
+                 recall, f1, balanced accuracy, AUC and NLL
+        """
+        self.pred.extend([pred])
+        self.y.extend([y])
+
+        self.cm = confusion_matrix(self.cm, pred, y)
+        self.accuracy = np.sum(np.diagonal(self.cm) / np.sum(np.sum(self.cm)))
+        self.nll = -np.mean(np.sum(np.multiply(self.y[0], np.log(self.pred[0])), axis=1))
+        epsilon = np.finfo(float).eps
+        # cm is indexed cm[predicted][true] (see confusion_matrix below), so
+        # precision normalizes over the row (everything predicted as class i)
+        # and recall over the column (everything that truly is class i)
+        self.precision = [self.cm[i, i] / (np.sum(self.cm[i, :] + epsilon)) for i in range(self.classes)]
+        self.recall = [self.cm[i, i] / (np.sum(self.cm[:, i] + epsilon)) for i in range(self.classes)]
+        self.f1 = [(2 * self.precision[i] * self.recall[i]) / (self.precision[i] + self.recall[i] + epsilon)
+                   for i in range(self.classes)]
+        self.balanced_accuracy = np.mean(self.recall)
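+
+        # The AUC is undefined when the accumulated batch contains only one
+        # class, so the except branch below falls back to the chance level 0.5.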
+        try:
+            if len(self.y[0].squeeze().shape) > 1:
+                a = np.argmax(self.y[0], axis=1)
+            else:
+                a = np.array(self.y[0]).squeeze()
+
+            # self.auc = roc_auc_score(a, b, multi_class=mc)
+            self.auc = self.custom_auc(self.pred[0], a)
+
+        except Exception:
+            self.auc = 0.5
+
+        metrics = {'cm': self.cm,
+                   'accuracy': self.accuracy,
+                   'precision': self.precision,
+                   'recall': self.recall,
+                   'f1': self.f1,
+                   'balanced_accuracy': self.balanced_accuracy,
+                   'auc': self.auc,
+                   'nll': self.nll}
+
+        return metrics
+
+    def clear(self):
+        self.cm = np.zeros((self.classes, self.classes))
+        self.accuracy = 0
+        self.balanced_accuracy = 0
+        self.recall = 0
+        self.precision = 0
+        self.f1 = 0
+        self.auc = 0
+        self.pred = []
+        self.y = []
+
+    def multiclass_to_binary(self, pred):
+        binary_pred = np.zeros((pred.shape[0], 2))
+
+        prob_classs0 = np.divide(np.sum(pred[:, 0:2], axis=1), 3)
+        prob_classs1 = np.divide(np.sum(pred[:, 2:6], axis=1), 4)
+
+        binary_pred[:, 0] = prob_classs0
+        binary_pred[:, 1] = prob_classs1
+
+        return binary_pred
+
+    def custom_auc(self, prob, labels):
+        # Rank-based (Mann-Whitney) one-vs-rest AUC, averaged over the
+        # classes; ties contribute 0.5
+        unique_labels = np.unique(labels)
+        all_auc = 0
+
+        for l, label in enumerate(unique_labels):
+            prob_l = prob[:, l]
+            class_indices = [i for i in range(len(labels)) if labels[i] == label]
+            rest_indices = [i for i in range(len(labels)) if labels[i] != label]
+
+            suma = 0
+            auc = 0
+            for i in class_indices:
+                for j in rest_indices:
+                    auc += (int(prob_l[i] < prob_l[j])) + (0.5 * int(prob_l[i] == prob_l[j]))
+                    suma += 1
+
+            all_auc += 1 - (auc / suma)
+
+        return all_auc / unique_labels.shape[0]
+
+
+def confusion_matrix(cm, pred, y):
+    # cm[a][b] counts samples predicted as class a whose true class is b
+    for i in range(pred.shape[0]):
+        if len(pred.squeeze().shape) > 1:
+            a = np.argmax(pred[i, :])
+        else:
+            a = int(np.round(pred[i]))
+
+        if len(y.squeeze().shape) > 1:
+            b = np.argmax(y[i, :])
+        else:
+            b = int(y[i])
+
+        cm[a][b] += 1
+
+    return cm
diff --git a/Metrics/RegressionMetrics.py b/Metrics/RegressionMetrics.py
new file mode 100644
index 0000000..4b11007
--- /dev/null
+++ b/Metrics/RegressionMetrics.py
@@ -0,0 +1,55 @@
+import numpy as np
+from Metrics._utils import int_to_binary
+from Metrics.ClassificationMetrics import ClassificationMetrics
+
+
+class RegressionMetrics(object):
+
+    def __init__(self):
+        self.dif = []
+        self.mse = 0
+        self.mae = 0
+        self.median_ae = 0
+
+        self.pred = []
+        self.y = []
+
+        self.cm = ClassificationMetrics(classes=2)
+
+    def compute_metrics(self, pred, y):
+        """
+        :param pred: continuous predictions, shape (n_samples, 1)
+        :param y: ground-truth values
+        :return: dictionary with mae, mse and median_ae, plus the
+                 classification metrics of the dichotomized prediction
+        """
+        self.pred.extend([pred[:, 0]])
+        self.y.extend([y])
+
+        self.dif = np.abs(self.pred[0].squeeze() - self.y[0])
+        self.mae = np.mean(self.dif)
+        self.median_ae = np.median(self.dif)
+        self.mse = np.mean(np.power(self.dif, 2))
+
+        metrics = {'mae': self.mae,
+                   'mse': self.mse,
+                   'median_ae': self.median_ae
+                   }
+
+        # Convert to a binary prediction and reuse the classification metrics
+        binary_y = int_to_binary(y, threshold=2)
+        binary_pred = int_to_binary(pred[:, 0], threshold=2)
+        clas_metrics = self.cm.compute_metrics(binary_pred, binary_y)
+        metrics.update(clas_metrics)
+
+        return metrics
+
+    def clear(self):
+        self.mae = 0
+        self.mse = 0
+        self.median_ae = 0
+
+        self.pred = []
+        self.y = []
+        self.cm.clear()
diff --git a/Metrics/__pycache__/ClassificationMetrics.cpython-37.pyc b/Metrics/__pycache__/ClassificationMetrics.cpython-37.pyc
new file mode 100644
Binary files /dev/null and b/Metrics/__pycache__/ClassificationMetrics.cpython-37.pyc differ
diff --git a/Metrics/__pycache__/RegressionMetrics.cpython-37.pyc b/Metrics/__pycache__/RegressionMetrics.cpython-37.pyc
new file mode 100644
Binary files /dev/null and b/Metrics/__pycache__/RegressionMetrics.cpython-37.pyc differ
diff --git a/Metrics/__pycache__/_utils.cpython-37.pyc b/Metrics/__pycache__/_utils.cpython-37.pyc
new file mode 100644
Binary files /dev/null and b/Metrics/__pycache__/_utils.cpython-37.pyc differ
diff --git a/Metrics/_utils.py b/Metrics/_utils.py
new file mode 100644
index 0000000..c8b3432
--- /dev/null
+++ b/Metrics/_utils.py
@@ -0,0 +1,7 @@
+import numpy as np
+
+
+def int_to_binary(vector, threshold):
+    # The regression targets are apparently mRS values scaled to [0, 1]
+    # (divided by 6), so i*6 recovers the 0-6 scale before thresholding
+    a = [1 if np.floor(i * 6) > threshold else 0 for i in vector]
+
+    return np.array(a)
diff --git a/README.md b/README.md
deleted mode 100644
index 0074788..0000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# AIS_Regress
-
diff --git a/__pycache__/evaluate_model.cpython-37.pyc b/__pycache__/evaluate_model.cpython-37.pyc
new file mode 100644
Binary files /dev/null and b/__pycache__/evaluate_model.cpython-37.pyc differ
diff --git a/__pycache__/train.cpython-37.pyc b/__pycache__/train.cpython-37.pyc
new file mode 100644
Binary files /dev/null and b/__pycache__/train.cpython-37.pyc differ
diff --git a/__pycache__/train_graph.cpython-37.pyc b/__pycache__/train_graph.cpython-37.pyc
new file mode 100644
Binary files /dev/null and b/__pycache__/train_graph.cpython-37.pyc differ
diff --git a/_utils/Result_container.py b/_utils/Result_container.py
new file mode 100644
index 0000000..0e95dee
--- /dev/null
+++ b/_utils/Result_container.py
@@ -0,0 +1,54 @@
+import numpy as np
+import os
+import pandas as pd
+import pprint
+
+
+def mean_dicts(list_dicts):
+    # Average the per-fold metric dictionaries ({'Fold 0': {...}, ...});
+    # confusion matrices ('cm') are summed instead of averaged
+    number_dicts = len(list_dicts)
+    keys_dicts = list_dicts['Fold 0'].keys()
+
+    mean_dict = {}
+    for k in keys_dicts:
+        values = [list_dicts['Fold {}'.format(l)][k] for l in range(number_dicts)]
+
+        mean_value = values[number_dicts - 1]
+        for l in range(number_dicts - 1):
+            mean_value = np.add(mean_value, values[l])
+        if k != 'cm':
+            mean_value /= number_dicts
+        mean_dict[k] = mean_value
+    return mean_dict
+
+
+class Result_container(object):
+
+    def __init__(self, target_metrics, output):
+        self.target_metrics = {}
+        self.results = {}
+        for o in output:
+            self.results[o] = {}
+            for m in target_metrics:
+                self.results[o][m] = {}
+
+    def update(self, output, method, metrics):
+        mean_m = mean_dicts(metrics)
+        # pprint.pprint(mean_m)
+        for k in self.results[output].keys():
+            self.results[output][k][method] = mean_m[k]
+
+    def save(self, output_dir, name=None):
+        dir_name = os.path.join(output_dir, (name + '.xlsx'))
+        writer = pd.ExcelWriter(path=dir_name)
+
+        for o in self.results.keys():
+            # Write each dataframe containing a metric for all methods to a
+            # different worksheet
+            sheet_metric = pd.DataFrame.from_dict(self.results[o])
+            sheet_metric.to_excel(writer, sheet_name=o)
+
+        # Close the Pandas Excel writer and output the Excel file.
+ writer.save() diff --git a/_utils/__pycache__/Result_container.cpython-37.pyc b/_utils/__pycache__/Result_container.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb327b26fa5271aa851b3f12d0161b29996ea1dd GIT binary patch literal 1794 zcmZ?b<>g{vU|`t9+nHFx#=!6x#DQTJ1_lNP1_p*=8wLi36owSW9EK=HFwGRj1g4pz zSim%E6l)4o3UdoX6q`Fk3QG!W3quNPDtj|?6h{hUFoP!BOOTm<$&4V1fq{V?ikU&C z_b@OplrVHKHZ#^Tb}*(eN-`{9T*y$%l*v%bT#;A9<iZe}*2&P$(9W2~1h$2vgsFzH znbC!znX#6ohNXrvg++ovlA)HhLIorP;(=siMQYhfm>00ru-35Dut_p3WU6H?3<+oG zU`S!jW-h9!VXa}5WvFGXVXa~AV6b5*lnZC5VFtO@uZqnrKPN@O0PbKW1_lN;1_lOU zkb~tI7#PwSY8Yb0zz$|uz)-`ukWrGMgF%vEB2yttFoPyz6+763>RL_4TWo3hMY)M3 znoPHt^bBq>=4dkAV$8n9m6KUq5}%TpTvEJ};a7mORZM7cYEf}aZemeMW?pilVN5`V zOH4pfepYI7NpXy$XMAu;QGRx6jH738d{An7QEG8<OnhlcW=?TTP-<~$PDy-neqKpp zW?pKMUP0wAE*p>q$@#ejcDx{0L%dKWmz$cH2eDSqCMQ2RF{jv0525rWD2i?|CFg3g z-eS&4&AY{tomyFZi?t{*FFo}ZQ(nO>=ERf~P4*&CXhiYkmF6a;7D0@Q;sPm!vN_?_ z++r(B%qdMRj^Y9ffjLE>q*Vk;1)5AntPBhcVB?B77#J9CL6m{R1!PCD1Srigaxn5R zu`qHmNigv+3NeZ>a)3k_i-Z^$7?ME>U>KC5z-d;Efq@|voL-}tQW#U1S{S03Q<zg& zS{R~OQaDoBQrKG<qgYc|gBdhAZwa6WU^2)YkeMLIfv__u2wfN$7%~}Z7~&x$V}%ho zb!yczgG93!YnW{qDx_){T^M48YFS`nk_<I0HVlPo;S7aB;S4DZpyc7F$#jdaB(W$x zwIn__wWKIBx%d`)5jYl$HCb=56sP8-6^Vd?gDt<bq@c9q7GwS`#$0faK?rbIam2@G z=4F<|$4i2o#{+UO6B{ENBNrG}Nuqm852ijD6agURpdbdZ!Tz%X=foOjP`;|rsbR=s ztO4alriBbH46#zRpnSwC$p90t0kN5E7z!m2zG4Q)NRcQgN<pbkldVV$#1;p8B{#Js zBR}O9JJiQfY~U1;3rPW>)PUqQw$g%>#FA7ckjFrdWMJlC5@D<o$M6zFogX~LK<2<> zEDr3WEXD;)HH<Y(DU9NvQh>RHIg6!+Ifbd0v6iL6sD>enbpadLk1DmSCCpju3phYx zApb37tYxcV%i^qIsbNiF?qy<RDAWpPs9{;awUB`k5|=D~RjhgyImH#4jJH?{5=$~P z8E<hFXQZZ<#OEdErfPEEV#+Tr0+kuJShDgn^KLN}q}<|mtw>JI2`|blNi9Nz)-6t# z#F9j}qQu<PTbyY{`MKcCbc>@TKRy+t{uWDdVp*ytJ2*^laY0f>d`f1~Ef$bfQ5-3m zMPN&Bv6VxN24$Wo9*Aoop$-XRP#}S`Tah{g1A`VQkin^kQHYU;Nq|L&QHZ%p6eFm? zwln%^vfbi{2ip@Le~T+VJ~uz5G$%Da{uWPsd|_!~4#+L>@kLUgh?fNsAUlxkMOY3h zPC-ed7!<=COdOmX97P~8P0m}ad8N4pl|}p@{R-gJQ;?XKl2}{>N)fl%3JQuc^Gd*} l?G{^p5~z#>>%?ZwEe;!q6YW5STQR7@0~H?}j2uiHf&dOSt7!lL literal 0 HcmV?d00001 diff --git a/_utils/__pycache__/plot_utils.cpython-37.pyc b/_utils/__pycache__/plot_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08e282b35f76714334f984cdfdfdb75929a6f35c GIT binary patch literal 7536 zcmZ?b<>g{vU|?u7ZB6{Dz`*br#DQTJ1_lNP1_p-WA_fMA6owSW9EM!RC?-Y-n>mUF zEXEqe3Z~hj*ugYM6lV%k3UdoX6jusM3Tq2P6n6?+3VRDf6i*6A3TF#L6mJSw3U>-u zIzt*u3Qr1e3u6>t3Ky8qoWhsF-@+KhpTZc-pegVY<PyJRkUKyuVFm^UP6h@BXONp( z7#SF97_t~@7}FV37<(COK|ID<mKuf{MoES&rdrk#<^?P@tTjy83=0`+*-BWm*lJj7 z*d!ThSU_Y7Q!i63LkW8p#{$k8<`m`yTs4d}ObeM7GS#w|urA=PVPDADFH*}<!;r;O z!vXS1FB2m}I713UAVUyC1Vaj24XZRmEoTXD7GDZ`3P&$v3{x#vEq4iT7XJc)6wZZ= zwLCRECCm#1Yd9A&mM||6g0UAeN;5Pw*7DYHX9+{xQo{*$5myaw3U@C{Enh`ciAW7! zGouSbtX~XMEq|>*jlcrYg$%WVHQZTZDLf0fO1NrRYxpG@Qh1x07BbcdED&GFP$RHF zVj)8cpEQFwLycexn>a%nQwo2IK#E|BP>OIb8zV#Eqi}{2-Ym%!krdGsF_3>sco#^e zh^I(^+y`=}2*W}qMntGcB87@j3GV{w8li=Zj0`2b3uG2DfYL!QgQk?9CgUxpq)JVu zTP!JwC5g9KGxJhXE2_AvYvZeH^(u0TD{ip`XQZZ<7-}-!;wlEQ;`0)7Q(rPNFfeE` zy%b?!V0g&`Vl&=iPs>a%&a6s}VvEdi&vPmb(PX;CoRgn^i#0hvC%>qQsk*j`S<fi- zB@;+JQxV9BB6bD_hAQ6tw6x;X5`}{N%)FA~TWraxc_pbux7d>MOG@%{HCb;lRaD+$ ztEenVtth$01tOF4^NUi7Z!u*g-eM|Ce96ziz;KJTC^J2yM3d<jM^S!BVo7FxUJ)qO zrO%oP0z2)~&z=E+FZNZO5bqV_rEALG;x5iFDv2*k%qdMRE&?UjTWk={EvCGJTdd_7 zsYR)`m<m#Eak?ayB)SzP=BD0a$}hgfQjl1Zaf>A@KQr$ZTUvfmZeqzTZr6(B)SU35 zOpx<AO7i1VLHt|H1vw?RIEqV?3UcyGif=I`=icH-Pc4a0&P^;R;s&{$IXAK57IRW! 
z(Jju@ywcp%qQsKaTU;PD8L1#&-Qq~h%LDl;^%g5gwBi;gh$_!aDap9SR#B3foLzj2 zr7|Zo_ZE9`Vp(chCMei*@{3b7g>NyXq}^f#`{@=_NyaVq{L+&6l+2=AY&nTZsX4_( zk{}nbXXd43CZ`tP;!MlTNsR}4j;$Q*lPIptyfSDA-C_nA&6HRX#hzOU_P{MRaP$`6 zVl7HdE-}2tn0bo@#Ja@>b9!24I@r9G3`L;q|I5qSDkiizwWv5IH?gQBGcP&OFeV_w zB_^OKKPxr4q&UXWGd{SaC_g(j#?do4J}5Q4D7CmaCcd;JGp9HP6owF{UP0w8aWJ<y zGd(XeEi*YW4;GUgpmN3tRPr$LF|sjoG4e5qF^VzrFiNm+FbOd7Fv>AXF$ytqFp4os zF)}eSG4e4(RB$mCxiT;?Br}8bGB7X*AQxN~j0_AV3^k0+jJ1q43|Wk|%r&6miUnL; zv7|6&vn*r;7hbG2EH#W03@MBYnQGZem=~~t_~7D+J&OZW#Da?)j#|zd&MeLv<{DN> zh8lJd2`a2o*m_xNxsYVopfc=GH5|RHwcHg^C0sRJHQdciE)20gF-)~QwY)Vv3%D0D z)bN5xafVvH8qO@96wU>_HGB&hYj_v%f#vuYGNf>U3mU!@Zg4@vlfnxsyi)|g1<jOj zh8oTV0x66of;FrujFJq^jOk1@j5UnuOeum3nZZSj5U5y65djxN>{-I#A_-Kuh-8T_ z5UXKI5nsqu%U{C2K)i;3AtSV~k?;d&c}=DwP~^O11(gDfMW8Ay<Q8MPCKEU(6oCqp zTZ~1T%m{7~D0_p7k0M?M1_p4Ep^3ZrDB=St0%_4?Eds?}kswGw2t<I&gCbCwRmGgG ztNT(HBqj!O2UC$ahz*iYheZ4Xdrd}&#X`B6#l@L<>ChaY2P!rsZZT)(m4J&2aP}#Z z0%?~9XQbT3ip<>7+*|CqnRy^akqSso6+~!)6fjq178GfNxH=$05k!Dpq6d;?0VU{L z%sHufMfwa33{m{BVk15=C9AZ!q(}~=ULHg!fCxj7^961(<(3thg7{`2!W=|^vUQOK z$PA9er2Mkf_>zn$j-=F_{POscj3R505*rX<2eKBNd5xmDKsh%atO&{mYb|mFDRcr6 zpd1X&#zih5ee59T#^;uSyiyDz7#KO2*cjOu`4|Nld6)zkr5HsRd6+~%JXlU+l4Im! z<YDAvED8eUG*EQ_3Uf9F1_n^d0NDU;w$v~zU|7gd%T&U+fT@Hri@AoWge8TsmuVqm zEprL$0=9(=F-)~AwXC3qKn+t3a|%-l`vT4s<`kBNOew4>Y`v_E3^l9^xKh|bX|RMf ziyPF4N#O#gTGj<TplYy$bpdY;BS=1lA(%mv8&n~(rj%3`q=FOc%V!`zgIpD&$yx*| z6K=8OCKhLd6WJ{mPyv36t+=o>u_!gYT4bI3!>ShhB2SPW^kk*UcZ(%Czo-bDQf{%7 z6lIp);>^!WEsoF0%uc<<l9ZpHgGed2n2Ym@qu4W26H9Uv3!(&5GRsnn(m{!^ATcMk zq$Kqgdu}2~pctG=Zt;|WYKi!q#LE2Ak|J=LU@I~PxdWUYB0#KAP<%0_6@eoi6v#-? zC<%^6ko!_|z*T2_T54iRX;Er1$TP*D%)`LQ0g6gSE=D0HE=E2^4#uJcQ1pSSaZvPu zq7lRfnOqDCu@uHyMsT1qK{7trI!&fqT*aW;xws@Vx%d`iB3KkcfD;ri*d+zf%oYYJ z#z7`CFcu|)+|S^a3~DlfECgjaP~{1VmEtgFP{~}vSi-b`8Pbeg$k@!-%-G3P!dSxu zYF2_uvIQ&)LCrUI6fstiSSNEkQ#*4S6R2!%;V5CNVFxwgni)HxrY&G!$gqH8Aww4H z0?vgDoeU*hS=<YFQdkx;mM~@Uf>K)vUk7tDV+wmOb2DQtM+sLJe+owmTMB0iS1)r6 zQ!QsLR}EJRw<JTBKnHUNLk&j?PcL&VcZE+4w+ln8ODzvnRxpKkflv+40^x-W3mI#8 z7KkilsNn%MAX7NOWj0tQg%4bA^P@BncZD-R%@6}M5_>`Au2_nA3qvPk4Oa@ABtw>X z2SYnUJ7XGCibRTJ3rC4Wic|_uFJp>y4M&PhFKY~QEpIJfEq@8q0?7_g!&@Llwnjh% z)Hsz|$RN#7E0`h&>Kv3vcQDrog6f@S##$k$*)lb}H9{%!DGDiyy)2BN@R6<IPf<!y z2IaOI&Mdha<`fkOuY_xXJUH(uWGOCCs$or0T_6O?gA18zg-f^=DAx#s@?#0t0u_*| z8kPyn#a`g><V@jAQA<&WhNno1Mv7*NR*H6tPKs`dUOIb<eu_alQ;K1VQHpVjNs4KT zS&Df&dx}MhWja%e)f|>up^7IZY$?_$HYv6#cIhmj7>>OV!&EC;D^{Xf!n8oGMidmv zpp;W9UZM)(iGk9KK#6*mMvX{~c#S|BlO#ipSPf$hLyCQh1Gt2Ni!acu5vmcBWB|Dy zp?ZPVLWTv}V4sMluw}DMU@9`HVFQ^h!63;{D^Vkn;s_Q!k|LGIl;Tt?iO`#+lj5A> zlH$5R2%%#kV~SvkAd(v06t^156!%^ZMurJYg(jfdJ4>d9v4c6qgNU&A#25D9?Ca%s zi#sK?IJqdZ0Mw{X24!+k{sZMZP<t6vX%>U(qI8BDhFFOhrdq~YrW&RWhHSPXmJUWx z-kZo&$Px@`{eiQ(Ci5*OJp)k3pfs=K7Gs$v(=EP&qSWG4c>C=Ze_3KtW@1uKYB9LO zQ4H1xu5*5I+2mvvmn7%s7TAGGpkh$@z{XHz2rfBNGK))!GLuR{PK{4aEJ;nzFUm|# z%+a&S$xlwqDYnx?=#F9uPAxHpR`{@%1>-H|ywY4vrXo>L-v(4Eftwwg%;2WVE!MoE z{PN;kta-`#ImO^QLX#2PXaO})phi8gr?h<nu0u2#qj<qhbXb2T8RSz~(1TnS3@RSg z7$8Mo4MPf}Gy}K*WL(H7!T>7%YMD})7J&NIObeNU88n&wpe~Gbs1mQX(lbn}RprvC zwo0j0C`c^HOwB7P*3@Jz@&c8#tVQ{yc`1<Uin+Knw+Pfw(PX~GT##H6#h#dxQ<j)h ze2XJ7sW?BUv?LW0KcK=N97(rW(n|A^3qkG!r4a^3DaI;oA|nW-6Q0UIsST9Uz^M$> zSnOb|Vd!9J26>0k4{WO@V-aYSLz4+yK^K*PDm#QjIBaqfbCXgM?aCM!7(RnSuS%at zhro2+V$62aWWB{$5yhTZT9OY608UUZw4f-zpg4*rIlmOtUro-*PtGo~Wnf@{2Qu6j zARoXZOa`o^h9Oo2l2=j~7BD~tFd+Fvld+1w+MrfJSE1U_%23ZJtyWc&5gPnOX&~R( zgCdAA3+#Ic0jfKTvKc_*Dhv#;9Kj9gNGs%|rj=;2fE&)>6bp6V1ADL#q??``#g~+n zUlCuDAD@_)oRMGj5){~)jJH@oeR^<{y-K9opf(;kgMr(^Ohur9otL00UlY=vuVT#& z3RW<HP=*l72tpY{C=&=}3ZcxZc--^zQxx(`LAg4$O4un8&QZ{?Gs+DL)~w<x&d5wF 
zQOHkIFhF7(BC(B-*v3d~6C}1N65Fhb-!Uh%EL9;ftt7Qb!O|e5ir*zQF$K<7NYP}u z#g-csY-x~kizOvDDEJm9*cI_$W^R5_Nn%cBNo7$TC{h|gL_LUT1Vs;9L1J-9YK|r= zq`_4I5@XINO$8^?TkOzw9CUPIrG1eHC{;klDo)y`r-8vc`zrnb1#s8hN}<|H&n&I> zB`B~pS;0da;I7Oq&XS_UykgJ@#4V1r{Jauye1W=AnoPG?OEODxQj0+SfhuYA*o!Yo zEJ{x;0Y{?VEjG|NNNUP07Es{|$@Xled6|W!skd0NQ!63E1VvgPZ}5O!6c0}K#kaUX z8sotON4MBY^1<4{T^>-uRHP2lz>$-gS6q;ooLb}$3Lki1s0h?!0H<t7hX~#cisA+h zX~Y*K7A58u-(oJvOufaDl9^m`i#@S8v8X7qvItbP6#0S-XUj=VPt8k-Vku57DF6+# z6hJ&32eJxFZ>b10EC4Oz(sJ_ii;7}Eog_}McR(e>E&k%vl6df-3b^<!jslOwK&-jN zR+O5XUzU1{1ylk+1K+VCwOCUM+~B^&0js!fv6g`9uc8i+Bbh<%a!_0R76+_t8^xbm zQIMKkl9~c-?1IY<aK|i)IkBQRiX|zrsJIB!XN+PiPfbZrExyGHva~pg15^xwn70@! zqu2}bi$ED29Lq)EP8zt056b1Y*z!wD3Q9|gz{!uX09;dnOAJjGa6*KXmYJZS$^sFf za^#jQC=^gKI&w`K1uE1FKm{q27^4WI2$KYp7^4D{2$KjS7o!NH9Ar3!Nq~uig^Q7g zkpnci!pOxa0O>t2@qzmeAhjHfY>XmI0*q{o9E=={JWQYg86HL%Mv$o>okENXj0#BV zixz|W8=!FmkmaBrKB)N*D(Q>Gm_T)47Gnw{r~#kC0&XI**0Mpm0t=W@SQj#a*ftC$ z%vmfoY@p^ahz}b5P2mJJ<4TybSZmloodj^>jy;77+>B+;VuOjMaHsIpFr@H;RM)UB zV6R~UHS|ir-2z6263#5H1>7~vDSQifpp9Af5{3o5ps{F1h7tzQ;0uV(0=W=0whMP7 ztPu}tw5N!rvxCNT)0t8vQ>0R)Q)E(PQ{+<Q)7e1{>vX0Rr8z9M92Kuh_*0ZqR8mw^ z)S%7SYmhERi9m_q0-+kt6eei~a5I*>L;%DCH)DAqeyriH;Q=>eL8ImjDeNigARm>0 zeTpmx^{)nKP^g9tCbK}ehNFfH+Q{VuHE4_e)Ubf;05@lOYj{&MvsorE7EMXv&SOf^ zs^vrI%@Tn$))(*~bSz{{(M!=oQX`t8U&EJT0BX(_{t0KOVa;N%0XOPFjaiJ)HpUm) z;O47|AGnr-G~ytwx}sdrpb4a|y2YNBlb={(Yy_?ui<%f17(iWpa1H|1+Tfg1)B?`K z*(s@-ERf=S0VuP8<v~(K<)F4Ahzl$3!SbL&xTprE5^wccR0|piA4c^iq)r4^hv144 zR3}2jz+A8dNDLNWpaQB09BQCo1P8b#8zig|Qm6q+a@|=39vW-~MLtpmTGR%Lp>_}f zHXquM0mU2sI#Ud3yoN2Y7(8eMu1YvEGK)(<W#}#Tr2GmHBZ{-QJh3Pj#4MT$vI$$| z1ZrS|$8I1s6sW2NS5x51xM&(k+jI~y6GY4c`GOx@<rK{Z34p4ZB2W!eG!Mi@jPHSK z1W*g02vj2!Ed<FTf^-px3-&jtAO%+qkOsUo$g!xE0C;8s)R_Vm#KjJvQkYSMQHW83 zS&UJFkqbOP#Kp+Lgso7QV-jQHVB}#073y+Kpy4A>6;QMi<PCvag1L#{0L#ft(krM0 z*?fx)LW9Hj7HeK<Zb2m^AV5|6Ew+Nhyp+V^B2Zg8ioG~BF)6<&4_uLhhws57i{SAo x@URiM<^bCT@gSO4Ic#$CQ%ZAE?LeK4V$k?7sM-QmN*v4_Djc#Lyc{e-o&ZAVU_Sr= literal 0 HcmV?d00001 diff --git a/_utils/plot_utils.py b/_utils/plot_utils.py new file mode 100644 index 0000000..73fd8e9 --- /dev/null +++ b/_utils/plot_utils.py @@ -0,0 +1,203 @@ +import matplotlib.pyplot as plt +import numpy as np +import os +import pandas as pd +import seaborn as sns + + +def plot_significant_values(df, value, th, out_dir): + df = df.sort_values(by=[value]) + labels = df.index + values = df[value].values + indices = np.where(values < th) + + df= pd.DataFrame(data=values[indices], index=labels[indices]) + file_path = os.path.join(out_dir, '{}_{}.xlsx'.format(value, th)) + with pd.ExcelWriter(file_path) as writer: + df.to_excel(writer, sheet_name='Sheet1') + + + + inv_values = 1 / values + + fig, ax = plt.subplots(figsize=(20, 12)) + my_cmap = plt.cm.get_cmap('YlGnBu') + colors = my_cmap(np.log(inv_values) / np.max(np.log(inv_values))) + rect1 = ax.bar(labels[indices], inv_values[indices], log=True, color=colors) + for i, rect in enumerate(rect1): + height = rect.get_height() + ax.annotate('{}'.format(format(values[i], '.2e')), + xy=(rect.get_x() + (rect.get_width() / 2), height + 1), + xytext=(0, 1), + textcoords="offset points", + ha='center', va='bottom') + + # ax.plot([0, len(indices[0])], [th, th], "k--") + plt.xticks(rotation=15, ha='right') + # plt.subplots_adjust(bottom=0.5) + plt.ylim(0.1, 1.3*np.max(inv_values)) + fig_path = os.path.join(out_dir, '{}_{}.png'.format(value,th)) + plt.savefig(fig_path) + plt.close() + + +def plot_mv(mv, th, out_dir): + th = int(th) + + labels = mv.index + values = mv.values + indices = np.where(values > 0) + above_th = 
+
+
+def plot_mv(mv, th, out_dir):
+    th = int(th)
+
+    labels = mv.index
+    values = mv.values
+    indices = np.where(values > 0)
+    # split each bar at the missing-value threshold: green below, red above
+    above_th = np.maximum(values - th, 0)
+    below_th = np.minimum(values, th)
+
+    fig, ax = plt.subplots(figsize=(20, 5))
+    rect1 = ax.bar(labels[indices], below_th[indices], log=True, color='g')
+    rect2 = ax.bar(labels[indices], above_th[indices], log=True, color='r',
+                   bottom=below_th[indices])
+
+    for rect_below, rect_above in zip(rect1, rect2):
+        height = (rect_below.get_height() + rect_above.get_height())
+        ax.annotate('{}'.format(height),
+                    xy=(rect_above.get_x() + (rect_above.get_width() / 2), height + 1),
+                    xytext=(0, 1),
+                    textcoords="offset points",
+                    ha='center', va='bottom')
+
+    ax.plot([0, len(indices[0])], [th, th], "k--")
+    plt.xticks(rotation=45, ha='right')
+    plt.subplots_adjust(bottom=0.5)
+    fig_path = os.path.join(out_dir, 'missing_values.png')
+    plt.savefig(fig_path)
+    plt.close()
+
+
+def plot_correlation_features(df, out_dir):
+    # note: out_dir is used directly as the output image path
+    corr = df.corr()
+    mask = np.triu(np.ones_like(corr, dtype=bool))
+    f, ax = plt.subplots()
+    sns.heatmap(data=corr, mask=mask, center=0,
+                cmap=sns.diverging_palette(230, 20), square=True)
+
+    ax.margins(0.01)
+    plt.xticks(rotation=45, ha='right')
+    plt.tight_layout()
+    plt.savefig(out_dir)
+
+
+def plot_p_values(statistics):
+    # placeholder, not implemented yet
+    pass
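Note the asymmetry in the helpers above: plot_mv and the distribution plots treat out_dir as a directory, while plot_correlation_features treats it as the full target file name. A sketch with made-up numeric columns:

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    df_num = pd.DataFrame(rng.normal(size=(50, 3)),
                          columns=['Age', 'NIHSS', 'ICV Index'])  # assumed numeric features
    plot_correlation_features(df_num, out_dir='results/correlation.png')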
+
+
+def plot_distribution_categorical(df, variable, table, title='', out=False, out_dir='', p_values=[]):
+    present_values = sorted(list(np.unique(df[variable])))
+    expected_values = list(table[variable]['description'].keys())
+    variables_names = list(table[variable]['description'].values())
+
+    # keep only the labels of categories actually present in the data
+    variables_names = [variables_names[count] for count, v in enumerate(expected_values) if v in present_values]
+
+    data = df[variable].value_counts().sort_index().values.tolist()
+    colors = plt.cm.get_cmap('Set3')(np.linspace(0., 1, num=len(variables_names)))
+
+    fig, axs = plt.subplots(figsize=(15, 6), nrows=1, ncols=2)
+
+    bars = axs[0].bar(variables_names, data, color=colors)
+    for bar in bars:
+        axs[0].annotate('{}'.format(bar.get_height()),
+                        xy=(bar.get_x() + (bar.get_width() / 2), bar.get_height() + 0.5),
+                        xytext=(0, 1),
+                        textcoords="offset points",
+                        ha='center', va='bottom')
+    axs[0].tick_params(labelrotation=45)
+
+    def func(pct, allvals):
+        absolute = int(round(pct / 100. * np.sum(allvals)))
+        return "{:.1f}%\n({:d} patients)".format(pct, absolute)
+
+    wedges, texts, autotext = axs[1].pie(x=data,
+                                         autopct=lambda pct: func(pct, data),
+                                         textprops=dict(color='k'),
+                                         counterclock=False,
+                                         colors=colors)
+    y = np.asarray(data)
+    percent = 100. * y / y.sum()
+    labels = ['{0} - {1:1.2f}%'.format(i, j) for i, j in zip(variables_names, percent)]
+    axs[1].legend(wedges, labels,
+                  loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))
+
+    plt.setp(autotext, size=10)
+    fig.suptitle(title, size=20)
+    plt.tight_layout()
+    fig_path = os.path.join(out_dir, '{0}_distribution.png'.format(variable))
+    plt.savefig(fig_path)
+    plt.close()
+    if not out:
+        fig, ax = plt.subplots(2, 2, figsize=(12, 8))
+
+        labels = {'mRS90d': ['mRS 0', 'mRS 1', 'mRS 2', 'mRS 3', 'mRS 4', 'mRS 5', 'mRS 6'],
+                  'dmRS': ['Good outcome', 'Bad outcome (>2mRS)'],
+                  'shift_mRS': ['shift of 0', 'shift of 1', 'shift of 2', 'shift of 3',
+                                'shift of 4', 'shift of 5', 'shift of 6'],
+                  'mortality': ['Alive after 90d', 'Dead after 90 d']}
+
+        for count, output in enumerate(['mRS90d', 'shift_mRS', 'dmRS', 'mortality']):
+            x = int(np.floor(count / 2))
+            y = int(count % 2)
+            sns.countplot(ax=ax[x, y], x=output, hue=variable, data=df, palette="pastel")
+            ax[x, y].set_xticklabels(labels[output])
+            ax[x, y].get_legend().remove()
+            # p-values are provided for the dmRS and mortality panels only
+            if count > 1:
+                p = p_values[count - 2]
+                color = 'k' if p > 0.05 else 'r'
+                ax[x, y].text(0.2, 0.95, 'P value: {:.6f}'.format(p),
+                              ha='center', va='center', transform=ax[x, y].transAxes, fontsize=9, color=color)
+
+        fig.legend(title=variable, labels=variables_names)
+        fig.suptitle(title, size=20)
+        plt.tight_layout()
+        fig_path = os.path.join(out_dir, '{0}_distribution_target.png'.format(variable))
+        plt.savefig(fig_path)
+        plt.close()
+
+
+def plot_distribution_numerical(df, variable, title='', out_dir='', p_values=[]):
+    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
+    df[variable] = df[variable].astype('float32')
+    sns.histplot(ax=ax[0], data=df, x=variable, palette='pastel', kde=True)
+
+    sns.boxplot(ax=ax[1], y=variable, data=df, palette="pastel")
+    sns.swarmplot(ax=ax[1], y=variable, color="k", size=3, data=df)
+    fig.suptitle(title)
+    plt.tight_layout()
+    fig_path = os.path.join(out_dir, '{0}_distribution.png'.format(variable))
+    plt.savefig(fig_path)
+    plt.close()
+
+    fig, ax = plt.subplots(2, 2, figsize=(12, 8))
+
+    labels = {'mRS90d': ['mRS 0', 'mRS 1', 'mRS 2', 'mRS 3', 'mRS 4', 'mRS 5', 'mRS 6'],
+              'dmRS': ['Good outcome', 'Bad outcome (>2mRS)'],
+              'shift_mRS': ['shift of 0', 'shift of 1', 'shift of 2', 'shift of 3',
+                            'shift of 4', 'shift of 5', 'shift of 6'],
+              'mortality': ['Alive after 90d', 'Dead after 90 d']}
+
+    for count, output in enumerate(['mRS90d', 'shift_mRS', 'dmRS', 'mortality']):
+        x = int(np.floor(count / 2))
+        y = int(count % 2)
+        sns.boxplot(ax=ax[x, y], x=output, y=variable, data=df, palette="pastel")
+        sns.swarmplot(ax=ax[x, y], x=output, y=variable, color="k", size=3, data=df)
+        ax[x, y].set_xticklabels(labels[output])
+        # as above, p-values exist only for the dmRS and mortality panels
+        if count > 1:
+            p = p_values[count - 2]
+            color = 'k' if p > 0.05 else 'r'
+            ax[x, y].text(0.2, 0.95, 'P value: {:.6f}'.format(p),
+                          ha='center', va='center', transform=ax[x, y].transAxes, fontsize=9, color=color)
+
+    fig.suptitle(title, size=20)
+    plt.tight_layout()
+    fig_path = os.path.join(out_dir, '{0}_distribution_target.png'.format(variable))
+    plt.savefig(fig_path)
+    plt.close()
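The `table` argument of plot_distribution_categorical is expected to follow the schema of the YAML dictionaries added later in this commit (type/info/categories/description). A toy call under that assumption; the frame must also carry the four outcome columns the second figure iterates over, each with all of its categories present:

    import pandas as pd

    table = {'Sex': {'type': 'cat', 'info': 'Patient sex', 'categories': 2,
                     'description': {0: 'man', 1: 'woman'}}}
    df = pd.DataFrame({'Sex':       [0, 1, 0, 1, 0, 1, 0],
                       'mRS90d':    [0, 1, 2, 3, 4, 5, 6],
                       'shift_mRS': [0, 1, 2, 3, 4, 5, 6],
                       'dmRS':      [0, 0, 0, 1, 1, 1, 1],
                       'mortality': [0, 0, 0, 0, 0, 1, 1]})
    # p_values feed the dmRS and mortality panels, in that order
    plot_distribution_categorical(df, 'Sex', table, title='Sex distribution',
                                  out=False, out_dir='results', p_values=[0.8, 0.4])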
diff --git a/architectures/3D_CNN.py b/architectures/3D_CNN.py
new file mode 100644
index 0000000..21a40e9
--- /dev/null
+++ b/architectures/3D_CNN.py
@@ -0,0 +1,64 @@
+import torch.nn as nn
+import torch.nn.functional as F
+import torch
+
+
+class _3D_CNN(nn.Module):
+    def __init__(self, num_output):
+        super(_3D_CNN, self).__init__()
+        self.conv1 = nn.Conv3d(1, 8, kernel_size=3, stride=1, padding=1)
+        self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=1)
+        self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)
+        self.conv4 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
+        self.conv5 = nn.Conv3d(64, 128, kernel_size=3, stride=1, padding=1)
+
+        self.BN1 = nn.BatchNorm3d(num_features=8)
+        self.BN2 = nn.BatchNorm3d(num_features=16)
+        self.BN3 = nn.BatchNorm3d(num_features=32)
+        self.BN4 = nn.BatchNorm3d(num_features=64)
+        self.BN5 = nn.BatchNorm3d(num_features=128)
+
+        self.pool1 = nn.AdaptiveAvgPool3d((64, 64, 64))
+        self.pool2 = nn.AdaptiveAvgPool3d((32, 32, 32))
+        self.pool3 = nn.AdaptiveAvgPool3d((16, 16, 16))
+        self.pool4 = nn.AdaptiveAvgPool3d((8, 8, 8))
+        self.pool5 = nn.AdaptiveAvgPool3d((4, 4, 4))
+
+        # 128 feature maps of size 4x4x4 after pool5 -> 8192 inputs to fc1
+        self.fc1 = nn.Linear(8192, 1300)
+        self.fc2 = nn.Linear(1300, 50)
+        self.fc3 = nn.Linear(50, num_output)
+
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight.data)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Conv3d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                nn.init.constant_(m.bias, 0)
+
+    def forward(self, x):
+        x = F.relu(self.BN1(self.conv1(x)))
+        x = self.pool1(x)
+
+        x = F.relu(self.BN2(self.conv2(x)))
+        x = self.pool2(x)
+
+        x = F.relu(self.BN3(self.conv3(x)))
+        x = self.pool3(x)
+
+        x = F.relu(self.BN4(self.conv4(x)))
+        x = self.pool4(x)
+
+        x = F.relu(self.BN5(self.conv5(x)))
+        x = self.pool5(x)
+
+        x = x.view(x.size(0), -1)
+
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = torch.log_softmax(self.fc3(x), dim=1)
+
+        return x
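A quick shape check for _3D_CNN, confirming the 8192-feature flatten that fc1 now expects; the input volume size is an arbitrary example, since every pooling stage is adaptive:

    import torch

    model = _3D_CNN(num_output=2)
    vol = torch.randn(1, 1, 97, 115, 97)  # (batch, channel, depth, height, width)
    out = model(vol)                      # log-probabilities of shape (1, 2)
    print(out.shape, out.exp().sum())     # the two class probabilities sum to ~1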
diff --git a/architectures/Edge_GCN.py b/architectures/Edge_GCN.py
new file mode 100644
index 0000000..bfa90f0
--- /dev/null
+++ b/architectures/Edge_GCN.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import ChebConv, EdgeConv
+from torch_geometric.utils import dropout_adj
+
+
+class Edge_GCN(nn.Module):
+    def __init__(self, nfeat, nclass, dropout=0):
+        super(Edge_GCN, self).__init__()
+
+        self.dropout = dropout
+
+        # EdgeConv concatenates [x_i, x_j - x_i], so its MLP takes 2 * in_features inputs
+        self.gc1 = EdgeConv(nn.Sequential(nn.Linear(2 * nfeat, int(nfeat / 2)), nn.ReLU()))
+        self.gc2 = EdgeConv(nn.Sequential(nn.Linear(2 * int(nfeat / 2), nclass), nn.ReLU()))
+
+    def forward(self, data, edge_index, weights):
+        x = self.gc1(data, edge_index)
+        # drop a fraction of edges instead of features between the two convolutions
+        edge_index_drop, _ = dropout_adj(edge_index, p=self.dropout)
+        x = self.gc2(x, edge_index_drop)
+        x2 = torch.log_softmax(x, dim=1)
+
+        return x2
diff --git a/architectures/FCN.py b/architectures/FCN.py
new file mode 100644
index 0000000..f2de6cb
--- /dev/null
+++ b/architectures/FCN.py
@@ -0,0 +1,52 @@
+import torch
+import torch.nn as nn
+
+
+class Basic_FCN(nn.Module):
+
+    def __init__(self, in_features, layers, out_features, dropout_rate):
+        super(Basic_FCN, self).__init__()
+
+        self.layers = []
+        self.n_layers = layers['number']
+        l = nn.Linear(in_features, layers['layer1'])
+        self.layers.extend([l])
+
+        if self.n_layers > 2:
+            for i in range(self.n_layers - 2):
+                name0 = 'layer{}'.format(i + 1)
+                name1 = 'layer{}'.format(i + 2)
+                l = nn.Linear(layers[name0], layers[name1])
+                self.layers.extend([l])
+
+        if self.n_layers >= 2:
+            name0 = 'layer{}'.format(self.n_layers - 1)
+            l = nn.Linear(layers[name0], out_features)
+            self.layers.extend([l])
+        self.layers = nn.ModuleList(self.layers)
+
+        self.dropout = nn.Dropout(p=dropout_rate)
+
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight.data)
+                nn.init.constant_(m.bias.data, 0.03)
+
+    def forward(self, data):
+        # accept 1-D inputs by reshaping them into a single feature column
+        if len(data.shape) == 1:
+            data = torch.reshape(data, (data.shape[0], 1))
+        x = torch.relu(self.layers[0](data))
+
+        if self.n_layers > 2:
+            for i in range(1, self.n_layers - 1):
+                x = torch.relu(self.layers[i](x))
+                x = self.dropout(x)
+        # x = torch.sigmoid(self.layers[self.n_layers - 1](x))
+        x = self.layers[self.n_layers - 1](x)
+
+        return torch.log_softmax(x, dim=1)
diff --git a/architectures/GCN.py b/architectures/GCN.py
new file mode 100644
index 0000000..239fd36
--- /dev/null
+++ b/architectures/GCN.py
@@ -0,0 +1,38 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import ChebConv, EdgeConv
+from torch_geometric.utils import dropout_adj
+
+
+class GCN(nn.Module):
+    def __init__(self, nfeat, nclass, dropout=0):
+        super(GCN, self).__init__()
+
+        self.dropout = dropout
+
+        self.gc1 = ChebConv(nfeat, 200, 3)
+        self.gc2 = ChebConv(200, 50, 3)
+        self.gc3 = ChebConv(50, nclass, 3)
+
+    def forward(self, data, edge_index, weights):
+        # weighted variant x = self.gc1(data, edge_index, weights) left disabled
+        x = self.gc1(data, edge_index)
+        x = F.relu(x)
+        # dropout is active only in train mode; evaluate_model keeps the
+        # network in train mode on purpose for Monte Carlo dropout sampling
+        x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.gc2(x, edge_index)
+        x = F.dropout(x, p=self.dropout, training=self.training)
+        x = F.relu(x)
+        x = self.gc3(x, edge_index)
+        x2 = torch.log_softmax(x, dim=1)
+
+        return x2
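Basic_FCN assembles its stack from a small config dict; judging from the constructor, 'number' counts the linear layers and 'layer{i}' gives each hidden width. An assumed example:

    import torch

    layers_cfg = {'number': 3, 'layer1': 64, 'layer2': 32}  # 20 -> 64 -> 32 -> 2
    model = Basic_FCN(in_features=20, layers=layers_cfg,
                      out_features=2, dropout_rate=0.3)
    log_probs = model(torch.randn(8, 20))  # shape (8, 2)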
diff --git a/architectures/ML_algorithms.py b/architectures/ML_algorithms.py
new file mode 100644
index 0000000..075f073
--- /dev/null
+++ b/architectures/ML_algorithms.py
@@ -0,0 +1,146 @@
+from Metrics.ClassificationMetrics import ClassificationMetrics
+
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import GridSearchCV
+from sklearn.neural_network import MLPClassifier
+import xgboost as xgb
+
+
+def apply_LR(x_train, x_val, x_test, y_train, y_val, y_test):
+    # Number of classes
+    c = np.unique(y_train).shape[0]
+
+    # Parameters for hyperparameter optimization
+    solvers = ['newton-cg', 'liblinear']
+    penalty = ['l2']
+    c_values = [1000, 100, 10]
+
+    # Define grid search
+    grid = dict(solver=solvers, penalty=penalty, C=c_values)
+    scoring = 'roc_auc' if c < 3 else 'f1_micro'
+    multiclass = 'auto' if c < 3 else 'ovr'
+    try:
+        grid_search = GridSearchCV(estimator=LogisticRegression(), param_grid=grid, n_jobs=-1, cv=5,
+                                   scoring=scoring, error_score=0)
+        grid_result = grid_search.fit(x_val, y_val)
+
+        # Summarize results
+        print("Best parameters: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
+
+        # Retrain on the training split with the selected hyperparameters
+        clf = LogisticRegression(C=grid_result.best_params_['C'], solver=grid_result.best_params_['solver'],
+                                 random_state=0, max_iter=500, multi_class=multiclass).fit(x_train, y_train)
+    except Exception:
+        # fall back to default hyperparameters if the grid search fails
+        clf = LogisticRegression(random_state=0, max_iter=500, multi_class='ovr').fit(x_train, y_train)
+
+    metrics_tensor = compute_metrics(clf, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)])
+
+    return clf, metrics_tensor
+
+
+def apply_random_forest(x_train, x_val, x_test, y_train, y_val, y_test, save=False):
+    # Parameters for hyperparameter optimization
+    # Number of trees in random forest
+    n_estimators = [2, 5, 10, 100]
+    # Number of features to consider at every split
+    max_features = ['log2', 'sqrt'] + [a for a in (2, 5, 10, 20) if a <= x_train.shape[1]]
+    # Maximum number of levels in tree
+    max_depth = [5, 10, None]
+    bootstrap = [True, False]
+    criterion = ['gini', 'entropy']
+
+    # Define grid search
+    random_grid = {'n_estimators': n_estimators,
+                   'max_features': max_features,
+                   'max_depth': max_depth,
+                   'criterion': criterion,
+                   'bootstrap': bootstrap}
+
+    rf = RandomForestClassifier(random_state=0)
+    grid_search = GridSearchCV(estimator=rf, param_grid=random_grid, n_jobs=-1, cv=2,
+                               scoring='roc_auc', error_score=0)
+    grid_result = grid_search.fit(x_val, y_val)
+
+    # Summarize results
+    print("Best parameters: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
+
+    # Retrain on the training split with the selected hyperparameters
+    clf = RandomForestClassifier(random_state=0, **grid_result.best_params_).fit(x_train, y_train)
+    metrics_tensor = compute_metrics(clf, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)])
+
+    return clf, metrics_tensor
+
+
+def apply_mlp(x_train, x_val, x_test, y_train, y_val, y_test):
+    mlp_gs = MLPClassifier(max_iter=300, verbose=False)
+    parameter_space = {
+        # 'hidden_layer_sizes': [(10, 30, 10), (20,), (50, 20), (20, 50, 20)],
+        'hidden_layer_sizes': [(20,), (64, 32, 16), (128, 32, 16, 8), (40, 20)],
+        'activation': ['relu'],
+        'solver': ['sgd', 'adam'],
+        'alpha': [0.0001, 0.05],
+        'learning_rate': ['constant', 'adaptive']
+    }
+
+    grid_search = GridSearchCV(mlp_gs, param_grid=parameter_space, n_jobs=-1, cv=2,
+                               scoring='roc_auc', error_score=0)
+    grid_search.fit(x_val, y_val)  # hyperparameters are selected on the validation split
+    print('Best %f using %s ' % (grid_search.best_score_, grid_search.best_params_))
+
+    # Retrain on the training split with the selected hyperparameters
+    clf = MLPClassifier(max_iter=300, verbose=False, **grid_search.best_params_).fit(x_train, y_train)
+    metrics_tensor = compute_metrics(clf, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)])
+
+    return clf, metrics_tensor
+
+
+def apply_xgbBoost(x_train, x_val, x_test, y_train, y_val, y_test):
+    params = {
+        'gamma': [0.5, 1, 1.5, 2, 5],
+        'subsample': [0.6, 0.8, 1.0],
+        'colsample_bytree': [0.4, 0.6, 0.8, 1.0],
+        'max_depth': [2, 4, 6],
+    }
+    xgb_model = xgb.XGBClassifier(random_state=4, use_label_encoder=False, eval_metric='mlogloss')
+    grid_search = GridSearchCV(estimator=xgb_model, param_grid=params, n_jobs=-1, cv=2,
+                               scoring='roc_auc', error_score=0)
+    clf = grid_search.fit(x_val, y_val)
+    print('Best %f using %s ' % (clf.best_score_, clf.best_params_))
+
+    # Retrain on the training split with the selected hyperparameters
+    best_model = xgb.XGBClassifier(random_state=4, use_label_encoder=False, eval_metric='mlogloss',
+                                   **clf.best_params_).fit(x_train, y_train)
+
+    metrics_tensor = compute_metrics(best_model, splits=[(x_train, y_train), (x_val, y_val), (x_test, y_test)])
+
+    return best_model, metrics_tensor
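All four helpers share one calling convention: hyperparameters are tuned on the validation split, the winning configuration is refit on the training split, and metrics come back for train/val/test. A synthetic end-to-end run for apply_LR (sizes are arbitrary; this assumes the repo's ClassificationMetrics is importable):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=300, n_features=10, random_state=0)
    x_tmp, x_test, y_tmp, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    x_train, x_val, y_train, y_val = train_test_split(x_tmp, y_tmp, test_size=0.25, random_state=0)

    clf, metrics = apply_LR(x_train, x_val, x_test, y_train, y_val, y_test)
    # metrics holds one metrics dict per split: train, validation, test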
+
+
+def compute_metrics(clf, splits):
+    metrics_tensor = []
+    for x, y in splits:
+        prob = clf.predict_proba(x)
+        # one-hot encode the integer labels to match the probability matrix
+        output_one_hot = np.zeros((y.shape[0], len(np.unique(y))))
+        for i in range(output_one_hot.shape[0]):
+            output_one_hot[i, y[i]] = 1
+        # binary setting: two-column probabilities and one-hot labels
+        CM = ClassificationMetrics(classes=2)
+        metrics = CM.compute_metrics(prob.squeeze(), output_one_hot)
+        metrics_tensor.extend([metrics])
+
+    return metrics_tensor
diff --git a/architectures/__pycache__/Edge_GCN.cpython-37.pyc b/architectures/__pycache__/Edge_GCN.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e84e8b4313f41fe71b4b9ef5bca63c59c83a4320
GIT binary patch literal 1235 (binary data omitted)
diff --git a/architectures/__pycache__/FCN.cpython-37.pyc b/architectures/__pycache__/FCN.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..48d5168787abc3f17c28ddf1a6e4d22b47176aad
GIT binary patch literal 1531 (binary data omitted)
diff --git a/architectures/__pycache__/GCN.cpython-37.pyc b/architectures/__pycache__/GCN.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fcd4d2fcac2fd81b246c8c8cdd10b29546684431
GIT binary patch literal 1173 (binary data omitted)
diff --git a/architectures/__pycache__/ML_algorithms.cpython-37.pyc b/architectures/__pycache__/ML_algorithms.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fbb131a9cdf60971f8ccc94d907cf2f5a60ecd32
GIT binary patch literal 4090 (binary data omitted)
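Both dictionaries added below share one entry schema: a 'type' ('cat', 'ord', 'int' or 'float'), an 'info' string, and, for discrete variables, 'categories' plus an integer-to-label 'description' map. A sketch of how such an entry can be consumed (the yaml import and the relative path are assumptions, not code from this commit):

    import yaml

    with open('dictionaries/dictionary_modalities.yml') as f:
        modalities = yaml.safe_load(f)

    entry = modalities['Metadata']['Sex']
    if entry['type'] == 'cat':
        # map the stored integer codes to readable labels
        labels = [entry['description'][c] for c in range(entry['categories'])]
        print(entry['info'], labels)  # -> Patient sex ['man', 'woman']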
Output +# 1.Meta data +# 2.Clinical data +# 3.NCCT data +# 4.CTP data +# 5.CTA data +# 6.Treatment +# 7.Treatment output +# 7.Control CT data +# 8.Temporal + +# Before-Imaging: Metadata, NIHSS, Unkown_Onset, Time_Onset_to_Admission +# Pre-treatment: Metadata, NCCT, CTP, CTA (+NIHSS, Time_Onset_to_Admission, Time_CT_to_Angio) +# Post-treatment: Metadata, NCCT, CTP, CTA, Treatment, Treatment out (+NIHSS, Time_Onset_to_Admission, Time_CT_to_Angio, Time_Puncture_to_Recan ) +# Post-treatment 24 h: Metadata, NCCT, CTP, CTA, Treatment, Treatment out, Control CT (+NIHSS) + +Ouput: + + mRS90d: + type: 'ord' + info: 'Functional outcome at 90 days' + categories: 7 + description: + 0: 'mRS 0' + 1: 'mRS 1' + 2: 'mRS 2' + 3: 'mRS 3' + 4: 'mRS 4' + 5: 'mRS 5' + 6: 'mRS 6' + + dmRS: + type: 'cat' + info: 'Binary functional outcome at 90 days' + categories: 2 + description: + 0: 'Good outcome' + 1: 'Bad outcome' + + mortality: + type: 'cat' + info: 'Mortality at 90 days' + categories: 2 + description: + 0: 'No' + 1: 'Yes' + + shift_mRS: + type: 'ord' + info: 'Shift in mRS' + categories: 7 + description: + + 0: 'Shift of 0' + 1: 'Shift of 1' + 2: 'Shift of 2' + 3: 'Shift of 3' + 4: 'Shift of 4' + 5: 'Shift of 5' + 6: 'Shift of 6' + +Metadata: + + Sex: + type: 'cat' + info: 'Patient sex' + categories: 2 + description: + 0: 'man' + 1: 'woman' + + Age: + type: 'int' + info: 'Patient age' + +Clinical: + + NIHSS: + type: 'int' + info: 'National Institute of Health Stroke Scale' + categories: 43 + description: None + + pre-mRS: + type: 'ord' + info: 'mRS previous to stroke' + categories: 6 + description: + 0: 'mRS 0' + 1: 'mRS 1' + 2: 'mRS 2' + 3: 'mRS 3' + 4: 'mRS 4' + 5: 'mRS 5' + + aHT: + type: 'cat' + info: 'arterial hypertension' + categories: 2 + description: + 0: 'No' + 1: 'Yes' + + HLP: + type: 'cat' + info: 'hyperlipidemia' + categories: 2 + description: + 0: 'No' + 1: 'Yes' + DM: + type: 'cat' + info: 'Diabetes Mellitus' + categories: 2 + description: + 0: 'No' + 1: 'Yes' + + aFib: + type: 'cat' + info: 'atrial fibrillation' + categories: 2 + description: + 0: 'No' + 1: 'Yes' + + s.p. 
+
+  s.p. stroke:
+    type: 'cat'
+    info: 'previous stroke'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH:
+    type: 'cat'
+    info: 'anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH mono:
+    type: 'cat'
+    info: 'mono anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH duo:
+    type: 'cat'
+    info: 'dual anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  OAK:
+    type: 'cat'
+    info: 'oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  VKA:
+    type: 'cat'
+    info: 'vitamin K antagonist'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  DOAC:
+    type: 'cat'
+    info: 'direct oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+NCCT:
+  C_br:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br:
+    type: 'cat'
+    info: 'Internal capsule region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  INS_br:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  L_br:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostral to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  e-ASPECTS:
+    type: 'ord'
+    info: 'electronic ASPECTS'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  ASPECTS oberfl_tief:
+    type: 'cat'
+    info: 'depth of regions affected'
+    categories: 4
+    description:
+      0: 'superficial'
+      1: 'deep'
+      2: 'both'
+      3: 'ASPECTS 10'
+
+  pc-ASPECTS:
+    type: 'int'
+    info: 'posterior ASPECTS'
+
+  Volume e-ASPECTS:
+    type: 'float'
+    info: 'automatic volume on e-ASPECTS'
+
+  ICV krank:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, ipsilateral (HU)'
+
+  IVC gesund:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, contralateral (HU)'
+
+  ICV Index:
+    type: 'float'
+    info: 'ICV krank/ICV gesund'
+
+  Vessel Occlusion Location Admission:
+    type: 'cat'
+    info: 'Location of vessel occlusion at admission'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  Vessel Occlusion Side Admission:
+    type: 'cat'
+    info: 'side of occlusion at admission'
+    categories: 3
+    description:
+      1: 'left'
+      2: 'right'
+      3: 'both'
+
+  Tan Score:
+    type: 'ord'
+    info: 'collaterals score'
+    categories: 4
+    description:
+      0: '0%'
+      1: '0-50%'
+      2: '50-100%'
+      3: '100%'
+
+  Coves Score:
+    type: 'ord'
+    info: 'cortical vein opacification score (a score of 0 (absence), 1 (moderate) or 2 (full) opacification assigned to three veins in the affected hemisphere)'
+    categories: 7
+    description:
+      0: 'Coves Score 0'
+      1: 'Coves Score 1'
+      2: 'Coves Score 2'
+      3: 'Coves Score 3'
+      4: 'Coves Score 4'
+      5: 'Coves Score 5'
+      6: 'Coves Score 6'
+
+  BATMAN:
+    type: 'int'
+    info: 'Basilar artery on CTA score'
+
+  Clot Burden Score:
+    type: 'ord'
+    info: 'Evaluates the extent of ipsilateral thrombus'
+    categories: 11
+    description:
+      0: 'Clot Burden Score 0'
+      1: 'Clot Burden Score 1'
+      2: 'Clot Burden Score 2'
+      3: 'Clot Burden Score 3'
+      4: 'Clot Burden Score 4'
+      5: 'Clot Burden Score 5'
+      6: 'Clot Burden Score 6'
+      7: 'Clot Burden Score 7'
+      8: 'Clot Burden Score 8'
+      9: 'Clot Burden Score 9'
+      10: 'Clot Burden Score 10'
+
+  Vessel Stenosis:
+    type: 'cat'
+    info: 'Presence of arterial stenosis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Stenosis Location:
+    type: 'cat'
+    info: 'Location of arterial stenosis'
+    categories: 8
+    description:
+      0: 'No stenosis'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'MCA'
+      4: 'PCA'
+      5: 'VA'
+      6: 'BA'
+      7: 'ACA'
+
+  Arterial Dissection:
+    type: 'cat'
+    info: 'Presence of arterial dissection'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Arterial Dissection Location:
+    type: 'cat'
+    info: 'Location of arterial dissection'
+    categories: 4
+    description:
+      0: 'No dissection'
+      1: 'ACI'
+      2: 'VA'
+      3: 'other'
+
+CTP:
+  'CBF_lower30_volume':
+    type: 'int'
+    info: 'Volume in which the CBF is lower than 30% of the CBF of the contralateral side -- core --'
+  'Tmax_greater6s_volume':
+    type: 'int'
+    info: 'Volume in which the Tmax is greater than 6s -- penumbra + core --'
+  Mismatch Volume:
+    type: 'int'
+    info: 'penumbra volume'
+  Inverse Mismatch Ratio:
+    type: 'float'
+    info: 'volume core/total volume'
+  Hypoperfusion Index:
+    type: 'float'
+    info: 'Volume Tmax>10s/volume Tmax>6s'
+  CBV Index:
+    type: 'float'
+    info: 'Unknown'
+
+CTA:
+
+  Vessel Occlusion CTA:
+    type: 'cat'
+    info: 'Location of vessel occlusion on CTA'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+Treatment:
+
+  Stenting:
+    type: 'cat'
+    info: 'Use of stenting'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Thrombectomy:
+    type: 'cat'
+    info: 'Use of thrombectomy'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Device:
+    type: 'cat'
+    info: 'Device employed for thrombectomy'
+    categories: 3
+    description:
+      1: 'Stent retriever'
+      2: 'Aspiration'
+      3: 'Both'
+
+  PTA:
+    type: 'cat'
+    info: 'Use of percutaneous transluminal angioplasty'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Number Maneuver:
+    type: 'int'
+    info: 'number of maneuvers that were necessary'
+
+  Lysis i.a.:
+    type: 'cat'
+    info: 'Use of intraarterial lysis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Lysis quantity:
+    type: 'int'
+    info: 'quantity of lysis used'
+
+Treatment_out:
+
+  Frustrated Recanalization:
+    type: 'cat'
+    info: 'Whether the recanalization was unsuccessful (frustrated)'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Occlusion after Recan.:
+    type: 'cat'
+    info: 'Location of vessel occlusion after recanalization'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  TICI:
+    type: 'cat'
+    info: 'Reperfusion score'
+    categories: 5
+    description:
+      0: '0, No reperfusion'
+      1: '1, Minimal reperfusion'
+      2: '2a, Partial reperfusion (<50%)'
+      3: '2b, Partial reperfusion (>50%)'
+      4: '3, Total reperfusion'
+
+  SAE:
+    type: 'cat'
+    info: 'Subarachnoid hemorrhage encephalitis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Occlusion new SupplyArea:
+    type: 'cat'
+    info: 'Vessel occlusion in a new Supply Area'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Occlusion new SupplyArea Location:
+    type: 'cat'
+    info: 'Location of vessel occlusion after recanalization in a new supply area'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  Vessel Occlusion new SupplyArea Treatment:
+    type: 'cat'
+    info: 'Whether new vessel occlusion was treated'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Infarct new SupplyArea:
+    type: 'cat'
+    info: 'Whether there is an infarct in the new supply area'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+Control CT:
+  Lacunar Infarct:
+    type: 'cat'
+    info: 'Presence of lacunar infarct'
+    categories: 3
+    description:
+      0: 'no infarct'
+      1: 'Lacunar infarct'
+      2: 'not a lacunar infarct'
+
+  Infarct Volume ControlCT:
+    type: 'int'
+    info: 'Volume in ml (cm³) of the infarct in the Control CT'
+
+  Hyperdense Media Sign:
+    type: 'cat'
+    info: 'Presence of hyperdense Media Sign'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  pc-Aspect ControlCT:
+    type: 'int'
+    info: 'posterior ASPECTS in Control CT'
+
+  Aspect ControlCT:
+    type: 'ord'
+    info: 'ASPECTS in Control CT'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  C_br2:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br2:
+    type: 'cat'
+    info: 'Internal capsule region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  INS_br2:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  L_br2:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br2:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br2:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br2:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br2:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostral to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br2:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br2:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+Times:
+  Unknown Onset:
+    type: 'cat'
+    info: 'Whether onset time of symptoms is known'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Time_Onset_to_Admission:
+    type: 'int'
+    info: 'Time from symptom onset to admission in minutes'
+
+  Time_CT_to_Angio.:
+    type: 'int'
+    info: 'Time from CT imaging to CTA imaging in minutes'
+
+  Time_Puncture_to_Recan.:
+    type: 'int'
+    info: 'Time from CTA to Recanalization in minutes'
+
+  Time_Recan_to_Control:
+    type: 'int'
+    info: 'Time from Recanalization to Control CT imaging in minutes'
diff --git a/dictionaries/dictionary_timepoints.yml b/dictionaries/dictionary_timepoints.yml
new file mode 100644
index 0000000..f8b8e9f
--- /dev/null
+++ b/dictionaries/dictionary_timepoints.yml
@@ -0,0 +1,808 @@
+#### Data divided into
+# 1. Admission: Metadata, Clinical, Unknown onset, Time_Onset_Admission
+# 2. PostImaging: NCCT, CTP, CTA
+# 3. PostEVT: Treatment and Treatment output
+# 4. After24h: CCT
+# 5. Output
+
+Admission:
+
+  Sex:
+    type: 'cat'
+    info: 'Patient sex'
+    categories: 2
+    description:
+      0: 'man'
+      1: 'woman'
+
+  Age:
+    type: 'int'
+    info: 'Patient age'
+
+  NIHSS:
+    type: 'int'
+    info: 'National Institute of Health Stroke Scale'
+    categories: 43
+    description: None
+
+  s.p. stroke:
+    type: 'cat'
+    info: 'previous stroke'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  Unknown Onset:
+    type: 'cat'
+    info: 'Whether onset time of symptoms is known'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Time_Onset_to_Admission:
+    type: 'int'
+    info: 'Time from symptom onset to admission in minutes'
+
+  # pre-mRS:
+  #   type: 'int'
+  #   info: 'mRS previous to stroke'
+
+  pre-mRS:
+    type: 'ord'
+    info: 'mRS previous to stroke'
+    categories: 6
+    description:
+      0: 'mRS 0'
+      1: 'mRS 1'
+      2: 'mRS 2'
+      3: 'mRS 3'
+      4: 'mRS 4'
+      5: 'mRS 5'
+
+  aHT:
+    type: 'cat'
+    info: 'arterial hypertension'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  HLP:
+    type: 'cat'
+    info: 'hyperlipidemia'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  DM:
+    type: 'cat'
+    info: 'Diabetes Mellitus'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  aFib:
+    type: 'cat'
+    info: 'atrial fibrillation'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH:
+    type: 'cat'
+    info: 'anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH mono:
+    type: 'cat'
+    info: 'mono anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  TAH duo:
+    type: 'cat'
+    info: 'dual anti-platelet drug'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  OAK:
+    type: 'cat'
+    info: 'oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  VKA:
+    type: 'cat'
+    info: 'vitamin K antagonist'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  DOAC:
+    type: 'cat'
+    info: 'direct oral anticoagulant'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+Pre-EVT:
+  C_br:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br:
+    type: 'cat'
+    info: 'Internal capsule region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  INS_br:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  L_br:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostral to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  # e-ASPECTS:
+  #   type: 'int'
+  #   info: 'electronic ASPECTS'
+  e-ASPECTS:
+    type: 'ord'
+    info: 'electronic ASPECTS'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  ASPECTS oberfl_tief:
+    type: 'cat'
+    info: 'depth of regions affected'
+    categories: 4
+    description:
+      0: 'superficial'
+      1: 'deep'
+      2: 'both'
+      3: 'ASPECTS 10'
+
+  pc-ASPECTS:
+    type: 'cat'
+    info: 'posterior ASPECTS'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  Volume e-ASPECTS:
+    type: 'float'
+    info: 'automatic volume on e-ASPECTS'
+
+  ICV krank:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, ipsilateral (HU)'
+
+  IVC gesund:
+    type: 'int'
+    info: 'Internal cerebral vein intensity, contralateral (HU)'
+
+  ICV Index:
+    type: 'float'
+    info: 'ICV krank/ICV gesund'
+
+  Vessel Occlusion Location Admission:
+    type: 'cat'
+    info: 'Location of vessel occlusion at admission'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  Vessel Occlusion Side Admission:
+    type: 'cat'
+    info: 'side of occlusion at admission'
+    categories: 3
+    description:
+      1: 'left'
+      2: 'right'
+      3: 'both'
+
+  # Tan Score:
+  #   type: 'int'
+  #   info: 'collaterals score'
+
+  Tan Score:
+    type: 'ord'
+    info: 'collaterals score'
+    categories: 4
+    description:
+      0: '0%'
+      1: '0-50%'
+      2: '50-100%'
+      3: '100%'
+
+  # Coves Score:
+  #   type: 'int'
+  #   info: 'cortical vein opacification score'
+
+  Coves Score:
+    type: 'ord'
+    info: 'cortical vein opacification score (a score of 0 (absence), 1 (moderate) or 2 (full) opacification assigned to three veins in the affected hemisphere)'
+    categories: 7
+    description:
+      0: 'Coves Score 0'
+      1: 'Coves Score 1'
+      2: 'Coves Score 2'
+      3: 'Coves Score 3'
+      4: 'Coves Score 4'
+      5: 'Coves Score 5'
+      6: 'Coves Score 6'
+
+  BATMAN:
+    type: 'cat'
+    info: 'Basilar artery on CTA score'
+    categories: 11
+    description:
+      0: 'BATMAN Score 0'
+      1: 'BATMAN Score 1'
+      2: 'BATMAN Score 2'
+      3: 'BATMAN Score 3'
+      4: 'BATMAN Score 4'
+      5: 'BATMAN Score 5'
+      6: 'BATMAN Score 6'
+      7: 'BATMAN Score 7'
+      8: 'BATMAN Score 8'
+      9: 'BATMAN Score 9'
+      10: 'BATMAN Score 10'
+
+  # Clot Burden Score:
+  #   type: 'int'
+  #   info: 'Evaluates the extent of ipsilateral thrombus'
+
+  Clot Burden Score:
+    type: 'ord'
+    info: 'Evaluates the extent of ipsilateral thrombus'
+    categories: 11
+    description:
+      0: 'Clot Burden Score 0'
+      1: 'Clot Burden Score 1'
+      2: 'Clot Burden Score 2'
+      3: 'Clot Burden Score 3'
+      4: 'Clot Burden Score 4'
+      5: 'Clot Burden Score 5'
+      6: 'Clot Burden Score 6'
+      7: 'Clot Burden Score 7'
+      8: 'Clot Burden Score 8'
+      9: 'Clot Burden Score 9'
+      10: 'Clot Burden Score 10'
+
+  Vessel Stenosis:
+    type: 'cat'
+    info: 'Presence of arterial stenosis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Stenosis Location:
+    type: 'cat'
+    info: 'Location of arterial stenosis'
+    categories: 8
+    description:
+      0: 'No stenosis'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'MCA'
+      4: 'PCA'
+      5: 'VA'
+      6: 'BA'
+      7: 'ACA'
+
+  Arterial Dissection:
+    type: 'cat'
+    info: 'Presence of arterial dissection'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Arterial Dissection Location:
+    type: 'cat'
+    info: 'Location of arterial dissection'
+    categories: 4
+    description:
+      0: 'No dissection'
+      1: 'ACI'
+      2: 'VA'
+      3: 'other'
+
+  'CBF_lower30_volume':
+    type: 'int'
+    info: 'Volume in which the CBF is lower than 30% of the CBF of the contralateral side -- core --'
+
+  'Tmax_greater6s_volume':
+    type: 'int'
+    info: 'Volume in which the Tmax is greater than 6s -- penumbra + core --'
+
+  Mismatch Volume:
+    type: 'int'
+    info: 'penumbra volume'
+
+  Inverse Mismatch Ratio:
+    type: 'float'
+    info: 'volume core/total volume'
+
+  Hypoperfusion Index:
+    type: 'float'
+    info: 'Volume Tmax>10s/volume Tmax>6s'
+
+  CBV Index:
+    type: 'float'
+    info: 'Unknown'
+
+  Vessel Occlusion CTA:
+    type: 'cat'
+    info: 'Location of vessel occlusion on CTA'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotis-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  Time_CT_to_Angio.:
+    type: 'int'
+    info: 'Time from CT imaging to CTA imaging in minutes'
+
+Post-EVT:
+
+  Stenting:
+    type: 'cat'
+    info: 'Use of stenting'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Thrombectomy:
+    type: 'cat'
+    info: 'Use of thrombectomy'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Device:
+    type: 'cat'
+    info: 'Device employed for thrombectomy'
+    categories: 4
+    description:
+      0: 'No device - no thrombectomy'
+      1: 'Stent retriever'
+      2: 'Aspiration'
+      3: 'Both'
+
+  PTA:
+    type: 'cat'
+    info: 'Use of percutaneous transluminal angioplasty'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Number Maneuver:
+    type: 'int'
+    info: 'number of maneuvers that were necessary'
+
+  Lysis i.a.:
+    type: 'cat'
+    info: 'Use of intraarterial lysis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Lysis quantity:
+    type: 'int'
+    info: 'quantity of lysis used'
+
+  Frustrated Recanalization:
+    type: 'cat'
+    info: 'Whether the recanalization was unsuccessful (frustrated)'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Occlusion after Recan.:
+    type: 'cat'
+    info: 'Location of vessel occlusion after recanalization'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  # TICI:
+  #   type: 'int'
+  #   info: 'Reperfusion score'
+  TICI:
+    type: 'cat'
+    info: 'Reperfusion score'
+    categories: 5
+    description:
+      0: '0, No reperfusion'
+      1: '1, Minimal reperfusion'
+      2: '2a, Partial reperfusion (<50%)'
+      3: '2b, Partial reperfusion (>50%)'
+      4: '3, Total reperfusion'
+
+  SAE:
+    type: 'cat'
+    info: 'Subarachnoid hemorrhage encephalitis'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Occlusion new SupplyArea:
+    type: 'cat'
+    info: 'Vessel occlusion in a new Supply Area'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Vessel Occlusion new SupplyArea Location:
+    type: 'cat'
+    info: 'Location of vessel occlusion after recanalization in a new supply area'
+    categories: 11
+    description:
+      0: 'No occlusion'
+      1: 'ACI'
+      2: 'Carotid-T'
+      3: 'M1'
+      4: 'M2'
+      5: 'M3'
+      6: 'M4'
+      7: 'PCA'
+      8: 'ACA'
+      9: 'VA'
+      10: 'BA'
+
+  Vessel Occlusion new SupplyArea Treatment:
+    type: 'cat'
+    info: 'Whether new vessel occlusion was treated'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Infarct new SupplyArea:
+    type: 'cat'
+    info: 'Whether there is an infarct in the new supply area'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Time_Puncture_to_Recan.:
+    type: 'int'
+    info: 'Time from CTA to Recanalization in minutes'
+
+After24h:
+  Lacunar Infarct:
+    type: 'cat'
+    info: 'Presence of lacunar infarct'
+    categories: 3
+    description:
+      0: 'no infarct'
+      1: 'Lacunar infarct'
+      2: 'not a lacunar infarct'
+
+  Infarct Volume ControlCT:
+    type: 'int'
+    info: 'Volume in ml (cm³) of the infarct in the Control CT'
+
+  Hyperdense Media Sign:
+    type: 'cat'
+    info: 'Presence of hyperdense Media Sign'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  pc-Aspect ControlCT:
+    type: 'cat'
+    info: 'posterior ASPECTS in Control CT'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  # Aspect ControlCT:
+  #   type: 'int'
+  #   info: 'ASPECTS in Control CT'
+
+  Aspect ControlCT:
+    type: 'ord'
+    info: 'ASPECTS in Control CT'
+    categories: 11
+    description:
+      0: 'ASPECTS score of 0'
+      1: 'ASPECTS score of 1'
+      2: 'ASPECTS score of 2'
+      3: 'ASPECTS score of 3'
+      4: 'ASPECTS score of 4'
+      5: 'ASPECTS score of 5'
+      6: 'ASPECTS score of 6'
+      7: 'ASPECTS score of 7'
+      8: 'ASPECTS score of 8'
+      9: 'ASPECTS score of 9'
+      10: 'ASPECTS score of 10'
+
+  C_br2:
+    type: 'cat'
+    info: 'Caudate region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  IC_br2:
+    type: 'cat'
+    info: 'Internal capsule region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  INS_br2:
+    type: 'cat'
+    info: 'Insular ribbon affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  L_br2:
+    type: 'cat'
+    info: 'Lentiform nucleus region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M1_br2:
+    type: 'cat'
+    info: 'Anterior MCA cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M2_br2:
+    type: 'cat'
+    info: 'MCA cortex lateral to the insular ribbon region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M3_br2:
+    type: 'cat'
+    info: 'Posterior MCA Cortex region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M4_br2:
+    type: 'cat'
+    info: 'Anterior cortex immediately rostral to M1 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M5_br2:
+    type: 'cat'
+    info: 'Lateral cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  M6_br2:
+    type: 'cat'
+    info: 'Posterior cortex immediately rostral to M3 region affected'
+    categories: 2
+    description:
+      0: 'no'
+      1: 'yes'
+
+  Time_Recan_to_Control:
+    type: 'int'
+    info: 'Time from Recanalization to Control CT imaging in minutes'
+
+Output:
+
+  mRS90d:
+    type: 'ord'
+    info: 'Functional outcome at 90 days'
+    categories: 7
+    description:
+      0: 'mRS 0'
+      1: 'mRS 1'
+      2: 'mRS 2'
+      3: 'mRS 3'
+      4: 'mRS 4'
+      5: 'mRS 5'
+      6: 'mRS 6'
+
+  dmRS:
+    type: 'cat'
+    info: 'Binary functional outcome at 90 days'
+    categories: 2
+    description:
+      0: 'Good outcome'
+      1: 'Bad outcome'
+
+  mortality:
+    type: 'cat'
+    info: 'Mortality at 90 days'
+    categories: 2
+    description:
+      0: 'No'
+      1: 'Yes'
+
+  shift_mRS:
+    type: 'ord'
+    info: 'Shift in mRS'
+    categories: 7
+    description:
+      0: 'Shift of 0'
+      1: 'Shift of 1'
+      2: 'Shift of 2'
+      3: 'Shift of 3'
+      4: 'Shift of 4'
+      5: 'Shift of 5'
+      6: 'Shift of 6'
diff --git a/evaluate_model.py b/evaluate_model.py
new file mode 100644
index 0000000..6ae1de8
--- /dev/null
+++ b/evaluate_model.py
@@ -0,0 +1,433 @@
+from architectures.FCN import Basic_FCN
+from architectures.GCN import GCN
+from Metrics.RegressionMetrics import ClassificationMetrics
+from IO_utils.FeaturePreprocessing import FeaturePreprocessing
+from IO_utils.clean_table import clean_table
+from IO_utils.split_utils import split_data_cv
+from IO_utils.Dataloader import MyDataLoader
+from IO_utils.List_Reader import TableReader
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+import pandas as pd
+import torch
+import pprint
+
+
+def get_metrics_unc(uncertainty_array, mean_preds_array, label_array, p):
+    metrics = ClassificationMetrics(classes=2)
+
+    # keep only the samples whose uncertainty lies below the threshold p
+    indices = uncertainty_array < p
+    m = metrics.compute_metrics(mean_preds_array[indices], label_array[indices])
+    print('####')
+    print('Metrics for samples with uncertainty < {}'.format(p))
+    print(np.count_nonzero(indices))
+    pprint.pprint(m)
+    print('####')
+
+    return m
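get_metrics_unc implements a simple selective-prediction check: discard every sample whose uncertainty is at or above p and re-score the rest. A dummy call in the two-class one-hot convention used throughout this file:

    import numpy as np

    preds = np.array([[0.9, 0.1], [0.4, 0.6], [0.55, 0.45]])  # mean softmax outputs
    labels = np.array([[1, 0], [0, 1], [1, 0]])               # one-hot ground truth
    unc = np.array([0.2, 0.3, 0.9])                           # predictive entropies
    m = get_metrics_unc(unc, preds, labels, p=0.8)            # drops the most uncertain sample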
ax3.plot(ths, samples, 'k--', markersize=3, label="# Samples") + ax3.invert_xaxis() + + # merge the line handles from both y-axes into a single legend + lns = lns1 + lns2 + labs = [l.get_label() for l in lns] + ax2.legend(lns, labs, loc=0) + + #plt.xlabel('{} uncertainty'.format(uncertainty)) + + +def plot_selectedsamples_metrics(uncertainty_array, mean_preds_array, label_array, uncertainty=""): + order = np.argsort(uncertainty_array) + + samples = [] + auc = [] + acc = [] + b_acc = [] + th_entr = [] + + n_samples = uncertainty_array.shape[0] + + # keep at least 20% of the samples + metrics = ClassificationMetrics(classes=2) + + for i in range(order.shape[0] - int(0.2 * n_samples)): + number_samples = order.shape[0] - i + metrics.clear() + kn = order[:(number_samples - 1)] + m = metrics.compute_metrics(mean_preds_array[kn], label_array[kn]) + + # print('###### ') + # print('Number of samples {}'.format(number_samples)) + # print(m['cm']) + # print(m['auc']) + # print('###### ') + + samples.extend([number_samples]) + auc.extend([m['auc']]) + acc.extend([m['accuracy']]) + b_acc.extend([m['balanced_accuracy']]) + th_entr.extend([uncertainty_array[kn[-1]]]) + + plt.figure() + + plt.plot(th_entr, acc, 'bo-', markersize=3, label="Accuracy") + plt.plot(th_entr, b_acc, 'gx-', markersize=3, label='Balanced accuracy') + plt.plot(th_entr, auc, 'rx-', markersize=3, label='AUC') + plt.xlabel('{} uncertainty'.format(uncertainty)) + + plt.figure() + plt.plot(samples, acc, 'bo-', markersize=3, label='Accuracy') + plt.plot(samples, b_acc, 'gx-', markersize=3, label='Balanced accuracy') + plt.plot(samples, auc, 'rx-', markersize=3, label='AUC') + plt.text(26, plt.gca().get_ylim()[0] + 0.05, + 'AUC acc {} \n AUC bacc {} \n AUC auc {} '.format(np.round(np.trapz(acc) / len(acc), 2), + np.round(np.trapz(b_acc) / len(b_acc), 2), + np.round(np.trapz(auc) / len(auc), 2))) + plt.xlabel("Number of samples") + plt.ylabel("Performance") + plt.ylim(0.7, 1) + plt.legend() + plt.title("Metrics") + # plt.show() + + +def plot_uncetainties(p, y, c, e): + true_pred = [] + for i in range(p.shape[0]): + pred = np.argmax(p[i, :]) + label = np.argmax(y[i, :]) + if pred == label: + if label == 0: + true_pred.extend(['lightgreen']) + else: + true_pred.extend(['darkgreen']) + else: + if label == 0: + true_pred.extend(['salmon']) + else: + true_pred.extend(['darkred']) + + plt.figure() + for g in ['lightgreen', 'darkgreen', 'salmon', 'darkred']: + i = [i for i in range(len(true_pred)) if true_pred[i] == g] + plt.scatter(c[i], e[i], c=g, s=10) + # sc =plt.scatter(c, e, c=true_pred,) + plt.legend(['TP', 'TN', 'FN', 'FP']) + plt.xlabel('Predictive uncertainty') + plt.ylabel('Epistemic uncertainty') + +def plot_age_uncertainty(p, y, c, age): + true_pred = [] + for i in range(p.shape[0]): + pred = np.argmax(p[i, :]) + label = np.argmax(y[i, :]) + if pred == label: + if label == 0: + true_pred.extend(['lightgreen']) + else: + true_pred.extend(['darkgreen']) + else: + if label == 0: + true_pred.extend(['salmon']) + else: + true_pred.extend(['darkred']) + + plt.figure() + for g in ['lightgreen', 'darkgreen', 'salmon', 'darkred']: + i = [i for i in range(len(true_pred)) if true_pred[i] == g] + plt.scatter(age[i], c[i], c=g, s=10) + # sc =plt.scatter(c, e, c=true_pred,) + plt.legend(['TP', 'TN', 'FN', 'FP']) + plt.xlabel('Age') + plt.ylabel('Uncertainty') + + +def test_graph(config, loader, test_indices, state_dicts): + models = [] + + for m in state_dicts: + model = GCN(nfeat=loader.dataset[0].num_features, nclass=2, dropout=config['dropout']) + 
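# MC-dropout ensemble: each saved fold checkpoint is loaded into a fresh GCN; dropout is deliberately left active at inference (model.train() below), so the repeated stochastic forward passes act as approximate posterior samples + 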
model.load_state_dict(torch.load(m)) + models.append(model) + + n_models = 10 + n_dropout = 100 + + for batch in loader: + + samples = len(test_indices) + predictions_y = np.zeros([samples, 2, n_models, n_dropout]) + + x = batch['x'] + edge_index = batch['edge_index'] + edge_weight = batch['weights'] + y = batch['y'][test_indices] + + for m in range(n_models): + for k in range(n_dropout): + model = models[m] + model.train() + test_out = models[m].forward(x, edge_index, edge_weight) + f = torch.softmax(test_out, dim=1) + f_cloned = f.clone() + f_cloned[f_cloned == 0] = 1e-30 + + predictions_y[:, :, m, k] = f_cloned[test_indices].detach().cpu().numpy() + + mean_preds = np.mean(np.mean(predictions_y, axis=2), axis=2) + predictive_uncertainty = np.sum(-np.multiply(np.log(mean_preds), mean_preds), axis=1) + + expect_data_uncertainty = np.mean( + np.mean(np.sum(-np.multiply(np.log(predictions_y), predictions_y), axis=1), axis=1), axis=1) + epistemic_uncertainty = predictive_uncertainty - expect_data_uncertainty + + """for sample in range(samples): + print('#####') + print('Sample {}'.format(sample)) + print(' True value: {}, Prediction mean: {}'.format(y[sample], mean_preds[sample])) + print(' Predictive {}, Epistemic {}'.format(predictive_uncertainty[sample], epistemic_uncertainty[sample])) + print('#####')""" + + print('Mean predictive uncertainty', np.mean(predictive_uncertainty)) + print('Mean epistemic uncertainty', np.mean(epistemic_uncertainty)) + + return mean_preds, predictive_uncertainty, epistemic_uncertainty, y.detach().cpu().numpy() + + +def test(config, test_loader, state_dict): + models = [] + + for m in state_dict: + model = Basic_FCN(in_features=test_loader.dataset.features.shape[1], layers=config['layers'], + out_features=2, + dropout_rate=config['dropout']) + + model.load_state_dict(torch.load(m)) + models.append(model) + + n_models = 5 + n_dropout = 1 + + for batch in test_loader: + + samples = batch['x'].shape[0] + predictions_y = np.zeros([samples, 2, n_models, n_dropout]) + + x = batch['x'] + y = batch['y'] + + for m in range(n_models): + for k in range(n_dropout): + model = models[m] + model.train() + test_out = models[m].forward(x) + f = torch.softmax(test_out, dim=1) + f = torch.clamp(f, min=1e-30) # guard against log(0) below, mirroring the clamping in test_graph + + predictions_y[:, :, m, k] = f.detach().cpu().numpy() + + mean_preds = np.mean(np.mean(predictions_y, axis=2), axis=2) + predictive_uncertainty = np.sum(-np.multiply(np.log(mean_preds), mean_preds), axis=1) + + expect_data_uncertainty = np.mean( + np.mean(np.sum(-np.multiply(np.log(predictions_y), predictions_y), axis=1), axis=1), axis=1) + epistemic_uncertainty = predictive_uncertainty - expect_data_uncertainty + + """for sample in range(samples): + print('#####') + print('Sample {}'.format(sample)) + print(' True value: {}, Prediction mean: {}'.format(y[sample], mean_preds[sample])) + print(' Predictive {}, Epistemic {}'.format(predictive_uncertainty[sample], epistemic_uncertainty[sample])) + print('#####')""" + + print('Mean predictive uncertainty', np.mean(predictive_uncertainty)) + print('Mean epistemic uncertainty', np.mean(epistemic_uncertainty)) + + return mean_preds, predictive_uncertainty, epistemic_uncertainty, y.detach().cpu().numpy() + + +if __name__ == "__main__": + + # %% DATALOADING + + ## Clean the original table + excel_dir = "../data/TheList_anonymous_mv.xlsx" + clean_df = clean_table(excel_dir=excel_dir, pre_mRS=2) + + # Given a clean table, extract feature and label vectors + table = TableReader(input_df=clean_df, tables=['all_timepoints'], data_dictionaries='timepoints', + 
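# mv_strategy='median' imputes missing values with per-feature medians (see IO_utils/mv_strategies.py) + 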
mv_strategy='median', + output_feature=['dmRS']) + + output_vector = table.output_vector + + fold_indices = split_data_cv(output_vector, seed=5, cv=5) + + results_pred = {} + results_epis = {} + for p in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]: + results_pred[p] = {} + results_epis[p] = {} + + # for f in [5, 10, 15, 20, 25, 30, 40, 50]: + for f in [15]: + features = table.select_features(method='mrmr', k=f, fold_indices=fold_indices) + + feature_vector = table.final_df[features] + FP = FeaturePreprocessing(feature_vector, table.selected_d) + feature_vector = FP.create_features(feature_vector) + config = { + 'layers': {'number': 3, + 'layer1': 40, + 'layer2': 20, + + }, + + 'dropout': 0, + 'out_classes': 2} + + ######## + mean_preds = [] + combined = [] + epistemic = [] + cls = [] + + for k in range(5): + dataloader_fold = MyDataLoader(feature_vector, output_vector, fold_indices[k], table.selected_d, + one_hot=True) + dl = dataloader_fold.get_loaders() + state_dict_paths = ["C:/Users/martinca1/PhD/Projects/AI_Stroke/out/models/features_{}/" + "model_{}_fold_{}.pt".format(f, i, k) for i in range(5)] + pred, unc, epistemic_unc, y = test(config, dl[2], state_dict_paths) + + # _, _, _, _ = test(config, dl[0], models) + + mean_preds.extend(pred.tolist()) + combined.extend(unc.tolist()) + epistemic.extend(epistemic_unc.tolist()) + cls.extend(y.tolist()) + + p = np.array(mean_preds) + y = np.array(cls) + c = np.array(combined) + e = np.array(epistemic) + + # with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/predictive_uncertainty.xlsx") as writer: + for per in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]: + results_pred[per][f] = get_metrics_unc(c, p, y, per) + # pd_per = pd.DataFrame(results_pred[per]).T + # pd_per.to_excel(writer, sheet_name=str(per)) + + # with pd.ExcelWriter("C:/Users/martinca1/PhD/Projects/AI_Stroke/out/uncertainty/epistimic_uncertainty.xlsx") as writer: + # for per in [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2,0.1]: + # results_epis[per][f] = get_metrics_unc(e, p, y, per) + # pd_per = pd.DataFrame(results_epis[per]).T + # pd_per.to_excel(writer, sheet_name=str(per)) + + true_pred = [] + for i in range(p.shape[0]): + pred = np.argmax(p[i, :]) + label = np.argmax(y[i, :]) + if pred == label: + if label == 0: + true_pred.extend(['lightgreen']) + else: + true_pred.extend(['darkgreen']) + else: + if label == 0: + true_pred.extend(['salmon']) + else: + true_pred.extend(['darkred']) + + metrics = ClassificationMetrics(classes=2) + m = metrics.compute_metrics(p, y) + print('Combined metrics') + print(m) + + plot_selectedsamples_metrics(c, p, y, uncertainty='Predictive') + plot_selectedsamples_metrics(e, p, y, uncertainty='Epistemic') + + plt.figure() + for g in ['lightgreen', 'darkgreen', 'salmon', 'darkred']: + i = [i for i in range(len(true_pred)) if true_pred[i] == g] + plt.scatter(c[i], e[i], c=g, label='test') + # sc =plt.scatter(c, e, c=true_pred,) + plt.legend(['TP', 'TN', 'FN', 'FP']) + plt.xlabel('Predictive uncertainty') + plt.ylabel('Epistemic uncertainty') + plt.show() diff --git a/test.py b/test.py deleted file mode 100644 index e69de29..0000000 diff --git a/train.py b/train.py new file mode 100644 index 0000000..056072a --- /dev/null +++ b/train.py @@ -0,0 +1,204 @@ +from architectures.FCN import Basic_FCN +from Metrics.RegressionMetrics import ClassificationMetrics +from Loss.Loss_uncertainty import loss_uncertainty + +import copy +from hyperopt import STATUS_OK +from ignite.engine import Engine, Events +import neptune +import 
pprint +import time +import tqdm +import torch + + +def train_model(config, loaders, save_metrics=False): + # torch.manual_seed(0) + + train_loader, val_loader, test_loader = loaders + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + model = Basic_FCN(in_features=train_loader.dataset.features.shape[1], layers=config['layers'], + out_features=2, + dropout_rate=config['dropout']) + + n_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + print('Model parameters', n_params) + model.to(device) + + # METRICS + train_metrics = ClassificationMetrics(classes=2) + val_metrics = ClassificationMetrics(classes=2) + test_metrics = ClassificationMetrics(classes=2) + + # OPTIMIZER + optimizer = torch.optim.Adam(model.parameters(), + lr=config['lr'], + #momentum=config['momentum'], + weight_decay=config['weight_decay']) + + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer) + + # LOSS + import numpy as np + values, counts = np.unique(train_loader.dataset.labels, return_counts=True) + # inverse-frequency class weights to counter class imbalance + weights = torch.FloatTensor(1 - counts / np.sum(counts)).to(device) + #weights = torch.FloatTensor((0.5, 0.5)).to(device) + loss_train = loss_uncertainty(weights=weights) + loss_val = loss_uncertainty(weights=weights) + loss_test = loss_uncertainty(weights=weights) + + def train(Engine, batch): + + model.train() + optimizer.zero_grad() + data = batch['x'].clone().to(device) + y_train = batch['y'].clone().to(device) + + out = model.forward(data) + + Loss = loss_train.get_loss(out, y_train.unsqueeze(1)) + m_train = train_metrics.compute_metrics(out.detach().cpu().numpy(), batch['y'].detach().cpu().numpy()) + + Loss.backward() + optimizer.step() + + Engine.state.loss = loss_train.get_total_loss() + Engine.state.metrics = train_metrics + Engine.state.m = m_train + Engine.state.out = out + Engine.state.y = y_train.detach().cpu().numpy() + + def validate(Engine, batch): + model.eval() + with torch.no_grad(): + data = batch['x'].clone().to(device) + y_val = batch['y'].clone().to(device) + + out = model.forward(data) + + lv = loss_val.get_loss(out, y_val.unsqueeze(1)) + scheduler.step(lv) + #print("Epoch {}, lr {}".format(trainer_engine.state.epoch, optimizer.param_groups[0]['lr'])) + m_val = val_metrics.compute_metrics(out.detach().cpu().numpy(), batch['y'].detach().cpu().numpy()) + + Engine.state.loss = loss_val.get_total_loss() + Engine.state.metrics = val_metrics + Engine.state.m = m_val + + def test(Engine, batch): + model.eval() + with torch.no_grad(): + data = batch['x'].clone().to(device) + y_test = batch['y'].clone().to(device) + + out = model.forward(data) + f = torch.softmax(out, dim=1) + _ = loss_test.get_loss(f, y_test.unsqueeze(1)) + m_test = test_metrics.compute_metrics(f.detach().cpu().numpy(), batch['y'].detach().cpu().numpy()) + + Engine.state.loss = loss_test.get_total_loss() + Engine.state.metrics = test_metrics + Engine.state.m = m_test + + trainer_engine = Engine(train) + validator_engine = Engine(validate) + test_engine = Engine(test) + + @trainer_engine.on(Events.STARTED) + def training_bootup(engine): + + # print("Training started") + # print("Train_loader,: iterations/epoch: ", len(train_loader), ", total number of samples", + # len(train_loader.sampler)) + # print("Validation_loader,: iterations/epoch: ", len(val_loader), ", total number of samples", + # len(val_loader.sampler)) + # print("Test_loader,: iterations/epoch: ", len(test_loader), ", total number of samples", + # len(test_loader.sampler)) + + time.sleep(0.001) + engine.pbar = tqdm.tqdm(total=300, 
desc='Training progress') + validator_engine.state.loss = 0 + engine.state.best_epoch = 0 + engine.state.min_loss = 1000 + engine.state.best_train_metrics = {} + engine.state.best_val_metrics = {} + engine.state.best_test_metrics = {} + + engine.count = 0 + + @trainer_engine.on(Events.EPOCH_COMPLETED) + def run_validation(engine): + validator_engine.run(val_loader, max_epochs=1) + engine.pbar.update(1) + engine.pbar.set_postfix({'loss': validator_engine.state.loss, + 'loss_train': trainer_engine.state.loss, + 'accuracy_train': trainer_engine.state.m['accuracy']}, refresh=True) + + test_engine.run(test_loader, max_epochs=1) + + if validator_engine.state.loss < engine.state.min_loss: + engine.count = 0 + engine.state.min_loss = validator_engine.state.loss + # + else: + engine.count += 1 + # print('Strike ', engine.count) + if engine.count > 40: + trainer_engine.terminate() + if True: + engine.state.best_epoch = engine.state.epoch + engine.state.best_train_metrics = trainer_engine.state.m + engine.state.best_val_metrics = validator_engine.state.m + engine.state.best_test_metrics = test_engine.state.m + engine.state.best_model = model.state_dict() + + train_metrics.clear() + val_metrics.clear() + test_metrics.clear() + + loss_train.clear() + loss_val.clear() + loss_test.clear() + + @trainer_engine.on(Events.EPOCH_COMPLETED) + def write_neptune(): + + if save_metrics: + neptune.log_metric('loss_train', trainer_engine.state.loss.detach().cpu().numpy()) + neptune.log_metric('loss_val', validator_engine.state.loss.detach().cpu().numpy()) + neptune.log_metric('loss_test', test_engine.state.loss.detach().cpu().numpy()) + neptune.log_metric('train_AUC', trainer_engine.state.m['auc']) + neptune.log_metric('train_accuracy', trainer_engine.state.m['accuracy']) + neptune.log_metric('val_AUC', validator_engine.state.m['auc']) + neptune.log_metric('val_accuracy', validator_engine.state.m['accuracy']) + neptune.log_metric('test_AUC', test_engine.state.m['auc']) + neptune.log_metric('test_accuracy', test_engine.state.m['accuracy']) + + @trainer_engine.on(Events.COMPLETED) + def run_testing(engine): + + # print(time.ctime(), "Running testing") + # test_engine.run(test_loader, max_epochs=1) + + engine.pbar.close() + # time.sleep(0.001) + + trainer_engine.run(train_loader, max_epochs=300) + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f}".format(trainer_engine.state.m['auc'], + validator_engine.state.m[ + 'auc'], + test_engine.state.m['auc'])) + # visualize(h=trainer_engine.state.best_out, color=trainer_engine.state.best_y) + result = {'loss': trainer_engine.state.min_loss, + 'epoch': trainer_engine.state.best_epoch, + 'train_metrics': trainer_engine.state.best_train_metrics, + 'val_metrics': trainer_engine.state.best_val_metrics, + 'test_metric': trainer_engine.state.best_test_metrics, + 'status': STATUS_OK} + + #pprint.pprint(trainer_engine.state.best_train_metrics) + #pprint.pprint(trainer_engine.state.best_val_metrics) + #pprint.pprint(trainer_engine.state.best_test_metrics) + + return result, trainer_engine.state.best_model diff --git a/train_graph.py b/train_graph.py new file mode 100644 index 0000000..7346ef0 --- /dev/null +++ b/train_graph.py @@ -0,0 +1,223 @@ +from architectures.GCN import GCN + +from Metrics.RegressionMetrics import ClassificationMetrics +from Loss.Loss_uncertainty import loss_uncertainty + +import copy +from hyperopt import STATUS_OK +from ignite.engine import Engine, Events +import neptune +import pprint +import time +import tqdm +import torch + + 
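+# Transductive setup: all patients live in one shared population graph, and the
+# train/val/test splits are index masks over its nodes (train_indices, val_indices,
+# test_indices below) rather than separate data loaders.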
+def train_model_graph(config, loaders, indices, save_metrics=False): + # torch.manual_seed(0) + + train_indices, val_indices, test_indices = indices + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + model = GCN(nfeat=loaders.dataset[0].num_features, nclass=2, dropout=config['dropout']) + + n_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + print('Model parameters', n_params) + model.to(device) + + # METRICS + train_metrics = ClassificationMetrics(classes=2) + val_metrics = ClassificationMetrics(classes=2) + test_metrics = ClassificationMetrics(classes=2) + + # OPTIMIZER + optimizer = torch.optim.Adam(model.parameters(), + lr=config['lr'], + #momentum=config['momentum'], + weight_decay=config['weight_decay']) + + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.7) + + # LOSS + import numpy as np + values, counts = np.unique(loaders.dataset[0].labels.data, return_counts=True) + # inverse-frequency class weights to counter class imbalance + weights = torch.FloatTensor(1 - counts / np.sum(counts)).to(device) + #weights = torch.FloatTensor((0.5, 0.5)).to(device) + loss_train = loss_uncertainty(weights=weights) + loss_val = loss_uncertainty(weights=weights) + loss_test = loss_uncertainty(weights=weights) + + def train(Engine, batch): + + model.train() + optimizer.zero_grad() + data = batch['x'].clone().to(device) + edge_index = batch['edge_index'].clone().to(device) + edge_weight = batch['weights'].clone().to(device) + y_train = batch['y'].clone().to(device) + + out = model.forward(data, edge_index, edge_weight) + # f = torch.softmax(out, dim=1) + f_cloned = out.clone() + #f_cloned[f_cloned == 0] = 1e-30 + + Loss = loss_train.get_loss(f_cloned[train_indices], y_train[train_indices].unsqueeze(1)) + + m_train = train_metrics.compute_metrics(f_cloned[train_indices].detach().cpu().numpy(), + batch['y'][train_indices].detach().cpu().numpy()) + + Loss.backward() + optimizer.step() + + Engine.state.loss = loss_train.get_total_loss() + Engine.state.metrics = train_metrics + Engine.state.m = m_train + Engine.state.out = out + Engine.state.y = y_train.detach().cpu().numpy() + + def validate(Engine, batch): + model.eval() + with torch.no_grad(): + data = batch['x'].clone().to(device) + edge_index = batch['edge_index'].clone().to(device) + edge_weight = batch['weights'].clone().to(device) + y_val = batch['y'].clone().to(device) + + out = model.forward(data, edge_index, edge_weight) + #f = torch.softmax(out, dim=1) + f_cloned = out.clone() + f_cloned[f_cloned == 0] = 1e-30 + + lv = loss_val.get_loss(f_cloned[val_indices], y_val[val_indices].unsqueeze(1)) + scheduler.step(lv) + #print("Epoch {}, lr {}".format(trainer_engine.state.epoch, optimizer.param_groups[0]['lr'])) + m_val = val_metrics.compute_metrics(f_cloned[val_indices].detach().cpu().numpy(), + batch['y'][val_indices].detach().cpu().numpy()) + + Engine.state.loss = loss_val.get_total_loss() + Engine.state.metrics = val_metrics + Engine.state.m = m_val + + def test(Engine, batch): + model.eval() + with torch.no_grad(): + data = batch['x'].clone().to(device) + edge_index = batch['edge_index'].clone().to(device) + edge_weight = batch['weights'].clone().to(device) + y_test = batch['y'].clone().to(device) + + out = model.forward(data, edge_index, edge_weight) + # f = torch.softmax(out, dim=1) + f_cloned = out.clone() + f_cloned[f_cloned == 0] = 1e-30 + + _ = loss_test.get_loss(f_cloned[test_indices], y_test[test_indices].unsqueeze(1)) + m_test = test_metrics.compute_metrics(f_cloned[test_indices].detach().cpu().numpy(), + 
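# metrics are computed on the held-out test nodes only; the forward pass above ran on the full graph + 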
batch['y'][test_indices].detach().cpu().numpy()) + + Engine.state.loss = loss_test.get_total_loss() + Engine.state.metrics = test_metrics + Engine.state.m = m_test + + trainer_engine = Engine(train) + validator_engine = Engine(validate) + test_engine = Engine(test) + + @trainer_engine.on(Events.STARTED) + def training_bootup(engine): + + # print("Training started") + # print("Train_loader,: iterations/epoch: ", len(train_loader), ", total number of samples", + # len(train_loader.sampler)) + # print("Validation_loader,: iterations/epoch: ", len(val_loader), ", total number of samples", + # len(val_loader.sampler)) + # print("Test_loader,: iterations/epoch: ", len(test_loader), ", total number of samples", + # len(test_loader.sampler)) + + time.sleep(0.001) + engine.pbar = tqdm.tqdm(total=300, desc='Training progress') + validator_engine.state.loss = 0 + engine.state.best_epoch = 0 + engine.state.min_loss = 1000 + engine.state.best_train_metrics = {} + engine.state.best_val_metrics = {} + engine.state.best_test_metrics = {} + + engine.count = 0 + + @trainer_engine.on(Events.EPOCH_COMPLETED) + def run_validation(engine): + validator_engine.run(loaders, max_epochs=1) + engine.pbar.update(1) + engine.pbar.set_postfix({'loss': validator_engine.state.loss, + 'loss_train': trainer_engine.state.loss, + 'accuracy_train': trainer_engine.state.m['accuracy']}, refresh=True) + + test_engine.run(loaders, max_epochs=1) + + if validator_engine.state.loss < engine.state.min_loss: + engine.count = 0 + engine.state.min_loss = validator_engine.state.loss + # + else: + engine.count += 1 + if engine.count > 20: + trainer_engine.terminate() + if True: + engine.state.best_epoch = engine.state.epoch + engine.state.best_train_metrics = trainer_engine.state.m + engine.state.best_val_metrics = validator_engine.state.m + engine.state.best_test_metrics = test_engine.state.m + engine.state.best_model = model.state_dict() + + train_metrics.clear() + val_metrics.clear() + test_metrics.clear() + + loss_train.clear() + loss_val.clear() + loss_test.clear() + + @trainer_engine.on(Events.EPOCH_COMPLETED) + def write_neptune(): + + if save_metrics: + neptune.log_metric('loss_train', trainer_engine.state.loss.detach().cpu().numpy()) + neptune.log_metric('loss_val', validator_engine.state.loss.detach().cpu().numpy()) + neptune.log_metric('loss_test', test_engine.state.loss.detach().cpu().numpy()) + neptune.log_metric('train_AUC', trainer_engine.state.m['auc']) + neptune.log_metric('train_accuracy', trainer_engine.state.m['accuracy']) + neptune.log_metric('val_AUC', validator_engine.state.m['auc']) + neptune.log_metric('val_accuracy', validator_engine.state.m['accuracy']) + neptune.log_metric('test_AUC', test_engine.state.m['auc']) + neptune.log_metric('test_accuracy', test_engine.state.m['accuracy']) + + @trainer_engine.on(Events.COMPLETED) + def run_testing(engine): + + # print(time.ctime(), "Running testing") + # test_engine.run(test_loader, max_epochs=1) + + engine.pbar.close() + # time.sleep(0.001) + + trainer_engine.run(loaders, max_epochs=300) + print( + "AUC of training set: {:.2f}, validation set {:.2f}, and test set {:.2f}".format(trainer_engine.state.m['auc'], + validator_engine.state.m[ + 'auc'], + test_engine.state.m['auc'])) + # visualize(h=trainer_engine.state.best_out, color=trainer_engine.state.best_y) + result = {'loss': trainer_engine.state.min_loss, + 'epoch': trainer_engine.state.best_epoch, + 'train_metrics': trainer_engine.state.best_train_metrics, + 'val_metrics': trainer_engine.state.best_val_metrics, + 
'test_metric': trainer_engine.state.best_test_metrics, + 'status': STATUS_OK} + + #pprint.pprint(trainer_engine.state.best_train_metrics) + #pprint.pprint(trainer_engine.state.best_val_metrics) + #pprint.pprint(trainer_engine.state.best_test_metrics) + + return result, trainer_engine.state.best_model -- GitLab
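
For reference, the ensemble-plus-dropout uncertainty decomposition that both test() and test_graph() in evaluate_model.py implement can be condensed into a few NumPy lines. The sketch below is illustrative only and not part of the commit; the function name decompose_uncertainty and the array shapes are assumptions chosen to mirror the code above (softmax outputs stacked over ensemble members and dropout passes). Predictive uncertainty is the entropy of the averaged probabilities, expected data (aleatoric) uncertainty is the average entropy of the individual predictions, and the epistemic part is their difference.

import numpy as np

def decompose_uncertainty(preds, eps=1e-30):
    # preds: softmax outputs, shape [n_samples, n_classes, n_models, n_dropout]
    preds = np.clip(preds, eps, 1.0)       # guard against log(0), as evaluate_model.py does
    mean_preds = preds.mean(axis=(2, 3))   # average over ensemble members and dropout draws
    # predictive (total) uncertainty: entropy of the averaged distribution
    predictive = -(mean_preds * np.log(mean_preds)).sum(axis=1)
    # expected data (aleatoric) uncertainty: mean entropy of each individual prediction
    data = (-(preds * np.log(preds)).sum(axis=1)).mean(axis=(1, 2))
    # epistemic uncertainty: the remainder, i.e. disagreement between members
    return mean_preds, predictive, data, predictive - data

# toy usage; 10 models x 100 dropout passes mirrors test_graph()
rng = np.random.default_rng(0)
logits = rng.normal(size=(4, 2, 10, 100))
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
mean_preds, pred_u, data_u, epi_u = decompose_uncertainty(probs)
print(pred_u, data_u, epi_u)

The epistemic term equals the mutual information between the prediction and the sampled model, so it shrinks toward zero when every ensemble member and dropout draw agrees, while the data term stays high for genuinely ambiguous samples.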