Commit 3318ef5f authored by DEEPAK CHARLES

Baseline subtraction, dataframe fixed

parent 032c3618
@@ -119,7 +119,7 @@ def read_single_csv_file(filename: str = "../taste_test_dir/for_charles_Taste_fi
     labchart_initial_time = adi.read_file(adicht_file).records[0].record_time.rec_datetime
     event_datetime = unix_to_datetime(end_time)
-    # Iryna found the time line skey resulting in 0 at the end of the time line
+    # Iryna found the timeline skew resulting in 0 at the end of the timeline
     # synchronised_time = event_datetime - event_datetime[-2] #labchart_initial_time
     synchronised_time = event_datetime - labchart_initial_time
     event_ticks = get_ticks(synchronised_time).astype(int)
...
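For context: the hunk above synchronises LabChart event timestamps against the recording start and converts the offsets to sample indices. A minimal sketch of what get_ticks presumably does, assuming a fixed tick rate; the actual implementation is not part of this diff:

    import numpy as np

    TICK_RATE_HZ = 1000  # assumed LabChart tick rate, not confirmed by this diff

    def get_ticks(synchronised_time):
        # Convert timedelta offsets (event time minus recording start)
        # into fractional sample indices; the caller casts to int.
        seconds = np.array([td.total_seconds() for td in np.atleast_1d(synchronised_time)])
        return seconds * TICK_RATE_HZ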
@@ -11,6 +11,8 @@ from neurokit2.signal import signal_rate, signal_sanitize
 from neurokit2.rsp.rsp_amplitude import rsp_amplitude
 from neurokit2.rsp.rsp_clean import rsp_clean
+from scipy import signal
+
 DATA = {"signal": np.ndarray,
         "block_name": np.ndarray}
@@ -300,10 +302,11 @@ def slice_channels(channels: list, indx_start: np.ndarray, indx_end: np.ndarray,
     for id, channel in enumerate(channels):  # will fail for Puls, have to bypass it
         if channel.name in channel_names:
-            # TODO implement the post processing for Puls from HR
+            # TODO implement the post processing for the breathing signal
             Error = None
             if channel.name == "HR":
+                # default post-processing number is set to 1
                 data = slice_signals(channel, indx_start, indx_end, take_mean, post_process)
             if channel.name == "Hautleitfähigkeit":
                 post_process = 2 * post_process
@@ -322,7 +325,8 @@ def slice_channels(channels: list, indx_start: np.ndarray, indx_end: np.ndarray,
     return channels_df, Error

 def slice_channels_for_analysis(channels: list, indx_start: np.ndarray, indx_end: np.ndarray, block_names: np.ndarray,
-                                channel_names: list = ['Atmung', 'HR', 'Hautleitfähigkeit', 'Channel 4'], subject: str = "subject_name", mean: bool = False) -> Tuple[np.ndarray, np.ndarray]:
+                                channel_names: list = ['Atmung', 'HR', 'Hautleitfähigkeit', 'Puls', 'Atmungsrate'], subject: str = "subject_name", mean: bool = False,
+                                base_normalization: bool = True) -> Tuple[np.ndarray, np.ndarray]:
     """
     slice the signals for all the channels and return signal
     :param channels: array of adi object channel
@@ -335,7 +339,7 @@ def slice_channels_for_analysis(channels: list, indx_start: np.ndarray, indx_end
     """
     channels_df = pd.DataFrame()
     block_names = np.array(block_names, dtype=str)
-    block_names = np.delete(block_names, 0)
     subject_ids = [subject] * block_names.size
     channels_df["subject"] = subject_ids
     channels_df["comments"] = block_names
@@ -352,35 +356,38 @@ def slice_channels_for_analysis(channels: list, indx_start: np.ndarray, indx_end
     else:
         take_mean = np.zeros_like(indx_start, dtype=bool)

-    list_features = []
     for id, channel in enumerate(channels):  # will fail for Puls, have to bypass it
-        #if channel.name in channel_names:
-        # TODO implement the post processing for Puls from HR
-        #if channel.name == "HR":
-        #    data = slice_signals(channel, indx_start, indx_end, take_mean, post_process)
-        if channel.name == "Hautleitfähigkeit":
-            post_process = 2 * post_process
-            data = slice_signals(channel, indx_start, indx_end, take_mean, post_process)
-        #else:
-            #data = slice_signals(channel, indx_start, indx_end)
-        # subtracting baseline values
-        baseline = np.zeros_like(data)
-        min_shape = data[-1].shape[0]
-        baseline.fill(data[data[0].shape[0]-min_shape:])
-        data = data - baseline
-        #list_features.append(normalized_data)
-    return channels_df, data
+        if channel.name in channel_names:
+            # TODO implement the post processing for other channels
+            if channel.name == "HR":
+                data = slice_signals(channel, indx_start, indx_end, take_mean, post_process)
+            if channel.name == "Hautleitfähigkeit":
+                post_process = 2 * post_process
+                data = slice_signals(channel, indx_start, indx_end, take_mean, post_process)
+            else:
+                data = slice_signals(channel, indx_start, indx_end, take_mean, post_process)
+            # subtracting baseline values; verify baseline subtraction
+            if base_normalization:
+                baseline = np.mean(data[0])
+                data = np.subtract(data, baseline).astype(np.float16)
+                #data = signal.detrend(data, axis=0, type="constant", bp=baseline, overwrite_data=False)
+            # baseline subtraction is not relevant for neurokit tonic signals unless there is little change in the regression
+            # storing all the channel blocks per subject in a data frame, including baseline
+            channels_df[channel.name] = data
+        else:
+            Error = None  # "channel name not found in the channel list"
+    return channels_df, Error

 def get_slice_range(tick_position: np.ndarray, intend_front: int = 2e4, intend_back: int = 2e4, edit_last_comment: bool = False) -> Tuple[np.ndarray, np.ndarray]:
     """
...
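The heart of this commit is the new base_normalization branch: the mean of the first (baseline) block is subtracted from every block. A self-contained sketch of the same idea follows; note that scipy ignores bp unless type="linear", so the commented-out signal.detrend call above would simply subtract the overall mean rather than segment at the baseline boundary:

    import numpy as np
    from scipy import signal

    def subtract_baseline(blocks):
        # Mirrors the new base_normalization branch: remove the mean of
        # the first (baseline) block from every block. float32 is used
        # here; the commit casts to float16, trading precision for memory.
        baseline = np.mean(blocks[0])
        return [np.asarray(b, dtype=np.float32) - baseline for b in blocks]

    # scipy equivalent for one concatenated signal: type="constant"
    # subtracts the overall mean along the axis (bp only applies to type="linear").
    x = np.concatenate([np.full(100, 5.0), np.full(300, 7.0)])
    assert np.allclose(signal.detrend(x, type="constant"), x - x.mean())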
@@ -11,7 +11,8 @@ def read_adicht_file(file: str, front_offset: int = 28e3, back_offset: int = 28e3,
                     channel_names: list = ['Atmung', 'HR', 'Hautleitfähigkeit', 'Puls', 'Atmungsrate'],
                     start_block: list = ['baseline_start'],
                     end_block: list = ['baseline_end'],
-                    subject_id: str = "subject_id") -> pd.DataFrame:
+                    subject_id: str = "subject_id",
+                    retun_signal: bool = False) -> pd.DataFrame:
     """
     :param file: filename for each participant
@@ -56,24 +57,36 @@ def read_adicht_file(file: str, front_offset: int = 28e3, back_offset: int = 28e3,
     # Removing baseline_end tick positions
     red_block_names = np.delete(block_names_per_channel, baseline_ids[0, 1])

-    sliced_channel_df, Error = slice_channels(channels=adi_object.channels,
-                                              block_names=red_block_names,
-                                              indx_start=slice_start,
-                                              indx_end=slice_end,
-                                              channel_names=channel_names,
-                                              subject=subject_id)
+    if retun_signal:
+        sliced_channel_df, Error = slice_channels_for_analysis(channels=adi_object.channels,
+                                                               block_names=red_block_names,
+                                                               indx_start=slice_start,
+                                                               indx_end=slice_end,
+                                                               channel_names=channel_names,
+                                                               subject=subject_id)
+    else:
+        feature_data = None
+        sliced_channel_df, Error = slice_channels(channels=adi_object.channels,
+                                                  block_names=red_block_names,
+                                                  indx_start=slice_start,
+                                                  indx_end=slice_end,
+                                                  channel_names=channel_names,
+                                                  subject=subject_id)
     if Error is not None:
-        raise AssertionError("The comment name/s: ", *Error.flatten(), rf" in the file{file} does not matches the default comment order.")
+        raise AssertionError(f"{Error} in the file {file} does not match the default comment order.")
     return sliced_channel_df

 def get_data_for_analysis(directory: str = "taste_adicht_files/",
-                          channels: list = ['Hautleitfähigkeit', 'HR'],
+                          channels: list = ['Hautleitfähigkeit', 'HR', 'Puls'],
                           start_block: list = ['baseline_start'],
-                          end_block: list = ['baseline_end'], offeset_sec: int = 22) \
+                          end_block: list = ['baseline_end'], offeset_sec: int = 22,
+                          return_signals: bool = True) \
         -> [np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
     """
-    Main function thats collects data from all subjects
+    Main function that collects data from all subjects
     :param directory: directory where adicht files are located
     :param channels: list of channels that you want to consider
     :param start_block: list of block starting names
@@ -92,7 +105,11 @@ def get_data_for_analysis(directory: str = "taste_adicht_files/",
         single_participant_df = read_adicht_file(participant,
                                                  channel_names=channels, start_block=start_block,
                                                  end_block=end_block, subject_id=Subject_ID,
-                                                 back_offset=offeset_window, front_offset=offeset_window)
+                                                 back_offset=offeset_window, front_offset=offeset_window,
+                                                 retun_signal=return_signals)
+        # TODO: try to handle the feature in a suitable HDF5 format
         participants_data.append(single_participant_df)
     return pd.concat(participants_data, ignore_index=True)
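The new TODO points at HDF5 for the per-subject feature frames. One possible direction via pandas' to_hdf (requires PyTables); the helper name, file name, and keying scheme are assumptions, not part of this commit:

    import pandas as pd

    def append_subject_hdf5(df: pd.DataFrame, subject_id: str,
                            path: str = "taste_features.h5") -> None:
        # Hypothetical helper: store each subject's sliced blocks under
        # its own key so variable-length signal columns stay per subject.
        df.to_hdf(path, key=f"subject_{subject_id}", mode="a")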
@@ -103,6 +120,7 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Average over cognitive load")
     parser.add_argument("-w", dest="working_dir", type=str, help="absolute path to directory containing taste adicht files", default=None)
     parser.add_argument("-s", dest="offset_sec", type=str, help="Seconds to be taken out on either side of the tick position", default=28)
+    parser.add_argument("-g", dest="graph", action="store_true", help="take the mean and plot")
     args = parser.parse_args()
     data_df = None
@@ -115,9 +133,11 @@ if __name__ == "__main__":
     window.title("Cognitive load Analysis")

     def graph():
-        data_df = get_data_for_analysis(directory=args.working_dir, offeset_sec=get_current_value())
+        data_df = get_data_for_analysis(directory=args.working_dir, offeset_sec=get_current_value(), return_signals=False)
         data_df.to_csv("taste_cognitive_load_analysis.csv")
         print("A csv file is written in the same folder")
         grouped_data = data_df.groupby("comments").agg({'Hautleitfähigkeit': ['mean']})
         axes = grouped_data.plot.bar(figsize=(10, 8))
         axes.set_xlabel("taste_load")
@@ -126,6 +146,12 @@ if __name__ == "__main__":
         axes.autoscale_view()
         plt.show()
         return None

+    def analyse():
+        data_df = get_data_for_analysis(directory=args.working_dir, offeset_sec=get_current_value(), return_signals=True)
+        data_df.to_csv("taste_cognitive_load_analysis.csv")
+        print("A csv file is written in the same folder")
+        return None
+
     window.columnconfigure(0, weight=1)
     window.columnconfigure(1, weight=3)
@@ -158,7 +184,9 @@ if __name__ == "__main__":
     value_label.pack()
     args.working_dir = filedialog.askdirectory(title="select a taste directory")
     button = Button(window, text="plot", command=graph)
+    button1 = Button(window, text="Analyse", command=analyse)
     button.pack()
+    button1.pack()
     #Label(window, text=args.working_dir, font=13).pack()
     print(f"Analysis will be performed on taste adicht files in {args.working_dir} folder")
...
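Putting the change together: the GUI's "plot" button keeps the old aggregated-means path, while the new "Analyse" button returns baseline-normalised signal slices. The same switch can be exercised without Tkinter; the module name taste_analysis is an assumption, and offeset_sec is the parameter's actual (misspelled) name:

    from taste_analysis import get_data_for_analysis  # module name assumed

    # Old path: per-block means, suitable for the bar plot.
    means_df = get_data_for_analysis(directory="taste_adicht_files/", offeset_sec=22, return_signals=False)

    # New path: baseline-normalised raw slices per block.
    signals_df = get_data_for_analysis(directory="taste_adicht_files/", offeset_sec=22, return_signals=True)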