Commit 3da7bd7d authored by =

Updates per style guides

parent 9e3ead0d
1 merge request: !1 Progress toward getting the evaluation integrated
@@ -9,63 +9,68 @@ import traceback
current_dir = os.path.dirname(os.path.realpath(__file__))
input_dir = "/home/ec2-user/MEGNET/input_data/"
output_dir = os.path.join(current_dir, "output")

# list of all ICA values and images for each scan in the input directory
scan_files = os.listdir(input_dir)
scan_files = [scan_file.split(".")[0] for scan_file in scan_files]

# remove potential duplicates
scan_files = list(set(scan_files))


def process(scan_file):
    try:
        output_sub_dir = os.path.join(output_dir, scan_file)
        full_csv_output_path = os.path.join(output_sub_dir, scan_file + "_HR_component.csv")
        full_input_path = os.path.join(input_dir, scan_file)
        full_input_path = f"{full_input_path}.ds"

        print(f"Now processing: {scan_file}")

        if not os.path.exists(output_sub_dir):
            os.makedirs(output_sub_dir)

        MNE_proc = MNE_Processing.MNE_Processor(
            full_input_path, output_sub_dir, full_csv_output_path
        )
        MNE_proc.process()

        ###############################################################################
        # feed CSV component filepath to Label_ICA_Components.py so MEGNET knows what files to work with
        label_ICA = Label_ICA_Components.label_ICA_components(full_csv_output_path)

        # get best heartifact after running MEGNET on the MATLAB file above
        heartifact = label_ICA.fPredictICA()

        # filter dataframe to only the heartifact and Time index
        indices_to_filter = [0, heartifact + 1]
        df = pd.read_csv(full_csv_output_path)
        df = df.iloc[:, indices_to_filter]
        df.to_csv(full_csv_output_path, index=False)

        ###############################################################################
        # feed .mat filepath and heartifact list to Get_RR_Intervals.py to get RR intervals for each heartifact
        getRR = Get_RR_Component.GetRRComponent(
            output_sub_dir, scan_file, full_csv_output_path, heartifact
        )
        # getRR = Get_RR_Intervals.GetRRIntervals(scan_file, heartifacts)
        getRR.get_RR_intervals()
    except Exception as err:
        print(traceback.format_exc())


for scan_file in scan_files:
    process(scan_file)

# pool = multiprocessing.Pool(128)
# pool.map(process, scan_files)
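
# Sketch (not part of the committed code) of re-enabling the pooled variant above.
# Capping at cpu_count() is an assumption (the original hard-codes 128 workers),
# and the __main__ guard is required for multiprocessing's spawn start method.
# if __name__ == "__main__":
#     with multiprocessing.Pool(min(128, multiprocessing.cpu_count())) as pool:
#         pool.map(process, scan_files)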
@@ -13,7 +13,8 @@ from opensignalsreader import OpenSignalsReader
import math
from sklearn.metrics import mean_squared_error


class GetRRComponent:
    def __init__(self, output_sub_dir, subject_id, subject_path, ICA_value):
        self.subject_path = subject_path
        self.subject_id = subject_id
@@ -25,59 +26,58 @@ class GetRRComponent():
        #############################################################
        df = pd.DataFrame()

        # get time and component from .CSV file
        arr_Time_Series = pd.read_csv(self.subject_path).iloc[:, 0].T.to_numpy()
        component_value = pd.read_csv(self.subject_path).iloc[:, 1].T.to_numpy()

        df["Time"] = arr_Time_Series
        df[f"ICA_Values_{self.ICA_value}"] = component_value

        hr_arr = df.to_numpy()

        #############################################################
        # Assign paths
        stats_component = f"{self.output_sub_dir}/{self.subject_id}_HR_component.csv"
        graph_component = f"{self.output_sub_dir}/{self.subject_id}_Plot_{self.ICA_value}.png"

        #############################################################
        component_time = hr_arr[:, 0]
        ICA_component_value = hr_arr[:, 1]

        # attempt to flip values if signal is inverted
        if np.median(ICA_component_value) > 0.00:
            ICA_component_value = -ICA_component_value
        df[f"ICA_Values_{self.ICA_value}"] = ICA_component_value

        # SCIPY PEAK DETECTION
        ###################################################################################################
        distance = 150

        # filter height to peaks within the top 4.5% of largest values
        num_to_filter_component = round(len(ICA_component_value) * 0.045)
        height_component = (
            sum(sorted(ICA_component_value)[-num_to_filter_component:])
        ) / num_to_filter_component
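        # An equivalent, more direct form of the threshold above (a sketch, not in
        # the committed code): the mean of the num_to_filter_component largest values.
        # height_component = np.mean(np.sort(ICA_component_value)[-num_to_filter_component:])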
        # extract R-peaks from the component using SciPy's find_peaks
        component_rpeaks, _ = signal.find_peaks(
            ICA_component_value, height=height_component, distance=distance, prominence=3.5
        )

        # PYHRV PEAK DETECTION
        ########################################################################################################
        # t_component, filtered_signal, component_rpeaks = biosppy.signals.ecg.ecg(ICA_component_value, sampling_rate=500)[:3]

        # Plots Component Peaks
        ################################################################################################
        length_of_component_time_series = np.amax(component_time)
        to_divide = 1
@@ -85,7 +85,9 @@ class GetRRComponent():
        mini2 = 0
        maxi2 = len(component_time) / to_divide

        peaks_range = [
            idx for idx in range(len(component_rpeaks)) if mini2 < component_rpeaks[idx] < maxi2
        ]

        peaks1 = component_rpeaks[peaks_range]
@@ -102,38 +104,40 @@ class GetRRComponent():
        beat_cnt = len(peaks1)
        avg_hr = round((beat_cnt / length_of_component_time_series) * 60, 2)

        plt.title(
            f"{self.subject_id} RR Interval Component {self.ICA_value} \nNumber of Beats: {beat_cnt} || Average Heart Rate: {avg_hr}"
        )
        plt.savefig(graph_component, dpi=100)

        # save figure as HTML file
        html_str = mpld3.fig_to_html(fig)
        Html_file = open(graph_component[:-4] + ".html", "w")
        Html_file.write(html_str)
        Html_file.close()

        # Writes Component RR Intervals to CSV
        ################################################################################################################
        # peak_vals = component_time[component_rpeaks]
        # rr_int = []
        # size = len(peak_vals) - 1

        # for i in range(1, size):
        #     print(peak_vals[i])
        #     delta = peak_vals[i] - peak_vals[i - 1]
        #     rr_int = np.append(rr_int, delta)

        # df = pd.DataFrame()
        # df['Time'] = peak_vals[2:]

        # df = pd.DataFrame()
        # df['Signal'] = ICA_component_value
        # df = df.set_index('Signal')
        # df = df.dropna()

        df.to_csv(stats_component, index=False)
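        # Sketch (not in the committed code): a vectorized form of the commented-out
        # RR computation above. Note the original loop's range(1, size) with
        # size = len(peak_vals) - 1 would also skip the final interval.
        # peak_vals = component_time[component_rpeaks]
        # rr_int = np.diff(peak_vals)  # successive peak times -> RR intervals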
@@ -14,21 +14,23 @@ import math
from sklearn.metrics import mean_squared_error


class GetRRIntervals:
    def __init__(self, subject_id):
        self.subject_id = subject_id

    def get_RR_intervals(self):
        self.subject_id = self.subject_id.split(".")[0]

        #############################################################
        output_path = f"/home/ec2-user/MEGNET/output/{self.subject_id}"

        arr_Time_Series = (
            pd.read_csv(f"{output_path}/{self.subject_id}.csv").iloc[:, 0].T.to_numpy()
        )
        value = pd.read_csv(f"{output_path}/{self.subject_id}.csv").iloc[:, 1]

        value_num = value.name
        counter = 0
@@ -39,11 +41,11 @@ class GetRRIntervals():
        df_input = pd.DataFrame()

        df_input["Time_Series"] = arr_Time_Series
        print(arr_Time_Series)
        df_input[f"ICA_Value_{counter}"] = value

        #############################################################
@@ -52,11 +54,11 @@ class GetRRIntervals():
        #############################################################
        # Assign output paths for later use
        stats_component = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}.csv"
        stats_ECG = f"{output_path}/{self.subject_id}_RR_Intervals_ECG.csv"
        graph_component = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}.png"
        graph_ecg = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}_ecg.png"

        #############################################################
@@ -64,92 +66,95 @@ class GetRRIntervals():
        sig_one = hr_arr[:, 1]

        #############################################################
        if np.median(sig_one) > 0.00:
            sig_one = -sig_one
        df_input[f"ICA_Value_{counter}"] = sig_one

        x = np.amax(time_s)
        distance = 150

        ecg_filepath = f"/home/ec2-user/MEGNET/techdev_ecg/{self.subject_id}.csv"
        ecg_df = pd.read_csv(ecg_filepath).to_numpy()
        ecg_time = ecg_df[:, 0]
        ecg_signal = ecg_df[:, 1]

        if np.median(ecg_signal) > 0.00:
            ecg_signal = -ecg_signal
        ecg_signal = signal.resample(ecg_signal, 240000)
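        # Note (an assumption, not stated in the code): 240000 samples corresponds
        # to 480 s at the 500 Hz rate used elsewhere in this pipeline
        # (240000 / 500 = 480); scipy.signal.resample performs FFT-based
        # resampling to that fixed length.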
        # SCIPY PEAK DETECTION
        ###################################################################################################
        num_to_filter_component = round(len(sig_one) * 0.04)
        height_component = (
            sum(sorted(sig_one)[-num_to_filter_component:])
        ) / num_to_filter_component

        # extract component R-peaks using SciPy's find_peaks
        component_rpeaks, _ = signal.find_peaks(sig_one, height=height_component, distance=distance)

        num_to_filter_ECG = round(len(ecg_signal) * 0.04)
        height_ECG = (sum(sorted(ecg_signal)[-num_to_filter_ECG:])) / num_to_filter_ECG

        # extract ECG R-peaks using SciPy's find_peaks
        ecg_rpeaks, _ = signal.find_peaks(ecg_signal, height=height_ECG, distance=distance)

        # component_nni = tools.nn_intervals(component_rpeaks)
        # ecg_nni = tools.nn_intervals(ecg_rpeaks)

        # ecg_series = pd.Series(ecg_rpeaks)
        # component_series = pd.Series(component_rpeaks)

        # component_rmssd = pyhrv.time_domain.rmssd(nni=component_nni, rpeaks=component_rpeaks)['rmssd']
        # component_sdnn = pyhrv.time_domain.sdnn(nni=component_nni, rpeaks=component_rpeaks)['sdnn']
        # component_nn50 = pyhrv.time_domain.nn50(nni=component_nni, rpeaks=component_rpeaks)['nn50']
        # component_pnn50 = pyhrv.time_domain.nn50(nni=component_nni, rpeaks=component_rpeaks)['pnn50']

        # ecg_rmssd = pyhrv.time_domain.rmssd(nni=ecg_nni, rpeaks=ecg_rpeaks)['rmssd']
        # ecg_sdnn = pyhrv.time_domain.sdnn(nni=ecg_nni, rpeaks=ecg_rpeaks)['sdnn']
        # ecg_nn50 = pyhrv.time_domain.nn50(nni=ecg_nni, rpeaks=ecg_rpeaks)['nn50']
        # ecg_pnn50 = pyhrv.time_domain.nn50(nni=ecg_nni, rpeaks=ecg_rpeaks)['pnn50']

        # df = pd.DataFrame()
        # df['component_rmssd'] = [component_rmssd]
        # df['ecg_rmssd'] = [ecg_rmssd]
        # df['component_sdnn'] = [component_sdnn]
        # df['ecg_sdnn'] = [ecg_sdnn]
        # df['component_nn50'] = [component_nn50]
        # df['ecg_nn50'] = [ecg_nn50]
        # df['component_pnn50'] = [component_pnn50]
        # df['ecg_pnn50'] = [ecg_pnn50]
        # df['component_peaks_num'] = [len(component_rpeaks)]
        # df['ECG_peaks_num'] = [len(ecg_rpeaks)]
        # df['subject_id'] = [f'{self.subject_id}']
        # df = df.set_index('subject_id')
        # df.to_csv(f"/home/ec2-user/MEGNET/comparisons/{self.subject_id}_stats.csv")

        ################################################################################################
        to_divide = 1
        mini2 = 0
        maxi2 = len(df_input["Time_Series"]) / to_divide

        peaks_range = [
            idx for idx in range(len(component_rpeaks)) if mini2 < component_rpeaks[idx] < maxi2
        ]

        peaks1 = component_rpeaks[peaks_range]
@@ -166,18 +171,20 @@ class GetRRIntervals():
        beat_cnt = len(peaks1)
        avg_hr = round((beat_cnt / x) * 60, 2)

        plt.title(
            f"{self.subject_id} RR Interval Component {value_num} \nNumber of Beats: {beat_cnt} || Average Heart Rate: {avg_hr}"
        )
        plt.savefig(graph_component, dpi=100)

        # save figure as HTML file
        html_str = mpld3.fig_to_html(fig)
        Html_file = open(graph_component[:-4] + ".html", "w")
        Html_file.write(html_str)
        Html_file.close()

        #############################################################
        # to_divide = 1
        # mini2 = 0
@@ -196,12 +203,12 @@ class GetRRIntervals():
        # fig = plt.gcf()
        # fig.set_size_inches(12, 5)

        # ecg_beats_length = np.amax(ecg_time)
        # beat_cnt = len(peaks1_ecg)
        # avg_hr = round((beat_cnt / ecg_beats_length) * 60, 2)
        # plt.title(f"{self.subject_id} RR Interval ECG \nNumber of Beats: {beat_cnt} || Average Heart Rate: {avg_hr}")
@@ -212,49 +219,49 @@ class GetRRIntervals():
        # Html_file = open(graph_ecg[:-4] + '.html', "w")
        # Html_file.write(html_str)
        # Html_file.close()

        # Writes Component values to CSV
        ################################################################################################################
        # peak_vals = component_time[component_rpeaks]
        # rr_int = []
        # size = len(peak_vals) - 1

        # for i in range(1, size):
        #     delta = peak_vals[i] - peak_vals[i - 1]
        #     rr_int = np.append(rr_int, delta)

        df = pd.DataFrame()
        # df['Time'] = peak_vals[2:]
        df["Signal"] = sig_one
        df = df.set_index("Signal")
        # df = df.dropna()
        df.to_csv(stats_component)

        # Writes ECG RR Intervals to CSV
        ###############################################################################################################
        # peak_vals = ecg_time[ecg_rpeaks]
        # rr_int = []
        # size = len(peak_vals) - 1

        # for i in range(1, size):
        #     delta = peak_vals[i] - peak_vals[i - 1]
        #     rr_int = np.append(rr_int, delta)

        df = pd.DataFrame()
        # df['Time'] = peak_vals[2:]
        df["Signal"] = ecg_signal
        df = df.set_index("Signal")
        # df = df.dropna()
        df.to_csv(stats_ECG)
@@ -17,23 +17,23 @@ python Label_ICA_Components.py --input_path example_data/HCP/100307/@rawc_rfDC_8
"""


class label_ICA_components:
    def __init__(self, subject_path):
        self.subject_path = subject_path

    def load_image(self, infilename):
        from PIL import Image
        import numpy as np

        img = Image.open(infilename)
        img.load()
        data = np.asarray(img, dtype="int32")
        return data

    def fPredictICA(self):
        import os

        # os.environ['OMP_NUM_THREADS'] = '1'
        from Megnet_Utilities import fPredictChunkAndVoting
@@ -46,54 +46,66 @@ class label_ICA_components():
        heartifacts = []
        strOutputType = "list"

        # loading the data is from our Brainstorm Pipeline, it may require some minor edits based on how the data is saved.
        # load the time series and the spatial map
        arrTimeSeries = pd.read_csv(self.subject_path).iloc[:, 1:]
        arrTimeSeries = arrTimeSeries.T.to_numpy()
        arrSpatialMap = np.zeros((20, 120, 120, 3), dtype=np.uint8)

        # ensure the data is compatible
        try:
            assert (
                arrTimeSeries.shape[0] == arrSpatialMap.shape[0]
            )  # the number of time series should be the same as the number of spatial maps
            assert arrSpatialMap.shape[1:] == (
                120,
                120,
                3,
            )  # the spatial maps should have a shape of [N,120,120,3]
            assert (
                arrTimeSeries.shape[1] >= 15000
            )  # the time series need to be at least 60 secs with a sample rate of 250 Hz (60*250=15000)
        except AssertionError:
            raise ValueError("The data does not have the correct dimensions")

        current_dir = os.path.dirname(os.path.realpath(__file__))

        # load the model
        kModel = keras.models.load_model(f"{current_dir}/MEGnet_final_model.h5")

        # use the vote chunk prediction function to make a prediction on each input
        output = fPredictChunkAndVoting(
            kModel,
            arrTimeSeries,
            arrSpatialMap,
            np.zeros(
                (20, 3)
            ),  # the code expects the Y values as it was used for performance, just put in zeros as a placeholder.
            15000,
            3750,
        )
        arrPredictionsVote, arrGTVote, arrPredictionsChunk, arrGTChunk = output

        # format the predictions
        if strOutputType.lower() == "array":
            to_return = arrPredictionsVote[:, 0, :]
        else:
            to_return = arrPredictionsVote[:, 0, :].argmax(axis=1)

        for x in range(len(to_return)):
            confidence = list(arrPredictionsVote[x][0])
            heartrate_confidence = confidence[2]

            # append all HR artifacts with confidence above 0 to list
            if to_return[x] == 2 and heartrate_confidence > 0:
                print("Heartifact:", x, heartrate_confidence)
                heartifacts.append((x, heartrate_confidence))

        # return highest-confidence heart-rate artifact
        index, confidence = max(heartifacts, key=lambda x: x[1])
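        # Note: max() raises ValueError on an empty sequence, so a scan with no
        # component classified as heart-rate would crash here; a guard such as the
        # sketch below (not in the committed code) may be warranted.
        # if not heartifacts:
        #     raise ValueError("no heart-rate component found above confidence 0")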
......
@@ -13,24 +13,28 @@ from mne.preprocessing import ICA
import multiprocessing


class MNE_Processor:
    def __init__(self, scan_path, output_dir, csv_output_path):
        self.scan_path = scan_path
        self.output_dir = output_dir
        self.csv_output_path = csv_output_path

    def process(self):
        # process with every available thread
        os.environ["OMP_NUM_THREADS"] = str(multiprocessing.cpu_count())
        # os.environ['OMP_NUM_THREADS'] = '1'

        mne.set_log_level("ERROR")
        raw = mne.io.read_raw_ctf(
            self.scan_path,
            verbose="warning",
            preload=True,
            system_clock="truncate",
            clean_names=False,
        )

        sfreq = raw.info["sfreq"]

        mag_picks = mne.pick_types(raw.info, meg=True, eeg=False, misc=False)
        eeg_picks = mne.pick_types(raw.info, meg=False, eeg=True, misc=False)
@@ -38,26 +42,32 @@ class MNE_Processor():
        all_picks = mne.pick_types(raw.info, meg=True, eeg=True, misc=True)

        freq_to_filter = 60  # remove 60 Hz signal from power lines
        notch_filtered = mne.io.Raw.notch_filter(
            raw,
            np.arange(freq_to_filter, sfreq / 2.0, freq_to_filter),
            picks=all_picks,
            phase="zero",
        )

        lower_freq_cutoff = 0.5
        upper_freq_cutoff = 100
        bandpass_filtered = mne.io.Raw.filter(
            notch_filtered, lower_freq_cutoff, upper_freq_cutoff, picks=all_picks
        )

        bandpass_filtered_resampled = bandpass_filtered.resample(500)

        method = "infomax"

        if method == "infomax":
            fit_params = dict(extended=True)
        elif method == "fastica":
            fit_params = dict()

        reject = dict(mag=5e-12, grad=4000e-13)
        # pass the selected method through rather than re-hard-coding "infomax"
        ica = ICA(n_components=20, method=method, fit_params=fit_params, random_state=0)
        ica.fit(bandpass_filtered_resampled, picks="data")
@@ -66,17 +76,27 @@ class MNE_Processor():
        ica_sources = ica.get_sources(bandpass_filtered_resampled)
        data, times = bandpass_filtered_resampled[misc_picks[:1]]

        time = ica_sources[ecg_index[0]][1].reshape(
            len(times),
        )

        sigi = []
        try:
            for i in ecg_index:
                sigi.append(
                    ica_sources[i][0].reshape(
                        len(times),
                    )
                )
        except Exception as err:
            print(err)

        appended_output = np.c_[time, np.array(sigi).T]
        np.savetxt(
            self.csv_output_path,
            appended_output,
            fmt="%1.4e",
            delimiter=",",
            header="Time, IC1, IC2, IC3, IC4, IC5, IC6, IC7, IC8, IC9, IC10, IC11, IC12, IC13, IC14, IC15, IC16, IC17, IC18, IC19, IC20",
        )
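        # Note: np.savetxt prepends its default comment prefix "# " to the header,
        # so the first CSV column is written as "# Time"; passing comments="" would
        # produce a plain header that pd.read_csv can consume directly.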
import pandas as pd
idx = pd.IndexSlice
import numpy as np
@@ -10,13 +11,15 @@ def fGetStartTimesOverlap(intInputLen, intModelLen=15000, intOverlap=3750):
    """
    lStartTimes = []
    intStartTime = 0
    while intStartTime + intModelLen <= intInputLen:
        lStartTimes.append(intStartTime)
        intStartTime = intStartTime + intModelLen - intOverlap

    return lStartTimes
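
# Worked example with the defaults: the step is 15000 - 3750 = 11250 samples,
# so fGetStartTimesOverlap(45000) returns [0, 11250, 22500].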

def fPredictChunkAndVoting(
    kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen=15000, intOverlap=3750
):
    """
    This function is designed to take in ICA time series and a spatial map pair and produce a prediction using a trained model.
    The time series will be split into multiple chunks and the final prediction will be a weighted vote of each time chunk.
@@ -46,7 +49,9 @@ def fPredictChunkAndVoting(kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen
    for arrScanTimeSeries, arrScanSpatialMap, arrScanY in zip(lTimeSeries, arrSpatialMap, arrY):
        intTimeSeriesLen = arrScanTimeSeries.shape[0]
        lStartTimes = fGetStartTimesOverlap(
            intTimeSeriesLen, intModelLen=intModelLen, intOverlap=intOverlap
        )

        if lStartTimes[-1] + intModelLen <= intTimeSeriesLen:
            lStartTimes.append(arrScanTimeSeries.shape[0] - intModelLen)
@@ -62,10 +67,17 @@ def fPredictChunkAndVoting(kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen
        # predict
        dctWeightedPredictions = {}
        for intStartTime in dctTimeChunkVotes.keys():
            lPrediction = kModel.predict(
                [
                    np.expand_dims(arrScanSpatialMap, 0),
                    np.expand_dims(
                        np.expand_dims(
                            arrScanTimeSeries[intStartTime : intStartTime + intModelLen], 0
                        ),
                        -1,
                    ),
                ]
            )
            lPredictionsChunk.append(lPrediction)
            lGTChunk.append(arrScanY)
@@ -79,4 +91,9 @@ def fPredictChunkAndVoting(kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen
        i += 1

    return (
        np.stack(lPredictionsVote),
        np.stack(lGTVote),
        np.stack(lPredictionsChunk),
        np.stack(lGTChunk),
    )
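
# Minimal call sketch (not in the committed code) mirroring the shapes that
# Label_ICA_Components asserts: 20 ICA series of >= 15000 samples, 20 spatial
# maps of 120x120x3, with zeros standing in for the unused ground-truth labels.
# preds_vote, gt_vote, preds_chunk, gt_chunk = fPredictChunkAndVoting(
#     kModel, arrTimeSeries, arrSpatialMap, np.zeros((20, 3)), 15000, 3750
# )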
@@ -86,4 +86,4 @@ def get_metrics(
    slices = PandasSFApplier([nlp_cnn, short_text]).apply(df)
    metrics["slices"] = get_slice_metrics(y_true=y_true, y_pred=y_pred, slices=slices)

    return metrics
@@ -207,4 +207,4 @@ def predict_tag(text: str = "", run_id: str = None) -> None:

if __name__ == "__main__":
    app()  # pragma: no cover, live app
@@ -43,4 +43,4 @@ def predict(texts: List, artifacts: Dict) -> List:
        }
        for i in range(len(tags))
    ]
    return predictions
@@ -137,4 +137,4 @@ def objective(args: Namespace, df: pd.DataFrame, trial: optuna.trial._trial.Trial
    trial.set_user_attr("recall", overall_performance["recall"])
    trial.set_user_attr("f1", overall_performance["f1"])
    return overall_performance["f1"]
@@ -55,4 +55,4 @@ def set_seeds(seed: int = 42) -> None:
    """
    # Set seeds
    np.random.seed(seed)
    random.seed(seed)