Progress on direct comparison of MEG and ECG signals

40c5cd4e · Ryan Godwin · 8d51bb72 · 40c5cd4e · 40c5cd4e · 40c5cd4e
Commit 40c5cd4e authored 2 years ago by Ryan Godwin
--- a/src/config/args_ecg_preproc.json
+++ b/src/config/args_ecg_preproc.json
+{
+    "sampling_freq": 1000
+}
\ No newline at end of file
--- a/src/config/args_preproc.json
+++ b/src/config/args_preproc.json
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -6,9 +6,6 @@ from pathlib import Path
 import mlflow
 from rich.logging import RichHandler

-# Experiment name for MLFlow
-experiment_name = "Test 1"
-
 # Directories
 BASE_DIR = Path(__file__).parent.parent.absolute()
 CONFIG_DIR = Path(BASE_DIR, "config")
@@ -21,13 +18,14 @@ RESULTS_DIR = Path(DATA_DIR, "results")
 # Assets
 # A folder containing RAW MEG data (CTF -> .ds folders)
 MEG_DATA_DIR = Path(RAW_DATA, "MEG_Data")
-ECG_DATA_DIR = Path(RAW_DATA, "BioPac_Data")
+# A folder containing the RAW ECG data from BioPac (.ACQ files converted to text)
+ECG_DATA_DIR = Path(RAW_DATA, "BioPac_Data", "TechDevScans")
 MODEL_FILE = Path(BASE_DIR, "model", "MEGnet_final_model.h5")

+ICA_HEADERS = ["Time", "IC1", "IC2", "IC3", "IC4", "IC5", "IC6", "IC7", "IC8", 'IC9', "IC10", "IC11", "IC12", "IC13", "IC14", "IC15", "IC16", "IC17", "IC18", "IC19", "IC20"]
+
 # MLFlow model registry
 mlflow.set_tracking_uri("http://localhost:5000")
-mlflow.set_experiment(experiment_name)
-ICA_HEADERS = "Time, IC1, IC2, IC3, IC4, IC5, IC6, IC7, IC8, IC9, IC10, IC11, IC12, IC13, IC14, IC15, IC16, IC17, IC18, IC19, IC20"

 # Logger
 logging_config = {

--- a/src/config/run_id.txt
+++ b/src/config/run_id.txt
-c409b386346c4cab834880a38a85b42c
\ No newline at end of file
+5aa3eed2315c479e8038e28a238550a6
\ No newline at end of file
--- a/src/hrvmeg/data.py
+++ b/src/hrvmeg/data.py
@@ -155,4 +155,5 @@ class GetRpeaks:
        # df = df.dropna()

        df.to_csv(stats_component, index=False)
-        return time[component_rpeaks], trace[component_rpeaks]
\ No newline at end of file
+        return time[component_rpeaks], trace[component_rpeaks]
+    
\ No newline at end of file
--- a/src/hrvmeg/main.py
+++ b/src/hrvmeg/main.py
 import warnings
 from pathlib import Path
 from typing import Dict
-
+import mpld3
 import mlflow
 import pandas as pd
 import typer
-
+import matplotlib.pyplot as plt
 from config import config
-from config.config import logger
-from hrvmeg import predict, compare
-from py_utils import config_utils, get_files, print_info
-from hrvmeg import meg_to_ica
+from config.config import INTERMEDIATE_DIR, logger
+from hrvmeg import predict, compare, meg_to_ica
+from py_utils import config_utils, get_folders, print_info
+
+# Experiment name for MLFlow
+experiment_name = "Testing part 1"

 # Initialize Typer CLI app
 app = typer.Typer()
@@ -18,8 +20,8 @@ warnings.filterwarnings("ignore")

 @app.command()
 def extract_ic_from_meg(
-    args_fp: str = "config/args.json",
-    experiment_name: str = "Test 1",
+    args_fp: str = "config/args_meg_preproc.json",
+    experiment_name: str = experiment_name,
    run_name: str = "Run1",
    test_run: bool = False,
 ) -> None:
@@ -37,13 +39,14 @@ def extract_ic_from_meg(
    :type test_run: bool (optional)
    """
    # recursively scan through folders for .ds (CTF) files
-    scan_files = get_files.get_files(config.MEG_DATA_DIR, ".ds")
+    # This works nicely because the .ds folders contain .ds files
+    scan_files = get_folders.get_files(config.MEG_DATA_DIR, ".ds")
    scan_files = [x for x in scan_files if Path(x).suffix == ".ds"]
    # Ignore the hz.ds files
    scan_files = [x for x in scan_files if Path(x).stem != "hz"]
    # removing any duplicates
    scan_files = list(set(scan_files))
-
+    
    # compile the args
    args = dict(config_utils.load_dict(filepath=args_fp))
    output_files = []
@@ -57,17 +60,19 @@ def extract_ic_from_meg(
        MNE_proc = meg_to_ica.MNE_Processor(
            scan_file, output_sub_dir, full_csv_output_path, config.ICA_HEADERS, args
        )
+        
        MNE_proc.process()

        #TODO add plotting here
        # fig, ax = MNE_proc.plot()

-        logger.info("✅ ICA successfully calculated on MEG data with MNE")
+        logger.info("✅ ICA successfully calculated with MNE")
+        mlflow.set_experiment(experiment_name)
        with mlflow.start_run(run_name=run_name) as run:
            print_info.print_run_info(run)
            run_id = mlflow.active_run().info.run_id
            mlflow.log_params(args)
-            mlflow.log_artifact(full_csv_output_path)
+            mlflow.log_param("output_filepath", full_csv_output_path)

        # Save to config folder
        open(Path(config.CONFIG_DIR, "run_id.txt"), "w", encoding="utf-8").write(run_id)
@@ -84,7 +89,7 @@ def extract_ic_from_meg(

 @app.command()
 def label_cardiac_ic(
-    args_fp: str = "config/args_preproc.json",
+    args_fp: str = "config/args_meg_preproc.json",
    experiment_name: str = "label_cardiac_signals",
    run_name: str = "sgd",
    test_run: bool = False,
@@ -101,52 +106,91 @@ def label_cardiac_ic(
    """
    input_filepaths = open(Path(config.INTERMEDIATE_DIR, "output_ica_files.txt"), "r")
    input_list = input_filepaths.read()
-    input_list
+    
    for file in input_list.split(","):
-        # feed CSV component filepath to Label_ICA_Components.py so MEGNET knows what files to work with
-        print("file  - ", file)
-        label_ICA = predict.label_ICA_components(file.strip())
-        # get best heartifact after running MEGNET on the MATLAB file above
-        heartifact = label_ICA.fPredictICA()
-
-        # filter dataframe to only heartifact and Time index
-        indices_to_filter = [0, heartifact + 1]
-
-        df = pd.read_csv(file.strip())
-        df = df.iloc[:, indices_to_filter]
-
-        fp = Path(file)
-        output_file = Path(config.INTERMEDIATE_DIR,fp.parent.name, "heartifact.csv")
-        df.to_csv(output_file, index=False)
-
-        args = dict(config_utils.load_dict(filepath=args_fp))
-
-        with mlflow.start_run(run_name=run_name) as run:
-            run_id = mlflow.active_run().info.run_id
-            mlflow.log_params(args)
-            mlflow.log_artifact(output_file)
-
-        open(Path(config.CONFIG_DIR, "run_id.txt"), "w", encoding="utf-8").write(run_id)
+        if file != '':
+            # feed CSV component filepath to Label_ICA_Components.py so MEGNET knows what files to work with
+            print("file  - ", file)
+            label_ICA = predict.label_ICA_components(file.strip())
+            # get best heartifact after running MEGNET on the MATLAB file above
+            heartifact = label_ICA.fPredictICA()
+
+            # filter dataframe to only heartifact and Time index
+            indices_to_filter = [0, heartifact + 1]
+
+            df = pd.read_csv(file.strip())
+            df = df.iloc[:, indices_to_filter]
+            
+            fp = Path(file)
+            output_file = Path(config.INTERMEDIATE_DIR,fp.parent.name, "heartifact.csv")
+            df.to_csv(output_file, index=False)
+
+            args = dict(config_utils.load_dict(filepath=args_fp))
+            time_col = str(df.columns[0])
+            IC_col = str(df.columns[1])
+
+            fig, ax = plt.subplots(1,1, figsize=(12,5))
+
+            output_png = Path(config.INTERMEDIATE_DIR, fp.parent.name,"Heartifact.png")
+            
+            plt.plot(df[time_col], df[IC_col], linewidth=0.2)
+            plt.xlabel="Time (s)"
+            plt.ylabel="Cardiac Component"
+            plt.title="Selected Cardiac Component"
+            fig = plt.gcf()
+            fig.set_size_inches(12, 5)
+            plt.savefig(output_png)
+            
+            # save figure as HTML file
+            html_str = mpld3.fig_to_html(fig)
+            output_html = output_png.rename(output_png.with_suffix(".html"))
+            HTML_file = open(output_html, "w")
+            HTML_file.write(html_str)
+            HTML_file.close()
+            
+            with mlflow.start_run(run_name=run_name) as run:
+                run_id = mlflow.active_run().info.run_id
+                mlflow.log_params(args)
+                mlflow.log_artifact(output_html)
+            
+            open(Path(config.CONFIG_DIR, "run_id.txt"), "w", encoding="utf-8").write(run_id)


-@app.command
+@app.command()
 def compare_meg_and_ecg(
    args_rr: str="config/args_rr.json",
    experiment_name: str= "compare ECG + MEG",
-    run_name: str="comparison 1"
+    run_name: str="comparison 1",
    test_run: bool = True
 ):
-    raise (NotImplementedError)
-
-
-@app.command()
-def train_model(
-    args_fp: str = "config/args.json",
-    experiment_name: str = "baselines",
-    run_name: str = "sgd",
-    test_run: bool = False,
-) -> None:
-    raise (NotImplementedError)
+    """
+    Takes a JSON file with the arguments for the rr_intervals function. The intent is to run
+    rr_intervals on the ECG data and on the MEG data and then compare the two; so far only the
+    ECG text files are collected and read into DataFrames, and the arguments are loaded.
+    
+    Args:
+      args_rr (str): path to the JSON file with the rr_intervals arguments. Defaults to config/args_rr.json
+      experiment_name (str): name of the experiment. Defaults to compare ECG + MEG
+      run_name (str): the name of the run. This will be used to create a folder in the results
+    directory. Defaults to comparison 1
+      test_run (bool): whether this is a test run. Defaults to True
+    """
+    ecg_scan_files = get_folders.get_folders(config.ECG_DATA_DIR, ".txt")
+    print(ecg_scan_files)
+    ecg_to_proc = []
+    for file in ecg_scan_files:
+        text_file_generators = file.glob("*.txt")
+        for text_file in text_file_generators:
+            ecg_to_proc.append(text_file)
+
+    ecg_dfs = {}
+    for ecg_scan in ecg_to_proc:
+        ecg_dfs[ecg_scan] = pd.read_csv(ecg_scan)
+    print(ecg_dfs)        
+    
+    #Now can use ecg_to_proc to index ecg_dfs
+    print(ecg_dfs[ecg_to_proc[0]])
+    args = dict(config_utils.load_dict(filepath=args_rr))


 @app.command()

--- a/src/hrvmeg/meg_to_ica.py
+++ b/src/hrvmeg/meg_to_ica.py
@@ -8,9 +8,6 @@ import matplotlib.pyplot as plt
 import numpy as np
 from mne.preprocessing import ICA

-from py_utils.time_series_utils import determine_fs
-
-
 class MNE_Processor:
    def __init__(self, scan_path, output_dir, csv_output_path, ica_headers, args: dict):
        self.scan_path = scan_path
@@ -103,12 +100,14 @@ class MNE_Processor:
            print(err)

        appended_output = np.c_[time, np.array(sigi).T]
+        headers= [x for x in self.ica_headers]
        np.savetxt(
            self.csv_output_path,
            appended_output,
            fmt="%1.4e",
            delimiter=",",
-            header=self.ica_headers,
+            comments= '',
+            header=','.join(headers),
        )
        self._time_ = time
        self._ica_ = np.array(sigi).T

--- a/py_utils @ d798ac0b
+++ b/py_utils @ d798ac0b
-Subproject commit 9ed6500be77001280a94c7e5c9706151b23758c2
+Subproject commit d798ac0b2a8d7ac83bcd255e1d851ebfa342fff6