Skip to content
Snippets Groups Projects
Commit 4f59bb0d authored by Ryan Godwin's avatar Ryan Godwin
Browse files

Alignment built, needed to revise preprocessing

parent 0c82090d
No related branches found
No related tags found
1 merge request!1Progress toward getting the evaluation integrated
{
"sampling_freq": 1000
"sampling_freq": 1000,
"sampling_freq_units": "Hz",
"intermediate_filename": "clean_ecg.csv",
"downsample_freq":500
}
\ No newline at end of file
......@@ -4,5 +4,7 @@
"notch_filter_freq": 60,
"bandpass_lowfreq_cutoff": 0.5,
"bandpass_highfreq_cutoff": 100,
"downsample_freq":500
"sampling_freq": 1200,
"downsample_freq":500,
"intermediate_filename":"heartifact.csv"
}
\ No newline at end of file
......@@ -20,8 +20,17 @@ RESULTS_DIR = Path(DATA_DIR, "results")
MEG_DATA_DIR = Path(RAW_DATA, "MEG_Data")
# Folder containing the raw ECG data from BIOPAC (.acq files converted to text)
ECG_DATA_DIR = Path(RAW_DATA, "BioPac_Data", "TechDevScans")
#The TF model for classifying MEG artifacts
MODEL_FILE = Path(BASE_DIR, "model", "MEGnet_final_model.h5")
# Each entry is (MEG scan folder name, ECG subject folder name); the comparison
# step looks both names up to decide which MEG and ECG recordings go together.
SIGNAL_TUPS = [
    ("MEGandECG_DWG-Rest_20210816_01", "Subject1"),
    ("MEGandECG_DWG-Rest_20210816_02", "Subject2"),
    ("MEGandECG_DWG-Rest_20210817_02", "Subject3"),
    ("MEGandECG_DWG-Rest_20210817_03", "Subject4"),
    ("MEGandECG_DWG-Rest_20210817_04", "Subject5"),
]
ICA_HEADERS = [
"Time",
"IC1",
......
from pathlib import Path
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
import scipy.signal as signal
from scipy.signal import correlate, correlation_lags
import plotter
from hrvmeg import data
class CompareMEGICAtoECG:
    """Compare the cardiac independent component of a MEG scan with an ECG trace.

    Both inputs are expected to be tables whose first column is time and whose
    second column is the signal, which is the layout
    ``plotter.plot_signals_comp`` reads when it plots them.
    """

    def __init__(self,
                 input_ecg,
                 ecg_name,
                 input_cardiac_ic,
                 meg_name,
                 output_sub_dir
                 ):
        """Store the two signals and create the per-scan output directory.

        Args:
            input_ecg: The preprocessed ECG data (time column, signal column).
            ecg_name: Name identifying the ECG recording.
            input_cardiac_ic: The cardiac independent component from the ICA
                (time column, signal column).
            meg_name: Name of the MEG scan; used as the output sub-folder name.
            output_sub_dir: The directory under which output files are saved.
        """
        print('output directory - ', output_sub_dir)
        self.output_sub_dir = output_sub_dir
        print('ecg input - ', input_ecg)
        self.ecg_data = input_ecg
        self.ecg_dir = ecg_name
        print('ic input - ', input_cardiac_ic)
        self.ica_data = input_cardiac_ic
        self.meg_dir = Path(self.output_sub_dir, meg_name)
        self.meg_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _signal_column(table):
        """Return the signal samples of *table* as a 1-D float array.

        For a 2-D table with several columns the second column is used (the
        first one holds time); a single-column table or a 1-D sequence is
        taken to be the signal itself.
        """
        values = np.asarray(table, dtype=float)
        if values.ndim == 1:
            return values
        if values.ndim != 2:
            raise ValueError("expected a 1-D signal or a 2-D (time, signal) table")
        column = 1 if values.shape[1] > 1 else 0
        return values[:, column]

    def compute_lag(self):
        """Estimate the offset between the cardiac IC and the ECG.

        The two signal columns are cross-correlated and the lag with the
        highest correlation is returned.

        Returns:
            int: Lag in samples; positive when the cardiac IC is delayed
            relative to the ECG.
            NOTE(review): this is only meaningful when both signals share the
            same sampling rate -- confirm against the preprocessing configs.

        Raises:
            ValueError: If either signal has no samples.
        """
        ica_signal = self._signal_column(self.ica_data)
        ecg_signal = self._signal_column(self.ecg_data)
        if ica_signal.size == 0 or ecg_signal.size == 0:
            raise ValueError("cannot align empty signals")

        # Correlate the 1-D signals only: including the time column would turn
        # this into a 2-D correlation whose arg-max is not a sample lag.
        correlation = correlate(ica_signal,
                                ecg_signal,
                                mode='full',
                                method='fft'
                                )
        print("ECG - ", self.ecg_data.shape)
        print("MEG - ", self.ica_data.shape)
        lags = correlation_lags(ica_signal.size, ecg_signal.size, mode='full')
        lag = int(lags[np.argmax(correlation)])
        print('Signal lag = ', lag)
        return lag

    def time_align(self):
        """Plot the unaligned signals and return their estimated lag in samples."""
        # plot unaligned
        plotter.plot_signals_comp(self.ica_data,
                                  self.ecg_data,
                                  Path(self.meg_dir, 'pre-align-both.png')
                                  )
        return self.compute_lag()

    def get_ica_rr(self):
        """R-peak extraction from the cardiac IC; currently unavailable."""
        # The earlier implementation handed subject_id / subject_path to
        # data.GetRPeaks, but the constructor no longer receives those values.
        raise NotImplementedError(
            "get_ica_rr is disabled until it is ported to the new constructor arguments"
        )
from time import time
import warnings
from pathlib import Path
from typing import Dict
import numpy as np
import matplotlib.pyplot as plt
import mlflow
import mpld3
import pandas as pd
import typer
from sklearn.preprocessing import StandardScaler
from config import config
from config.config import INTERMEDIATE_DIR, logger
from hrvmeg import compare, meg_to_ica, predict
from hrvmeg.plotter import PlotCardiacIC
from config.config import logger
from hrvmeg import compare, meg_to_ica, predict, preprocess_ecg, plotter
from py_utils import config_utils, get_data, print_info
# Experiment Name
......@@ -24,14 +24,14 @@ warnings.filterwarnings("ignore")
@app.command()
def extract_ic_from_meg(
def preprocess_megs(
args_fp: str = "config/args_meg_preproc.json",
experiment_name: str = experiment_name,
run_name: str = "extract_ica_from_meg",
test_run: bool = False,
) -> None:
"""
`extract_ic_from_meg` takes in a `scan_file` (a .ds file), and outputs a .csv file with the ICA
`preprocess_megs` takes in a `scan_file` (a .ds file), and outputs a .csv file with the ICA
components
:param args_fp: str = "config/args.json", defaults to config/args.json
......@@ -136,13 +136,13 @@ def label_cardiac_ic(
IC_col = str(df.columns[1])
output_png = Path(config.INTERMEDIATE_DIR, fp.parent.name, "Heartifact.png")
plot_cardiac_ic = PlotCardiacIC(
plot_cardiac_ic = plotter.PlotCardiacIC(
output_file=output_png.absolute(), x_values=df[time_col], y_values=df[IC_col]
)
plot_out = plot_cardiac_ic()
mlflow.set_experiment(experiment_name)
with mlflow.start_run(run_name=run_name) as run:
with mlflow.start_run(run_name=run_name):
run_id = mlflow.active_run().info.run_id
mlflow.log_params(args)
mlflow.log_artifact(plot_out)
......@@ -153,30 +153,40 @@ def label_cardiac_ic(
@app.command()
def preprocess_ecgs(
    experiment_name: str = experiment_name,
    args_ecg: str = "config/args_ecg_preproc.json",
    run_name: str = "preprocess ECG",
    test_run: bool = True,
):
    """
    Preprocess every raw ECG text file and save the cleaned signal as CSV.

    Each ``*.txt`` file found under `config.ECG_DATA_DIR` is passed to
    `preprocess_ecg.preproc_ecg`; the result is written to
    `config.INTERMEDIATE_DIR/<name of the file's parent folder>/`.

    Args:
        experiment_name (str): Name of the experiment. Currently unused by this command.
        args_ecg (str): Path to the JSON file with the ECG preprocessing arguments. It must
            provide "sampling_freq" and "downsample_freq"; "intermediate_filename" sets the
            output file name and falls back to "clean_ecg.csv". Defaults to
            config/args_ecg_preproc.json
        run_name (str): Name of the run. Currently unused by this command.
        test_run (bool): Currently unused by this command. Defaults to True
    """
    ecg_scan_folders = get_data.get_folders(config.ECG_DATA_DIR, ".txt")
    ecg_to_proc = get_data.get_files(ecg_scan_folders, "*.txt")

    args = dict(config_utils.load_dict(filepath=args_ecg))
    fs = args["sampling_freq"]
    downsample_freq = args["downsample_freq"]
    # Use the configured name so the comparison step, which reads the same
    # config key, finds the files written here.
    output_name = args.get("intermediate_filename", "clean_ecg.csv")

    for file in ecg_to_proc:
        fp = Path(file)
        output_folder = Path(config.INTERMEDIATE_DIR, fp.parent.name)
        output_folder.mkdir(parents=True, exist_ok=True)
        data_to_write = preprocess_ecg.preproc_ecg(fp, fs, downsample_freq, output_folder)
        np.savetxt(Path(output_folder, output_name), data_to_write, delimiter=',')
@app.command()
def compare(
def compare_signals(
args_rr: str = "config/args_rr.json",
args_meg: str = "config/args_meg_preproc.json",
args_ecg: str = "config/args_ecg_preproc.json",
experiment_name: str = "compare ECG + MEG",
run_name: str = "comparison 1",
test_run: bool = True,
......@@ -193,30 +203,48 @@ def compare(
directory. Defaults to comparison 1
test_run (bool): bool = True. Defaults to True
"""
args_rr = dict(config_utils.load_dict(filepath=args_rr))
args_meg = dict(config_utils.load_dict(filepath=args_meg))
args_ecg = dict(config_utils.load_dict(filepath=args_ecg))
meg_scan_folders = get_data.get_folders(config.ECG_DATA_DIR, ".csv")
meg_to_proc = []
for file in meg_scan_folders:
text_file_generators = file.glob("heartifact.csv")
for text_file in text_file_generators:
meg_to_proc.append(text_file)
meg_scan_folders = get_data.get_folders(config.INTERMEDIATE_DIR, args_meg["intermediate_filename"])
meg_to_proc = get_data.get_files(meg_scan_folders,args_meg["intermediate_filename"])
meg_dfs = {}
for meg_scan in meg_to_proc:
meg_dfs[meg_scan] = pd.read_csv(meg_scan)
# #Now can use ecg_to_proc to index ecg_dfs
# if len(ecg_dfs)==len(meg_dfs):
# for i in range(len(ecg_dfs)):
# #do stuff with both meg and ecg data
# #preproc ecg
# pass
# else:
# raise (IndexError)
args = dict(config_utils.load_dict(filepath=args_rr))
meg_dfs[Path(meg_scan).parent.name] = pd.read_csv(meg_scan)
ecg_scan_folders = get_data.get_folders(config.INTERMEDIATE_DIR, args_ecg["intermediate_filename"])
ecg_to_proc = get_data.get_files(ecg_scan_folders,args_ecg["intermediate_filename"])
ecg_dfs = {}
for ecg_scan in ecg_to_proc:
ecg_dfs[Path(ecg_scan).parent.name] = pd.read_csv(ecg_scan)
for idx, value in enumerate(config.SIGNAL_TUPS):
meg_name = value[0]
ecg_name = value[1]
meg_data = meg_dfs[meg_name]
ecg_data = ecg_dfs[ecg_name]
print(meg_data.shape)
print(ecg_data.shape)
comp = compare.CompareMEGICAtoECG(ecg_data,
ecg_name,
meg_data,
meg_name,
config.RESULTS_DIR
)
#Do all the direct comparison between the MEG and ECG here
comp.time_align()
#print('Correlation - ', corr_out)
print(args_rr)
print(args_meg)
print(args_ecg)
def load_artifacts(run_id: str = None) -> Dict:
......
......@@ -3,59 +3,48 @@ from pathlib import Path
import matplotlib.pyplot as plt
import mpld3
class Plotter(object):
    """Base class holding the data and output path shared by the plot classes."""

    def __init__(self, output_file: Path, x_values, y_values, fig_size=(12, 5)):
        """
        Args:
            output_file: Path the rendered image is saved to.
            x_values: Values plotted on the x axis.
            y_values: Values plotted on the y axis.
            fig_size: Figure size as (width, height).
        """
        self._x = x_values
        self._y = y_values
        self._output_file = output_file
        self._fig_size = fig_size
        # plt.close() without arguments closes the current pyplot figure.
        plt.close()

    def __call__(self):
        """Render the plot; subclasses provide the implementation."""
        pass

    def make_html(self, fig):
        """Save *fig* as an HTML file next to the output image.

        Args:
            fig: The matplotlib figure to export.

        Returns:
            Path of the written file: the output path with an ``.html`` suffix.
        """
        html_str = mpld3.fig_to_html(fig)
        # Only derive the new name. Path.rename() would move the image that
        # was already saved under the original name.
        output_html = Path(self._output_file).with_suffix(".html")
        with open(output_html, "w", encoding="utf8") as html_file:
            html_file.write(html_str)
        return output_html
class PlotCardiacIC(Plotter):
    """Line plot of the selected cardiac independent component."""

    def __init__(self, output_file, x_values, y_values, fig_size=(12, 5)):
        super().__init__(output_file, x_values, y_values, fig_size)

    def __call__(self):
        """Draw the component, save the image and return the output path."""
        image_path = Path(self._output_file)

        plt.plot(self._x, self._y, linewidth=0.3)
        plt.title("Selected Cardiac Component")
        plt.xlabel("Time (s)")
        plt.ylabel("Cardiac Component")
        plt.savefig(image_path, dpi=330)

        # TODO: pull off the IC number and add it to the title
        print('outfile - ', image_path)
        # HTML export through make_html is currently disabled for this plot.
        return self._output_file
class PlotECG(Plotter):
    """Line plot of an ECG signal, saved as an image and exported to HTML."""

    def __init__(self, output_file, x_values, y_values, fig_size=(12, 5)):
        super().__init__(output_file, x_values, y_values, fig_size)

    def __call__(self):
        """Draw the ECG, save the image and return the result of the HTML export."""
        # Honour the fig_size given to the constructor instead of a fixed size.
        fig, ax = plt.subplots(1, 1, figsize=self._fig_size)
        ax.plot(self._x, self._y, linewidth=0.2)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("ECG Signal")
        ax.set_title("Scaled Signal")
        fig.savefig(str(self._output_file), dpi=330)
        output_html = super().make_html(fig)
        return output_html
def make_html(fig, output_file=None):
    """Save *fig* as an HTML file named after *output_file*.

    Args:
        fig: The matplotlib figure to export.
        output_file: Path of the image the figure was saved to; the HTML file
            gets the same name with an ``.html`` suffix.

    Returns:
        Path of the written HTML file.

    Raises:
        ValueError: If *output_file* is not given, since there is then no
            location to write to.
    """
    if output_file is None:
        raise ValueError("make_html needs output_file to know where to write the HTML")
    html_str = mpld3.fig_to_html(fig)
    # with_suffix only derives the new name; the saved image stays in place.
    output_html = Path(output_file).with_suffix(".html")
    with open(output_html, "w", encoding="utf8") as html_file:
        html_file.write(html_str)
    return output_html


def plot_signals_comp(meg_values, ecg_values, output_file):
    """Plot the cardiac IC above the ECG signal and save image and HTML.

    Args:
        meg_values: Table whose first column is time and second column is the
            cardiac component.
        ecg_values: Table whose first column is time and second column is the
            ECG signal.
        output_file: Path the image is saved to; the HTML export uses the same
            name with an ``.html`` suffix.
    """
    # Two stacked axes on one figure: cardiac IC on top, ECG below.
    fig, (meg_ax, ecg_ax) = plt.subplots(2, 1)

    print('Plotting Cardiac IC...')
    meg_ax.plot(meg_values.iloc[:, 0], meg_values.iloc[:, 1], linewidth=0.2)
    meg_ax.set_xlabel("Time (s)")
    meg_ax.set_ylabel("Cardiac Component")
    meg_ax.set_title("Selected Cardiac Component")

    print('Plotting ECG...')
    ecg_ax.plot(ecg_values.iloc[:, 0], ecg_values.iloc[:, 1], linewidth=0.2)
    ecg_ax.set_xlabel("Time (s)")
    ecg_ax.set_ylabel("ECG Signal")
    ecg_ax.set_title("Scaled ECG Signal")

    # Keep the upper x label from overlapping the lower title.
    fig.tight_layout()
    fig.savefig(str(output_file), dpi=330)
    print("outfile - ", Path(output_file))
    make_html(fig, output_file)
    plt.close(fig)


def plot_ecg(ecg_values, output_file):
    """Plot a single ECG trace and save image and HTML.

    Args:
        ecg_values: Table whose first column is time and second column is the
            ECG signal.
        output_file: Path the image is saved to; the HTML export uses the same
            name with an ``.html`` suffix.
    """
    print('Plotting ECG...')
    # A dedicated figure keeps this plot separate from any figure left open.
    fig, ax = plt.subplots()
    ax.plot(ecg_values.iloc[:, 0], ecg_values.iloc[:, 1], linewidth=0.2)
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("ECG Signal")
    ax.set_title("ECG Signal")
    fig.savefig(str(output_file), dpi=330)
    print("outfile - ", Path(output_file))
    make_html(fig, output_file)
    plt.close(fig)
\ No newline at end of file
import numpy as np
import pandas as pd
import tensorflow_addons as tfa
from tensorflow import keras
from config import config
......@@ -25,7 +24,7 @@ class label_ICA_components:
The outputs are saved by numpy in a text file that is easily human readable and can be loaded using
np.loadtxt('/path/to/ICA_component_lables.txt')
example usage:
python Label_ICA_Components.py --input_path example_data/ICA202DDisc \\
--output_dir example_data/ICA202DDisc --output_type list
......
from email.mime import base
from scipy import signal
from pathlib import Path
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from py_utils import baseline_wander_removal
import plotter
def preproc_ecg(file_path, fs, downsample_freq, output_folder):
    """Clean, scale and downsample one raw ECG recording.

    Steps: read the CSV, remove baseline wander, standard-scale, resample to
    ``downsample_freq``, and prepend the resampled time axis. A spot-check
    plot is written to ``output_folder`` after each stage.

    Args:
        file_path: Path of the raw ECG file, read with ``pd.read_csv``.
        fs: Sampling frequency of the raw recording.
        downsample_freq: Target sampling frequency, in the same unit as ``fs``.
        output_folder: Folder the spot-check plots are written to.

    Returns:
        numpy.ndarray: 2-D array whose first column is the resampled time in
        seconds, followed by the resampled, scaled signal column(s).

    Raises:
        ValueError: If ``fs`` or ``downsample_freq`` is not positive, or the
            recording is too short to resample.
    """
    if fs <= 0 or downsample_freq <= 0:
        raise ValueError("fs and downsample_freq must be positive")

    ecg_data = pd.read_csv(file_path)

    # Spot check 1: the raw data as read from disk.
    plotter.plot_ecg(ecg_data, Path(output_folder, 'raw_data.png'))

    # Baseline drift removal.
    # NOTE(review): assumes bwr returns (baseline, corrected signal), as the
    # unpacking implies -- confirm against py_utils.baseline_wander_removal.
    _baseline, ecg_out = baseline_wander_removal.bwr(ecg_data)

    # Spot check 2: after baseline removal.
    plotter.plot_ecg(ecg_out, Path(output_folder, 'bwr_data.png'))

    # Perform a standard scaling of the data ((x - mu) / sigma).
    scaled_ecg = StandardScaler().fit_transform(ecg_out)

    # Multiply before dividing so exact ratios are not lost to float rounding.
    n_samples = int(len(scaled_ecg) * downsample_freq / fs)
    if len(scaled_ecg) < 2 or n_samples < 1:
        raise ValueError("recording is too short to resample")

    # Time axis derived from the sample count and sampling frequency; built
    # from integer indices so it always has exactly one entry per sample.
    time_s = np.arange(len(scaled_ecg)) / fs

    # Spot check 3: the scaled signal (not the raw data) against time.
    plotter.plot_ecg(_with_time_column(time_s, scaled_ecg),
                     Path(output_folder, 'scaled_data.png'))

    # Downsample to match the MEG data. With `t` given, resample returns both
    # the resampled signal and the matching resampled time axis.
    print('int - ', n_samples)
    resampled_ecg, resampled_time = signal.resample(scaled_ecg, n_samples, t=time_s)

    # Spot check 4: the downsampled signal against its own time axis.
    plotter.plot_ecg(_with_time_column(resampled_time, resampled_ecg),
                     Path(output_folder, 'scaled_downsampled_data.png'))

    # Combine the time data with the signal data (time becomes column 0).
    print(resampled_ecg)
    scaled_down_ecg_wtime = np.column_stack((resampled_time, resampled_ecg))
    return scaled_down_ecg_wtime


def _with_time_column(time_values, signal_values):
    """Return a DataFrame with time as column 0 followed by the signal column(s)."""
    return pd.DataFrame(np.column_stack((time_values, signal_values)))
\ No newline at end of file
py_utils @ a8c6aebd
Subproject commit b550045afcb48f6a72cbd7ea8cd28353ba0996ff
Subproject commit a8c6aebd5cd5e0ac79c58ee9d978bc7c64e093f9
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment