Commit 3da7bd7d authored by =

Updates per style guides

parent 9e3ead0d
1 merge request: !1 Progress toward getting the evaluation integrated
@@ -9,63 +9,68 @@ import traceback
current_dir = os.path.dirname(os.path.realpath(__file__))
input_dir = "/home/ec2-user/MEGNET/input_data/"
output_dir = os.path.join(current_dir, "output")

# list of all ICA values and images for each scan in the input directory
scan_files = os.listdir(input_dir)
scan_files = [scan_file.split(".")[0] for scan_file in scan_files]

# remove potential duplicates
scan_files = list(set(scan_files))


def process(scan_file):
    try:
        output_sub_dir = os.path.join(output_dir, scan_file)
        full_csv_output_path = os.path.join(output_sub_dir, scan_file + "_HR_component.csv")
        full_input_path = os.path.join(input_dir, scan_file)
        full_input_path = f"{full_input_path}.ds"

        print(f"Now processing: {scan_file}")

        if not os.path.exists(output_sub_dir):
            os.makedirs(output_sub_dir)

        MNE_proc = MNE_Processing.MNE_Processor(
            full_input_path, output_sub_dir, full_csv_output_path
        )
        MNE_proc.process()

        ###############################################################################
        # feed CSV component filepath to Label_ICA_Components.py so MEGNET knows what files to work with
        label_ICA = Label_ICA_Components.label_ICA_components(full_csv_output_path)

        # get best heartifact after running MEGNET on the MATLAB file above
        heartifact = label_ICA.fPredictICA()

        # filter dataframe to only the heartifact and Time index
        indices_to_filter = [0, heartifact + 1]
        df = pd.read_csv(full_csv_output_path)
        df = df.iloc[:, indices_to_filter]
        df.to_csv(full_csv_output_path, index=False)

        ###############################################################################
        # feed .mat filepath and heartifact list to Get_RR_Intervals.py to get RR intervals for each heartifact
        getRR = Get_RR_Component.GetRRComponent(
            output_sub_dir, scan_file, full_csv_output_path, heartifact
        )
        # getRR = Get_RR_Intervals.GetRRIntervals(scan_file, heartifacts)
        getRR.get_RR_intervals()
    except Exception as err:
        print(traceback.format_exc())


for scan_file in scan_files:
    process(scan_file)

# pool = multiprocessing.Pool(128)
# pool.map(process, scan_files)
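
# Sketch (not part of the committed code) of re-enabling the pooled variant above.
# Capping at cpu_count() is an assumption (the original hard-codes 128 workers),
# and the __main__ guard is required for multiprocessing's spawn start method.
# if __name__ == "__main__":
#     with multiprocessing.Pool(min(128, multiprocessing.cpu_count())) as pool:
#         pool.map(process, scan_files)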
@@ -13,7 +13,8 @@ from opensignalsreader import OpenSignalsReader
import math
from sklearn.metrics import mean_squared_error


class GetRRComponent:
    def __init__(self, output_sub_dir, subject_id, subject_path, ICA_value):
        self.subject_path = subject_path
        self.subject_id = subject_id
@@ -25,59 +26,58 @@ class GetRRComponent():
        #############################################################
        df = pd.DataFrame()

        # get time and component from .CSV file
        arr_Time_Series = pd.read_csv(self.subject_path).iloc[:, 0].T.to_numpy()
        component_value = pd.read_csv(self.subject_path).iloc[:, 1].T.to_numpy()

        df["Time"] = arr_Time_Series
        df[f"ICA_Values_{self.ICA_value}"] = component_value

        hr_arr = df.to_numpy()

        #############################################################
        # Assign paths
        stats_component = f"{self.output_sub_dir}/{self.subject_id}_HR_component.csv"
        graph_component = f"{self.output_sub_dir}/{self.subject_id}_Plot_{self.ICA_value}.png"

        #############################################################
        component_time = hr_arr[:, 0]
        ICA_component_value = hr_arr[:, 1]

        # attempt to flip values if signal is inverted
        if np.median(ICA_component_value) > 0.00:
            ICA_component_value = -ICA_component_value
        df[f"ICA_Values_{self.ICA_value}"] = ICA_component_value

        # SCIPY PEAK DETECTION
        ###################################################################################################
        distance = 150

        # filter height to peaks within the top 4.5% of largest values
        num_to_filter_component = round(len(ICA_component_value) * 0.045)
        height_component = (
            sum(sorted(ICA_component_value)[-num_to_filter_component:])
        ) / num_to_filter_component
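        # An equivalent, more direct form of the threshold above (a sketch, not in
        # the committed code): the mean of the num_to_filter_component largest values.
        # height_component = np.mean(np.sort(ICA_component_value)[-num_to_filter_component:])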
        # extract R-peaks from the component using SciPy's find_peaks
        component_rpeaks, _ = signal.find_peaks(
            ICA_component_value, height=height_component, distance=distance, prominence=3.5
        )

        # PYHRV PEAK DETECTION
        ########################################################################################################
        # t_component, filtered_signal, component_rpeaks = biosppy.signals.ecg.ecg(ICA_component_value, sampling_rate=500)[:3]

        # Plots Component Peaks
        ################################################################################################
        length_of_component_time_series = np.amax(component_time)
        to_divide = 1
@@ -85,7 +85,9 @@ class GetRRComponent():
        mini2 = 0
        maxi2 = len(component_time) / to_divide

        peaks_range = [
            idx for idx in range(len(component_rpeaks)) if mini2 < component_rpeaks[idx] < maxi2
        ]

        peaks1 = component_rpeaks[peaks_range]
@@ -102,38 +104,40 @@ class GetRRComponent():
        beat_cnt = len(peaks1)
        avg_hr = round((beat_cnt / length_of_component_time_series) * 60, 2)

        plt.title(
            f"{self.subject_id} RR Interval Component {self.ICA_value} \nNumber of Beats: {beat_cnt} || Average Heart Rate: {avg_hr}"
        )
        plt.savefig(graph_component, dpi=100)

        # save figure as HTML file
        html_str = mpld3.fig_to_html(fig)
        Html_file = open(graph_component[:-4] + ".html", "w")
        Html_file.write(html_str)
        Html_file.close()

        # Writes Component RR Intervals to CSV
        ################################################################################################################
        # peak_vals = component_time[component_rpeaks]
        # rr_int = []
        # size = len(peak_vals) - 1

        # for i in range(1, size):
        #     print(peak_vals[i])
        #     delta = peak_vals[i] - peak_vals[i - 1]
        #     rr_int = np.append(rr_int, delta)

        # df = pd.DataFrame()
        # df['Time'] = peak_vals[2:]

        # df = pd.DataFrame()
        # df['Signal'] = ICA_component_value
        # df = df.set_index('Signal')
        # df = df.dropna()

        df.to_csv(stats_component, index=False)
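        # Sketch (not in the committed code): a vectorized form of the commented-out
        # RR computation above. Note the original loop's range(1, size) with
        # size = len(peak_vals) - 1 would also skip the final interval.
        # peak_vals = component_time[component_rpeaks]
        # rr_int = np.diff(peak_vals)  # successive peak times -> RR intervals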
@@ -14,21 +14,23 @@ import math
from sklearn.metrics import mean_squared_error


class GetRRIntervals:
    def __init__(self, subject_id):
        self.subject_id = subject_id

    def get_RR_intervals(self):
        self.subject_id = self.subject_id.split(".")[0]

        #############################################################
        output_path = f"/home/ec2-user/MEGNET/output/{self.subject_id}"

        arr_Time_Series = (
            pd.read_csv(f"{output_path}/{self.subject_id}.csv").iloc[:, 0].T.to_numpy()
        )
        value = pd.read_csv(f"{output_path}/{self.subject_id}.csv").iloc[:, 1]

        value_num = value.name
        counter = 0
@@ -39,11 +41,11 @@ class GetRRIntervals():
        df_input = pd.DataFrame()

        df_input["Time_Series"] = arr_Time_Series
        print(arr_Time_Series)
        df_input[f"ICA_Value_{counter}"] = value

        #############################################################
@@ -52,11 +54,11 @@ class GetRRIntervals():
        #############################################################
        # Assign output paths for later use
        stats_component = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}.csv"
        stats_ECG = f"{output_path}/{self.subject_id}_RR_Intervals_ECG.csv"
        graph_component = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}.png"
        graph_ecg = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}_ecg.png"

        #############################################################
@@ -64,92 +66,95 @@ class GetRRIntervals():
        sig_one = hr_arr[:, 1]

        #############################################################
        if np.median(sig_one) > 0.00:
            sig_one = -sig_one
        df_input[f"ICA_Value_{counter}"] = sig_one

        x = np.amax(time_s)
        distance = 150

        ecg_filepath = f"/home/ec2-user/MEGNET/techdev_ecg/{self.subject_id}.csv"
        ecg_df = pd.read_csv(ecg_filepath).to_numpy()
        ecg_time = ecg_df[:, 0]
        ecg_signal = ecg_df[:, 1]

        if np.median(ecg_signal) > 0.00:
            ecg_signal = -ecg_signal
        ecg_signal = signal.resample(ecg_signal, 240000)
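        # Note (an assumption, not stated in the code): 240000 samples corresponds
        # to 480 s at the 500 Hz rate used elsewhere in this pipeline
        # (240000 / 500 = 480); scipy.signal.resample performs FFT-based
        # resampling to that fixed length.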
        # SCIPY PEAK DETECTION
        ###################################################################################################
        num_to_filter_component = round(len(sig_one) * 0.04)
        height_component = (
            sum(sorted(sig_one)[-num_to_filter_component:])
        ) / num_to_filter_component

        # extract component R-peaks using SciPy's find_peaks
        component_rpeaks, _ = signal.find_peaks(sig_one, height=height_component, distance=distance)

        num_to_filter_ECG = round(len(ecg_signal) * 0.04)
        height_ECG = (sum(sorted(ecg_signal)[-num_to_filter_ECG:])) / num_to_filter_ECG

        # extract ECG R-peaks using SciPy's find_peaks
        ecg_rpeaks, _ = signal.find_peaks(ecg_signal, height=height_ECG, distance=distance)

        # component_nni = tools.nn_intervals(component_rpeaks)
        # ecg_nni = tools.nn_intervals(ecg_rpeaks)

        # ecg_series = pd.Series(ecg_rpeaks)
        # component_series = pd.Series(component_rpeaks)

        # component_rmssd = pyhrv.time_domain.rmssd(nni=component_nni, rpeaks=component_rpeaks)['rmssd']
        # component_sdnn = pyhrv.time_domain.sdnn(nni=component_nni, rpeaks=component_rpeaks)['sdnn']
        # component_nn50 = pyhrv.time_domain.nn50(nni=component_nni, rpeaks=component_rpeaks)['nn50']
        # component_pnn50 = pyhrv.time_domain.nn50(nni=component_nni, rpeaks=component_rpeaks)['pnn50']

        # ecg_rmssd = pyhrv.time_domain.rmssd(nni=ecg_nni, rpeaks=ecg_rpeaks)['rmssd']
        # ecg_sdnn = pyhrv.time_domain.sdnn(nni=ecg_nni, rpeaks=ecg_rpeaks)['sdnn']
        # ecg_nn50 = pyhrv.time_domain.nn50(nni=ecg_nni, rpeaks=ecg_rpeaks)['nn50']
        # ecg_pnn50 = pyhrv.time_domain.nn50(nni=ecg_nni, rpeaks=ecg_rpeaks)['pnn50']

        # df = pd.DataFrame()
        # df['component_rmssd'] = [component_rmssd]
        # df['ecg_rmssd'] = [ecg_rmssd]
        # df['component_sdnn'] = [component_sdnn]
        # df['ecg_sdnn'] = [ecg_sdnn]
        # df['component_nn50'] = [component_nn50]
        # df['ecg_nn50'] = [ecg_nn50]
        # df['component_pnn50'] = [component_pnn50]
        # df['ecg_pnn50'] = [ecg_pnn50]
        # df['component_peaks_num'] = [len(component_rpeaks)]
        # df['ECG_peaks_num'] = [len(ecg_rpeaks)]
        # df['subject_id'] = [f'{self.subject_id}']
        # df = df.set_index('subject_id')
        # df.to_csv(f"/home/ec2-user/MEGNET/comparisons/{self.subject_id}_stats.csv")

        ################################################################################################
        to_divide = 1
        mini2 = 0
        maxi2 = len(df_input["Time_Series"]) / to_divide

        peaks_range = [
            idx for idx in range(len(component_rpeaks)) if mini2 < component_rpeaks[idx] < maxi2
        ]

        peaks1 = component_rpeaks[peaks_range]
@@ -166,18 +171,20 @@ class GetRRIntervals():
        beat_cnt = len(peaks1)
        avg_hr = round((beat_cnt / x) * 60, 2)

        plt.title(
            f"{self.subject_id} RR Interval Component {value_num} \nNumber of Beats: {beat_cnt} || Average Heart Rate: {avg_hr}"
        )
        plt.savefig(graph_component, dpi=100)

        # save figure as HTML file
        html_str = mpld3.fig_to_html(fig)
        Html_file = open(graph_component[:-4] + ".html", "w")
        Html_file.write(html_str)
        Html_file.close()

        #############################################################
        # to_divide = 1
        # mini2 = 0
@@ -196,12 +203,12 @@ class GetRRIntervals():
        # fig = plt.gcf()
        # fig.set_size_inches(12, 5)

        # ecg_beats_length = np.amax(ecg_time)
        # beat_cnt = len(peaks1_ecg)
        # avg_hr = round((beat_cnt / ecg_beats_length) * 60, 2)
        # plt.title(f"{self.subject_id} RR Interval ECG \nNumber of Beats: {beat_cnt} || Average Heart Rate: {avg_hr}")
@@ -212,49 +219,49 @@ class GetRRIntervals():
        # Html_file = open(graph_ecg[:-4] + '.html', "w")
        # Html_file.write(html_str)
        # Html_file.close()

        # Writes Component values to CSV
        ################################################################################################################
        # peak_vals = component_time[component_rpeaks]
        # rr_int = []
        # size = len(peak_vals) - 1

        # for i in range(1, size):
        #     delta = peak_vals[i] - peak_vals[i - 1]
        #     rr_int = np.append(rr_int, delta)

        df = pd.DataFrame()
        # df['Time'] = peak_vals[2:]
        df["Signal"] = sig_one
        df = df.set_index("Signal")
        # df = df.dropna()
        df.to_csv(stats_component)

        # Writes ECG RR Intervals to CSV
        ###############################################################################################################
        # peak_vals = ecg_time[ecg_rpeaks]
        # rr_int = []
        # size = len(peak_vals) - 1

        # for i in range(1, size):
        #     delta = peak_vals[i] - peak_vals[i - 1]
        #     rr_int = np.append(rr_int, delta)

        df = pd.DataFrame()
        # df['Time'] = peak_vals[2:]
        df["Signal"] = ecg_signal
        df = df.set_index("Signal")
        # df = df.dropna()
        df.to_csv(stats_ECG)
@@ -17,23 +17,23 @@ python Label_ICA_Components.py --input_path example_data/HCP/100307/@rawc_rfDC_8
"""


class label_ICA_components:
    def __init__(self, subject_path):
        self.subject_path = subject_path

    def load_image(self, infilename):
        from PIL import Image
        import numpy as np

        img = Image.open(infilename)
        img.load()
        data = np.asarray(img, dtype="int32")
        return data

    def fPredictICA(self):
        import os

        # os.environ['OMP_NUM_THREADS'] = '1'
        from Megnet_Utilities import fPredictChunkAndVoting
@@ -46,54 +46,66 @@ class label_ICA_components():
        heartifacts = []
        strOutputType = "list"

        # loading the data is from our Brainstorm Pipeline, it may require some minor edits based on how the data is saved.
        # load the time series and the spatial map
        arrTimeSeries = pd.read_csv(self.subject_path).iloc[:, 1:]
        arrTimeSeries = arrTimeSeries.T.to_numpy()
        arrSpatialMap = np.zeros((20, 120, 120, 3), dtype=np.uint8)

        # ensure the data is compatible
        try:
            assert (
                arrTimeSeries.shape[0] == arrSpatialMap.shape[0]
            )  # the number of time series should be the same as the number of spatial maps
            assert arrSpatialMap.shape[1:] == (
                120,
                120,
                3,
            )  # the spatial maps should have a shape of [N,120,120,3]
            assert (
                arrTimeSeries.shape[1] >= 15000
            )  # the time series need to be at least 60 secs with a sample rate of 250 Hz (60*250=15000)
        except AssertionError:
            raise ValueError("The data does not have the correct dimensions")

        current_dir = os.path.dirname(os.path.realpath(__file__))

        # load the model
        kModel = keras.models.load_model(f"{current_dir}/MEGnet_final_model.h5")

        # use the vote chunk prediction function to make a prediction on each input
        output = fPredictChunkAndVoting(
            kModel,
            arrTimeSeries,
            arrSpatialMap,
            np.zeros(
                (20, 3)
            ),  # the code expects the Y values as it was used for performance, just put in zeros as a placeholder.
            15000,
            3750,
        )
        arrPredictionsVote, arrGTVote, arrPredictionsChunk, arrGTChunk = output

        # format the predictions
        if strOutputType.lower() == "array":
            to_return = arrPredictionsVote[:, 0, :]
        else:
            to_return = arrPredictionsVote[:, 0, :].argmax(axis=1)

        for x in range(len(to_return)):
            confidence = list(arrPredictionsVote[x][0])
            heartrate_confidence = confidence[2]

            # append all HR artifacts with confidence above 0 to list
            if to_return[x] == 2 and heartrate_confidence > 0:
                print("Heartifact:", x, heartrate_confidence)
                heartifacts.append((x, heartrate_confidence))

        # return highest-confidence heart-rate artifact
        index, confidence = max(heartifacts, key=lambda x: x[1])
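        # Note: max() raises ValueError on an empty sequence, so a scan with no
        # component classified as heart-rate would crash here; a guard such as the
        # sketch below (not in the committed code) may be warranted.
        # if not heartifacts:
        #     raise ValueError("no heart-rate component found above confidence 0")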
......
@@ -13,24 +13,28 @@ from mne.preprocessing import ICA
import multiprocessing


class MNE_Processor:
    def __init__(self, scan_path, output_dir, csv_output_path):
        self.scan_path = scan_path
        self.output_dir = output_dir
        self.csv_output_path = csv_output_path

    def process(self):
        # process with every available thread
        os.environ["OMP_NUM_THREADS"] = str(multiprocessing.cpu_count())
        # os.environ['OMP_NUM_THREADS'] = '1'

        mne.set_log_level("ERROR")
        raw = mne.io.read_raw_ctf(
            self.scan_path,
            verbose="warning",
            preload=True,
            system_clock="truncate",
            clean_names=False,
        )

        sfreq = raw.info["sfreq"]

        mag_picks = mne.pick_types(raw.info, meg=True, eeg=False, misc=False)
        eeg_picks = mne.pick_types(raw.info, meg=False, eeg=True, misc=False)
@@ -38,26 +42,32 @@ class MNE_Processor():
        all_picks = mne.pick_types(raw.info, meg=True, eeg=True, misc=True)

        freq_to_filter = 60  # remove 60 Hz signal from power lines
        notch_filtered = mne.io.Raw.notch_filter(
            raw,
            np.arange(freq_to_filter, sfreq / 2.0, freq_to_filter),
            picks=all_picks,
            phase="zero",
        )

        lower_freq_cutoff = 0.5
        upper_freq_cutoff = 100
        bandpass_filtered = mne.io.Raw.filter(
            notch_filtered, lower_freq_cutoff, upper_freq_cutoff, picks=all_picks
        )

        bandpass_filtered_resampled = bandpass_filtered.resample(500)

        method = "infomax"

        if method == "infomax":
            fit_params = dict(extended=True)
        elif method == "fastica":
            fit_params = dict()

        reject = dict(mag=5e-12, grad=4000e-13)
        # pass the selected method through rather than re-hard-coding "infomax"
        ica = ICA(n_components=20, method=method, fit_params=fit_params, random_state=0)
        ica.fit(bandpass_filtered_resampled, picks="data")
@@ -66,17 +76,27 @@ class MNE_Processor():
        ica_sources = ica.get_sources(bandpass_filtered_resampled)
        data, times = bandpass_filtered_resampled[misc_picks[:1]]

        time = ica_sources[ecg_index[0]][1].reshape(
            len(times),
        )

        sigi = []
        try:
            for i in ecg_index:
                sigi.append(
                    ica_sources[i][0].reshape(
                        len(times),
                    )
                )
        except Exception as err:
            print(err)

        appended_output = np.c_[time, np.array(sigi).T]
        np.savetxt(
            self.csv_output_path,
            appended_output,
            fmt="%1.4e",
            delimiter=",",
            header="Time, IC1, IC2, IC3, IC4, IC5, IC6, IC7, IC8, IC9, IC10, IC11, IC12, IC13, IC14, IC15, IC16, IC17, IC18, IC19, IC20",
        )
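        # Note: np.savetxt prepends its default comment prefix "# " to the header,
        # so the first CSV column is written as "# Time"; passing comments="" would
        # produce a plain header that pd.read_csv can consume directly.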
import pandas as pd
idx = pd.IndexSlice
import numpy as np
@@ -10,13 +11,15 @@ def fGetStartTimesOverlap(intInputLen, intModelLen=15000, intOverlap=3750):
    """
    lStartTimes = []
    intStartTime = 0
    while intStartTime + intModelLen <= intInputLen:
        lStartTimes.append(intStartTime)
        intStartTime = intStartTime + intModelLen - intOverlap

    return lStartTimes
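
# Worked example with the defaults: the step is 15000 - 3750 = 11250 samples,
# so fGetStartTimesOverlap(45000) returns [0, 11250, 22500].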

def fPredictChunkAndVoting(
    kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen=15000, intOverlap=3750
):
    """
    This function is designed to take in ICA time series and a spatial map pair and produce a prediction using a trained model.
    The time series will be split into multiple chunks and the final prediction will be a weighted vote of each time chunk.
@@ -46,7 +49,9 @@ def fPredictChunkAndVoting(kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen
    for arrScanTimeSeries, arrScanSpatialMap, arrScanY in zip(lTimeSeries, arrSpatialMap, arrY):
        intTimeSeriesLen = arrScanTimeSeries.shape[0]
        lStartTimes = fGetStartTimesOverlap(
            intTimeSeriesLen, intModelLen=intModelLen, intOverlap=intOverlap
        )

        if lStartTimes[-1] + intModelLen <= intTimeSeriesLen:
            lStartTimes.append(arrScanTimeSeries.shape[0] - intModelLen)
@@ -62,10 +67,17 @@ def fPredictChunkAndVoting(kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen
        # predict
        dctWeightedPredictions = {}
        for intStartTime in dctTimeChunkVotes.keys():
            lPrediction = kModel.predict(
                [
                    np.expand_dims(arrScanSpatialMap, 0),
                    np.expand_dims(
                        np.expand_dims(
                            arrScanTimeSeries[intStartTime : intStartTime + intModelLen], 0
                        ),
                        -1,
                    ),
                ]
            )
            lPredictionsChunk.append(lPrediction)
            lGTChunk.append(arrScanY)
@@ -79,4 +91,9 @@ def fPredictChunkAndVoting(kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen
        i += 1

    return (
        np.stack(lPredictionsVote),
        np.stack(lGTVote),
        np.stack(lPredictionsChunk),
        np.stack(lGTChunk),
    )
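
# Minimal call sketch (not in the committed code) mirroring the shapes that
# Label_ICA_Components asserts: 20 ICA series of >= 15000 samples, 20 spatial
# maps of 120x120x3, with zeros standing in for the unused ground-truth labels.
# preds_vote, gt_vote, preds_chunk, gt_chunk = fPredictChunkAndVoting(
#     kModel, arrTimeSeries, arrSpatialMap, np.zeros((20, 3)), 15000, 3750
# )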
@@ -86,4 +86,4 @@ def get_metrics(
    slices = PandasSFApplier([nlp_cnn, short_text]).apply(df)
    metrics["slices"] = get_slice_metrics(y_true=y_true, y_pred=y_pred, slices=slices)

    return metrics
@@ -207,4 +207,4 @@ def predict_tag(text: str = "", run_id: str = None) -> None:

if __name__ == "__main__":
    app()  # pragma: no cover, live app
@@ -43,4 +43,4 @@ def predict(texts: List, artifacts: Dict) -> List:
        }
        for i in range(len(tags))
    ]
    return predictions
@@ -137,4 +137,4 @@ def objective(args: Namespace, df: pd.DataFrame, trial: optuna.trial._trial.Trial
    trial.set_user_attr("recall", overall_performance["recall"])
    trial.set_user_attr("f1", overall_performance["f1"])
    return overall_performance["f1"]
@@ -55,4 +55,4 @@ def set_seeds(seed: int = 42) -> None:
    """
    # Set seeds
    np.random.seed(seed)
    random.seed(seed)