Skip to content
Snippets Groups Projects
Commit d2b23afa authored by ='s avatar =
Browse files

Moved megnet_utils to utils and otherwise made make happy

parent d06735d6
No related branches found
No related tags found
1 merge request!1Progress toward getting the evaluation integrated
......@@ -23,7 +23,7 @@ LOGS_DIR.mkdir(parents=True, exist_ok=True)
BLOB_STORE.mkdir(parents=True, exist_ok=True)
# MLFlow model registry
mlflow.set_tracking_uri("file://" + str(MODEL_REGISTRY.absolute()))
mlflow.set_tracking_uri("http://localhost:5000")
# Logger
logging_config = {
......@@ -68,9 +68,3 @@ logging_config = {
logging.config.dictConfig(logging_config)
logger = logging.getLogger()
logger.handlers[0] = RichHandler(markup=True)
# Assets
PROJECTS_URL = (
"https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/projects.json"
)
TAGS_URL = "https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/tags.json"
......@@ -2,7 +2,6 @@ import os
import Label_ICA_Components
import Get_RR_Component
import MNE_Processing
import multiprocessing
import pandas as pd
import traceback
......@@ -65,7 +64,7 @@ def process(scan_file):
# getRR = Get_RR_Intervals.GetRRIntervals(scan_file, heartifacts)
getRR.get_RR_intervals()
except Exception as err:
except Exception:
print(traceback.format_exc())
......
......@@ -3,15 +3,6 @@ import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
import pandas as pd
import scipy.io
from sklearn import preprocessing
import os
import pyhrv.tools as tools
import pyhrv
import biosppy
from opensignalsreader import OpenSignalsReader
import math
from sklearn.metrics import mean_squared_error
class GetRRComponent:
......
......@@ -3,15 +3,6 @@ import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
import pandas as pd
import scipy.io
from sklearn import preprocessing
import os
import pyhrv.tools as tools
import pyhrv
import biosppy
from opensignalsreader import OpenSignalsReader
import math
from sklearn.metrics import mean_squared_error
class GetRRIntervals:
......@@ -58,7 +49,7 @@ class GetRRIntervals:
stats_ECG = f"{output_path}/{self.subject_id}_RR_Intervals_ECG.csv"
graph_component = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}.png"
graph_ecg = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}_ecg.png"
# graph_ecg = f"{output_path}/{self.subject_id}_RR_Intervals_{value_num}_ecg.png"
#############################################################
......@@ -80,7 +71,7 @@ class GetRRIntervals:
ecg_df = pd.read_csv(ecg_filepath).to_numpy()
ecg_time = ecg_df[:, 0]
# ecg_time = ecg_df[:, 0]
ecg_signal = ecg_df[:, 1]
if np.median(ecg_signal) > 0.00:
......
......@@ -7,7 +7,7 @@ It is set up to be ran from the command line.
Note: TensorFlow takes some time to load, so running this independently for each subject is not the most computationally efficient approach.
To increase efficiency, I'd suggest embedding this function in a pipeline that loads TensorFlow once and then runs multiple subjects.
Alternatively, fPredictChunkAndVoting (used in the function below) can be applied to N spatial map and time series pairs.
Alternatively, fPredictChunkAndVoting (used in the function below) can be applied to N spatial map and time series pairs.
Thus fPredictICA could easily be modified to be applied to a complete list of ICA components and run on many subjects.
The outputs are saved by numpy in a text file that is easily human-readable and can be loaded using np.loadtxt('/path/to/ICA_component_lables.txt')
......@@ -38,9 +38,6 @@ class label_ICA_components:
from Megnet_Utilities import fPredictChunkAndVoting
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import scipy.io
from tensorflow import keras
import pandas as pd
......
import os
import mne
from time import time, localtime, strftime
import scipy.stats
from matplotlib import pyplot as plt
from skimage import io, color, transform, exposure, img_as_ubyte
from time import time, localtime, strftime
import cv2
import numpy as np
import os
from PIL import Image
from mne.preprocessing import ICA
import multiprocessing
......@@ -36,8 +28,8 @@ class MNE_Processor:
sfreq = raw.info["sfreq"]
mag_picks = mne.pick_types(raw.info, meg=True, eeg=False, misc=False)
eeg_picks = mne.pick_types(raw.info, meg=False, eeg=True, misc=False)
# mag_picks = mne.pick_types(raw.info, meg=True, eeg=False, misc=False)
# eeg_picks = mne.pick_types(raw.info, meg=False, eeg=True, misc=False)
misc_picks = mne.pick_types(raw.info, meg=False, eeg=False, misc=True)
all_picks = mne.pick_types(raw.info, meg=True, eeg=True, misc=True)
......@@ -65,7 +57,7 @@ class MNE_Processor:
elif method == "fastica":
fit_params = dict()
reject = dict(mag=5e-12, grad=4000e-13)
# reject = dict(mag=5e-12, grad=4000e-13)
ica = ICA(n_components=20, method="infomax", fit_params=fit_params, random_state=0)
......
import pandas as pd
idx = pd.IndexSlice
import numpy as np
def fGetStartTimesOverlap(intInputLen, intModelLen=15000, intOverlap=3750):
    """
    Return the start indices of the overlapping windows that fit in a signal.

    Windows are intModelLen samples long and consecutive windows share
    intOverlap samples, so the start index advances by
    intModelLen - intOverlap each step. Only windows that fit entirely inside
    the input are returned; a trailing remainder shorter than one window is
    not covered by this function.

    The defaults are 60 seconds (15000 samples) and 15 seconds (3750 samples)
    at 250Hz.

    :param intInputLen: number of samples in the input signal
    :type intInputLen: int
    :param intModelLen: number of samples in one window, defaults to 15000
    :type intModelLen: int, optional
    :param intOverlap: number of samples shared by consecutive windows, defaults to 3750
    :type intOverlap: int, optional
    :return: ascending window start indices; empty if the input is shorter than one window
    :rtype: list
    :raises ValueError: if at least one window fits but intOverlap >= intModelLen,
        because the start index would never advance
    """
    # Nothing fits: same empty result as before, regardless of the overlap.
    if intStartFits(intInputLen, intModelLen) is False:
        return []

    intStride = intModelLen - intOverlap
    if intStride <= 0:
        # Previously this case looped forever (the start index never grew).
        raise ValueError(
            "intOverlap (%s) must be smaller than intModelLen (%s)" % (intOverlap, intModelLen)
        )

    lStartTimes = []
    intStartTime = 0
    while intStartTime + intModelLen <= intInputLen:
        lStartTimes.append(intStartTime)
        intStartTime += intStride
    return lStartTimes


def intStartFits(intInputLen, intModelLen):
    """Return True when a window of intModelLen samples fits in intInputLen samples."""
    return intModelLen <= intInputLen
def fPredictChunkAndVoting(
    kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen=15000, intOverlap=3750
):
    """
    Predict per scan by weighted voting over overlapping time chunks.

    Each scan's time series is cut into chunks of intModelLen samples. The
    chunk start indices come from fGetStartTimesOverlap, plus one final chunk
    aligned to the end of the series so the tail is always covered.
    kModel.predict is called once per chunk with the scan's spatial map and
    that chunk of the time series.

    Each chunk's prediction is weighted by the time it covers: every time
    point contributes a total weight of 1, split equally between the chunks
    that contain it. For example, with 4-sample chunks overlapping by 1 sample
    on a 10-sample series, the chunks start at 0, 3 and 6; sample 3 is shared
    by the first two chunks and gives 0.5 to each, so the chunk weights are
    3.5, 3.0 and 3.5. The weighted predictions are averaged over the chunks
    and rescaled so the voted prediction sums to 1.

    :param kModel: The model used for the prediction on each chunk. It is called as
        kModel.predict([spatial map, time series chunk]), in that order
    :type kModel: a keras model
    :param lTimeSeries: The time series for each scan (can also be an array if all scans are the same length)
    :type lTimeSeries: list or array (if each scan is a different length, then it needs to be a list)
    :param arrSpatialMap: The spatial maps (one per scan)
    :type arrSpatialMap: numpy array
    :param arrY: One entry per scan, copied unchanged into the outputs
        (presumably the ground-truth label of the scan -- confirm with callers)
    :type arrY: numpy array or list
    :param intModelLen: The length of the time series in the model, defaults to 15000
    :type intModelLen: int, optional
    :param intOverlap: The length of the overlap between chunks, defaults to 3750
    :type intOverlap: int, optional
    :return: four stacked arrays: the voted prediction per scan, the arrY entry per scan,
        the raw prediction per chunk, and the arrY entry per chunk
    :rtype: tuple of numpy arrays
    :raises ValueError: if a scan's time series is shorter than intModelLen
    """
    lPredictionsVote = []
    lGTVote = []
    lPredictionsChunk = []
    lGTChunk = []

    for arrScanTimeSeries, arrScanSpatialMap, arrScanY in zip(lTimeSeries, arrSpatialMap, arrY):
        intTimeSeriesLen = arrScanTimeSeries.shape[0]
        lStartTimes = fGetStartTimesOverlap(
            intTimeSeriesLen, intModelLen=intModelLen, intOverlap=intOverlap
        )
        if not lStartTimes:
            # No full chunk fits; this used to surface as a bare IndexError.
            raise ValueError(
                "time series has %d samples but the model needs at least %d"
                % (intTimeSeriesLen, intModelLen)
            )

        # Add a chunk flush with the end of the series unless the last regular
        # chunk already ends there, so the tail of the scan is always covered.
        intTailStart = intTimeSeriesLen - intModelLen
        if lStartTimes[-1] != intTailStart:
            lStartTimes.append(intTailStart)

        # Voting weight per chunk: each time point hands out a total of 1,
        # shared equally by every chunk that contains it.
        dctTimeChunkVotes = {intStartTime: 0 for intStartTime in lStartTimes}
        for intT in range(intTimeSeriesLen):
            lCovering = [x for x in lStartTimes if x <= intT < x + intModelLen]
            for intStartTime in lCovering:
                dctTimeChunkVotes[intStartTime] += 1.0 / len(lCovering)

        # predict
        lWeightedPredictions = []
        for intStartTime in lStartTimes:
            arrChunk = arrScanTimeSeries[intStartTime : intStartTime + intModelLen]
            lPrediction = kModel.predict(
                [
                    # leading batch axis of size 1 on the spatial map
                    np.expand_dims(arrScanSpatialMap, 0),
                    # leading batch axis and trailing axis of size 1 on the chunk
                    np.expand_dims(np.expand_dims(arrChunk, 0), -1),
                ]
            )
            lPredictionsChunk.append(lPrediction)
            lGTChunk.append(arrScanY)
            lWeightedPredictions.append(lPrediction * dctTimeChunkVotes[intStartTime])

        # np.stack needs a real sequence (a list), not a dict view.
        arrScanPrediction = np.stack(lWeightedPredictions)
        arrScanPrediction = arrScanPrediction.mean(axis=0)
        arrScanPrediction = arrScanPrediction / arrScanPrediction.sum()
        lPredictionsVote.append(arrScanPrediction)
        lGTVote.append(arrScanY)

    return (
        np.stack(lPredictionsVote),
        np.stack(lGTVote),
        np.stack(lPredictionsChunk),
        np.stack(lGTChunk),
    )
import json
import random
from typing import Dict
from urllib.request import urlopen
import pandas as pd
import numpy as np
idx = pd.IndexSlice
def fGetStartTimesOverlap(intInputLen, intModelLen=15000, intOverlap=3750):
    """
    Return the start indices of the overlapping windows that fit in a signal.

    Windows are intModelLen samples long and consecutive windows share
    intOverlap samples, so the start index advances by
    intModelLen - intOverlap each step. Only windows that fit entirely inside
    the input are returned; a trailing remainder shorter than one window is
    not covered by this function.

    The defaults are 60 seconds (15000 samples) and 15 seconds (3750 samples)
    at 250Hz.

    :param intInputLen: number of samples in the input signal
    :type intInputLen: int
    :param intModelLen: number of samples in one window, defaults to 15000
    :type intModelLen: int, optional
    :param intOverlap: number of samples shared by consecutive windows, defaults to 3750
    :type intOverlap: int, optional
    :return: ascending window start indices; empty if the input is shorter than one window
    :rtype: list
    :raises ValueError: if at least one window fits but intOverlap >= intModelLen,
        because the start index would never advance
    """
    # Nothing fits: same empty result as before, regardless of the overlap.
    if intModelLen > intInputLen:
        return []

    intStride = intModelLen - intOverlap
    if intStride <= 0:
        # Previously this case looped forever (the start index never grew).
        raise ValueError(
            "intOverlap (%s) must be smaller than intModelLen (%s)" % (intOverlap, intModelLen)
        )

    lStartTimes = []
    intStartTime = 0
    while intStartTime + intModelLen <= intInputLen:
        lStartTimes.append(intStartTime)
        intStartTime += intStride
    return lStartTimes
def fPredictChunkAndVoting(
    kModel, lTimeSeries, arrSpatialMap, arrY, intModelLen=15000, intOverlap=3750
):
    """
    Predict per scan by weighted voting over overlapping time chunks.

    Each scan's time series is cut into chunks of intModelLen samples. The
    chunk start indices come from fGetStartTimesOverlap, plus one final chunk
    aligned to the end of the series so the tail is always covered.
    kModel.predict is called once per chunk with the scan's spatial map and
    that chunk of the time series.

    Each chunk's prediction is weighted by the time it covers: every time
    point contributes a total weight of 1, split equally between the chunks
    that contain it. For example, with 4-sample chunks overlapping by 1 sample
    on a 10-sample series, the chunks start at 0, 3 and 6; sample 3 is shared
    by the first two chunks and gives 0.5 to each, so the chunk weights are
    3.5, 3.0 and 3.5. The weighted predictions are averaged over the chunks
    and rescaled so the voted prediction sums to 1.

    :param kModel: The model used for the prediction on each chunk. It is called as
        kModel.predict([spatial map, time series chunk]), in that order
    :type kModel: a keras model
    :param lTimeSeries: The time series for each scan (can also be an array if all scans are the same length)
    :type lTimeSeries: list or array (if each scan is a different length, then it needs to be a list)
    :param arrSpatialMap: The spatial maps (one per scan)
    :type arrSpatialMap: numpy array
    :param arrY: One entry per scan, copied unchanged into the outputs
        (presumably the ground-truth label of the scan -- confirm with callers)
    :type arrY: numpy array or list
    :param intModelLen: The length of the time series in the model, defaults to 15000
    :type intModelLen: int, optional
    :param intOverlap: The length of the overlap between chunks, defaults to 3750
    :type intOverlap: int, optional
    :return: four stacked arrays: the voted prediction per scan, the arrY entry per scan,
        the raw prediction per chunk, and the arrY entry per chunk
    :rtype: tuple of numpy arrays
    :raises ValueError: if a scan's time series is shorter than intModelLen
    """
    lPredictionsVote = []
    lGTVote = []
    lPredictionsChunk = []
    lGTChunk = []

    for arrScanTimeSeries, arrScanSpatialMap, arrScanY in zip(lTimeSeries, arrSpatialMap, arrY):
        intTimeSeriesLen = arrScanTimeSeries.shape[0]
        lStartTimes = fGetStartTimesOverlap(
            intTimeSeriesLen, intModelLen=intModelLen, intOverlap=intOverlap
        )
        if not lStartTimes:
            # No full chunk fits; this used to surface as a bare IndexError.
            raise ValueError(
                "time series has %d samples but the model needs at least %d"
                % (intTimeSeriesLen, intModelLen)
            )

        # Add a chunk flush with the end of the series unless the last regular
        # chunk already ends there, so the tail of the scan is always covered.
        intTailStart = intTimeSeriesLen - intModelLen
        if lStartTimes[-1] != intTailStart:
            lStartTimes.append(intTailStart)

        # Voting weight per chunk: each time point hands out a total of 1,
        # shared equally by every chunk that contains it.
        dctTimeChunkVotes = {intStartTime: 0 for intStartTime in lStartTimes}
        for intT in range(intTimeSeriesLen):
            lCovering = [x for x in lStartTimes if x <= intT < x + intModelLen]
            for intStartTime in lCovering:
                dctTimeChunkVotes[intStartTime] += 1.0 / len(lCovering)

        # predict
        lWeightedPredictions = []
        for intStartTime in lStartTimes:
            arrChunk = arrScanTimeSeries[intStartTime : intStartTime + intModelLen]
            lPrediction = kModel.predict(
                [
                    # leading batch axis of size 1 on the spatial map
                    np.expand_dims(arrScanSpatialMap, 0),
                    # leading batch axis and trailing axis of size 1 on the chunk
                    np.expand_dims(np.expand_dims(arrChunk, 0), -1),
                ]
            )
            lPredictionsChunk.append(lPrediction)
            lGTChunk.append(arrScanY)
            lWeightedPredictions.append(lPrediction * dctTimeChunkVotes[intStartTime])

        # np.stack needs a real sequence (a list), not a dict view.
        arrScanPrediction = np.stack(lWeightedPredictions)
        arrScanPrediction = arrScanPrediction.mean(axis=0)
        arrScanPrediction = arrScanPrediction / arrScanPrediction.sum()
        lPredictionsVote.append(arrScanPrediction)
        lGTVote.append(arrScanY)

    return (
        np.stack(lPredictionsVote),
        np.stack(lGTVote),
        np.stack(lPredictionsChunk),
        np.stack(lGTChunk),
    )
......@@ -14,6 +14,5 @@ mkdocstrings==0.18.1
rich==12.5.1
black==22.6.0
flake8==5.0.2
isort==5.10.1
typer==0.6.1
jupyter
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment