import numpy as np
import pandas as pd


def gen_steps(
    timesteps=340,
    channels=6,
    p_steps=0.01,
    step_ampl_min=0.0002,
    step_ampl_max=0.05,
    channel_corr_matrix=None,
):
    """Generate a piecewise-constant "step" signal and the step positions.

    At every timestep a step occurs with probability ``p_steps`` (the same
    event for all channels).  For each event every channel gets its own random
    sign; the signed events are accumulated over time and scaled per channel.

    Parameters
    ----------
    timesteps : int
        Number of rows of the generated signal.
    channels : int
        Number of columns of the generated signal.
    p_steps : float
        Per-timestep probability of a step.
    step_ampl_min, step_ampl_max : float
        Per-channel amplitude is ``U[0, 1) * step_ampl_max + step_ampl_min``,
        i.e. it lies in ``[step_ampl_min, step_ampl_min + step_ampl_max)``.
    channel_corr_matrix : array-like or None
        One randomly chosen row (index drawn from ``range(channels)``) is used
        as a per-channel multiplier.  ``None`` means all ones.

    Returns
    -------
    steps : numpy.ndarray
        Array of shape ``(timesteps, channels)``.
    step_indexes : numpy.ndarray
        Sorted row indexes at which a step event occurred.
    """
    if channel_corr_matrix is None:
        channel_corr_matrix = np.ones((channels, channels))

    start = np.random.binomial(n=1, p=p_steps, size=(timesteps, 1))
    ampl_params = (
        np.random.random(size=(1, channels)) * step_ampl_max + step_ampl_min
    )
    sign_params = np.random.choice(
        [-1, 1], size=(timesteps, channels), replace=True
    )
    corr_params = channel_corr_matrix[np.random.randint(channels)]

    # Cumulative sum turns isolated +-1 events into persistent level shifts.
    steps = np.cumsum(start * sign_params, axis=0) * ampl_params * corr_params
    return steps, np.nonzero(start)[0]


def gen_trend(
    timesteps=340,
    channels=6,
    step_trend_min=5,
    step_trend_max=10,
    channel_corr_matrix=None,
):
    """Generate a triangular (rise-then-fall) trend, circularly shifted in time.

    Parameters
    ----------
    timesteps : int
        Number of rows of the generated signal.
    channels : int
        Number of columns of the generated signal.
    step_trend_min, step_trend_max : float
        Per-channel slope is drawn from
        ``np.arange(step_trend_min, step_trend_max)`` (unit step) and divided
        by ``timesteps``.  If the range is empty, ``step_trend_min`` is used.
    channel_corr_matrix : numpy.ndarray or None
        If given, one random row multiplies the slopes; otherwise each channel
        gets an independent random sign.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(timesteps, channels)``.
    """
    slope_choices = np.arange(
        step_trend_min, max(step_trend_min + 1e-14, step_trend_max)
    )
    if slope_choices.size == 0:
        # The 1e-14 guard is lost to rounding for large step_trend_min, which
        # would make np.random.choice fail on an empty population.
        slope_choices = np.array([step_trend_min], dtype=float)

    trend_params = (
        np.random.choice(slope_choices, size=channels, replace=True) / timesteps
    )
    if channel_corr_matrix is None:
        trend_params = trend_params * np.random.choice([1, -1], size=channels)
    else:
        row = np.random.randint(channel_corr_matrix.shape[0])
        trend_params = trend_params * channel_corr_matrix[row]
    trend_params = trend_params[np.newaxis, :]

    ramp = np.linspace(0, timesteps, timesteps)[:, np.newaxis] * trend_params

    # Rising part followed by the mirrored falling part.  The rising part takes
    # the extra row when timesteps is odd so the result always has exactly
    # `timesteps` rows (identical to the previous behavior for even lengths).
    half = timesteps // 2
    trend = np.concatenate(
        [ramp[:timesteps - half], ramp[half:0:-1]], axis=0
    )

    # axis=0: shift along time only.  Without an axis np.roll shifts the
    # flattened array, which moves values between channels.
    shift = np.random.randint(trend.shape[0])
    return np.roll(trend, shift=shift, axis=0)


def gen_noise(
    timesteps=340,
    channels=6,
    noise_mean=0,
    noise_std_min=0.00001,
    noise_std_max=0.00003,
    noise_std_stp=0.00001 / 50,
):
    """Generate uncorrelated Gaussian noise for every channel.

    A single base level is drawn from
    ``np.arange(noise_std_min, noise_std_max + noise_std_stp, noise_std_stp)``;
    each channel is independently amplified x100 with probability 0.01.

    NOTE: the resulting per-channel value is placed on the diagonal of the
    covariance matrix, i.e. it acts as a variance despite the ``std`` naming.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(timesteps, channels)``.
    """
    amplification = np.random.choice([1, 100], size=channels, p=[0.99, 0.01])
    level_choices = np.arange(
        noise_std_min, noise_std_max + noise_std_stp, noise_std_stp
    )
    noise_std = np.random.choice(level_choices) * amplification
    noise_cov = np.eye(channels) * noise_std
    return np.random.multivariate_normal(
        np.repeat(noise_mean, channels), noise_cov, timesteps
    )


def gen_harmon_signal(
    timesteps=340,
    harm_ampl_min=0.02,
    harm_ampl_max=0.05,
    harm_ampl_step=0.005,
):
    """Generate a sum of three cosine harmonics with random per-sample amplitude.

    Parameters
    ----------
    timesteps : int
        Number of samples.
    harm_ampl_min, harm_ampl_max, harm_ampl_step : float
        Every sample's amplitude is drawn independently from
        ``np.arange(harm_ampl_min, harm_ampl_max + harm_ampl_step,
        harm_ampl_step)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(timesteps, 1)``.
    """
    harm_ampl_choices = np.arange(
        start=harm_ampl_min,
        stop=harm_ampl_max + harm_ampl_step,
        step=harm_ampl_step,
    )
    # One vectorized draw instead of a Python loop over timesteps.
    harm_ampl = np.random.choice(harm_ampl_choices, size=(timesteps, 1))

    cos_arg = np.zeros((timesteps, 3))
    cos_arg[:, 0] = np.linspace(0, 2 * np.pi, timesteps)
    cos_arg[:, 1] = np.linspace(0, 4 * np.pi, timesteps)
    if np.random.random() < 0.95:
        cos_arg[:, 2] = np.linspace(0, 8 * np.pi, timesteps)
    else:
        # Rare high-frequency ("curvy") variant.
        # NOTE(review): the original indentation was lost; zeroing the two
        # slow harmonics is assumed to belong to this branch only -- confirm.
        cos_arg[:, 2] = np.linspace(0, 64 * np.pi, timesteps)
        cos_arg[:, 0] *= 0
        cos_arg[:, 1] *= 0

    cos_harms = np.cos(cos_arg)
    # Each harmonic is independently sign-flipped with probability 0.5.
    for column in range(3):
        flip = 1 if np.random.random() < 0.5 else -1
        cos_harms[:, column] *= flip

    cos_add = cos_harms.sum(axis=1)[:, np.newaxis]
    cos_add *= harm_ampl
    return cos_add


def gen_scan_motion_signal(
    timesteps,
    channels,
    scale=None,
    corr=None,
    noise_mean=0,
    noise_std_min=0.00001,
    noise_std_max=0.00002,
    noise_std_stp=0.00001 / 50,
    harm_ampl_min=0.005,
    harm_ampl_max=0.01,
    harm_ampl_step=0.0001,
    probability_steps=0.03,
    step_ampl_min=0.008,
    step_ampl_max=0.2,
    step_trend_min=None,
    step_trend_max=None,
):
    """Generate one synthetic motion recording: noise + harmonics + steps + trend.

    ``step_trend_min`` / ``step_trend_max`` default to ``None``, in which case
    each is drawn from ``U[0, 1)`` on every call (previously the random default
    was evaluated once, at import time).  ``corr`` is forwarded to the step
    generator only; the trend always uses independent random signs.

    Returns
    -------
    signal : numpy.ndarray
        Array of shape ``(timesteps, channels)``, multiplied by ``scale``.
    step_indexes : numpy.ndarray
        Row indexes of the injected steps.

    Example of generated signal
    ```
    import matplotlib.pyplot as plt
    from synthetic_dataset_utils import gen_scan_motion_signal
    new_signal_fmri_scan, new_signal_fmri_steps = gen_scan_motion_signal(340, 6)
    plt.plot(new_signal_fmri_scan)
    plt.vlines(new_signal_fmri_steps,
               ymin=new_signal_fmri_scan.min(),
               ymax=new_signal_fmri_scan.max(),
               color='black', linewidth=1)
    ```
    """
    if step_trend_min is None:
        step_trend_min = np.random.random()
    if step_trend_max is None:
        step_trend_max = np.random.random()
    scale = np.ones((1, channels)) if scale is None else scale

    signal = np.zeros([timesteps, channels])
    signal += gen_noise(
        timesteps=timesteps,
        channels=channels,
        noise_mean=noise_mean,
        noise_std_min=noise_std_min,
        noise_std_max=noise_std_max,
        noise_std_stp=noise_std_stp,
    )
    # Shape (timesteps, 1): the same harmonic is broadcast to every channel.
    signal += gen_harmon_signal(
        timesteps=timesteps,
        harm_ampl_min=harm_ampl_min,
        harm_ampl_max=harm_ampl_max,
        harm_ampl_step=harm_ampl_step,
    )
    step_signal, step_indexes = gen_steps(
        timesteps=timesteps,
        channels=channels,
        p_steps=probability_steps,
        step_ampl_min=step_ampl_min,
        step_ampl_max=step_ampl_max,
        channel_corr_matrix=corr,
    )
    signal += step_signal
    signal += gen_trend(
        timesteps=timesteps,
        channels=channels,
        step_trend_min=step_trend_min,
        step_trend_max=step_trend_max,
        channel_corr_matrix=None,
    )
    signal *= scale
    return signal, step_indexes


def _as_window_array(windows, window_size, channels):
    """Stack a list of windows; an empty list yields a correctly shaped 3-D array."""
    if not windows:
        return np.empty((0, max(window_size, 0), channels))
    return np.array(windows)


def gen_Xy(
    sample_num,
    timesteps,
    channels,
    window_size,
    noise_mean=0,
    noise_std_min=0.0001,
    noise_std_max=0.002,
    noise_std_stp=0.00001,
    harm_ampl_min=0.005,
    harm_ampl_max=0.01,
    harm_ampl_step=0.0001,
    probability_steps=0.03,
    step_ampl_min=0.008,
    step_ampl_max=0.2,
    channel_corr_matrix=None,
    scale=None,
):
    """Build a balanced normal/anomaly window dataset from synthetic recordings.

    Uses ``gen_scan_motion_signal`` to generate each fMRI recording; shift
    (step) anomalies are embedded in every recording.  The signal is cut into
    pieces of ``window_size`` rows that are labelled with one of two classes:
    normal (0) or anomaly (1).  The pieces are shuffled twice: within each
    class before the class sizes are equalized, and within the whole dataset.

    A window starting at ``start`` is an anomaly when some step index ``s``
    satisfies ``start <= s - 2`` and ``s + 2 < start + window_size``.

    Returns
    -------
    X_train, y_train, X_val, y_val : numpy.ndarray
        80 % / 20 % split.  ``X_*`` have shape ``(n, window_size, channels)``.
        If either class has no windows, all four arrays are empty.
    """
    scale = np.ones((1, channels)) if scale is None else scale

    # ---------------------------- Generate fmri signals ----------------------
    signal_fmri_scans = []
    signal_fmri_steps = []
    for _ in range(sample_num):
        # NOTE(review): `channel_corr_matrix` is accepted by this function but
        # corr=None is passed here, exactly as before -- confirm it is intended.
        signal, step_indexes = gen_scan_motion_signal(
            timesteps,
            channels,
            scale=scale,
            corr=None,
            noise_mean=noise_mean,
            noise_std_min=noise_std_min,
            noise_std_max=noise_std_max,
            noise_std_stp=noise_std_stp,
            harm_ampl_min=harm_ampl_min,
            harm_ampl_max=harm_ampl_max,
            harm_ampl_step=harm_ampl_step,
            probability_steps=probability_steps,
            step_ampl_min=step_ampl_min,
            step_ampl_max=step_ampl_max,
            step_trend_min=np.random.random(),
            step_trend_max=np.random.random() * 0.5,
        )
        signal_fmri_scans.append(signal)
        # indexes can be merged like in detect_shifts with
        # window_merge(step_indexes, window_size=window_size)
        signal_fmri_steps.append(step_indexes)

    # ---- Chop signals into short windows and split them into two classes ----
    normal_windows = []
    anomaly_windows = []
    for fmri_scan, step_positions in zip(signal_fmri_scans, signal_fmri_steps):
        for start in range(fmri_scan.shape[0]):
            window_values = fmri_scan[start:start + window_size, :]
            if window_values.shape[0] != window_size:
                continue  # incomplete window at the end of the recording
            stop = start + window_size
            is_anomaly = any(
                start <= step - 2 and step + 2 < stop
                for step in step_positions
            )
            if is_anomaly:
                anomaly_windows.append(window_values)
            else:
                normal_windows.append(window_values)

    # Keep both arrays 3-D even when a class is empty so they can be
    # concatenated below.
    normal_windows = _as_window_array(normal_windows, window_size, channels)
    anomaly_windows = _as_window_array(anomaly_windows, window_size, channels)

    # ----- Prepare dataset and labels ------
    # To get a balanced dataset, shuffle each class and keep only as many
    # examples as the smaller class has.
    np.random.shuffle(normal_windows)
    np.random.shuffle(anomaly_windows)
    per_class = min(len(normal_windows), len(anomaly_windows))
    normal_windows = normal_windows[:per_class]
    anomaly_windows = anomaly_windows[:per_class]

    X = np.concatenate((normal_windows, anomaly_windows), axis=0)
    y = np.concatenate(
        [
            np.repeat(0, normal_windows.shape[0]),
            np.repeat(1, anomaly_windows.shape[0]),
        ]
    )

    # Shuffle normal and anomaly examples together.
    shuffled_index = np.arange(X.shape[0])
    np.random.shuffle(shuffled_index)
    X = X[shuffled_index]
    y = y[shuffled_index]

    break_point = int(X.shape[0] * 0.8)
    X_train = X[:break_point]
    y_train = y[:break_point]
    X_val = X[break_point:]
    y_val = y[break_point:]
    return X_train, y_train, X_val, y_val