master
protsenkovi 2 years ago
commit 7b6ff83699

3
.dvc/.gitignore vendored

@ -0,0 +1,3 @@
/config.local
/tmp
/cache

@ -0,0 +1,7 @@
[core]
remote = ssau
['remote "ssau"']
url = webdavs://storage.ai.ssau.ru/public.php/webdav/
user = jJ7sCC35KnZEPbf
password =
verify = true

@ -0,0 +1,8 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
.ipynb_checkpoints
*.pyc
*.ipynb
!OpenEmotionDistanceBenchmark.ipynb
!bechmark_results

9
.gitignore vendored

@ -0,0 +1,9 @@
*.npy
*.mat
*.pth
*.h5
*.mp4
.ipynb_checkpoints
*.pyc
!results

@ -0,0 +1,50 @@
#FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel
#FROM tensorflow/tensorflow:nightly-gpu
#FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
# Build-time identity of the host user, so files created inside the
# container belong to the invoking user on the host (see build.sh).
ARG USER
ARG GROUP
ARG UID
ARG GID
RUN groupadd --gid ${GID} ${GROUP} && \
    useradd --shell /bin/bash --uid ${UID} --gid ${GID} --create-home ${USER} && \
    mkdir /wd && \
    chown ${USER}:${GROUP} /wd
# SYSTEM INITIALIZATION
# Use apt-get (stable CLI for scripts) and chain with && so a failed
# update aborts the build instead of installing from stale lists;
# clean the lists in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y python3 python3-pip software-properties-common wget curl git && \
    rm -rf /var/lib/apt/lists/*
#RUN ln -s /usr/bin/pip3 /usr/bin/pip
# Single pip layer: one resolver run and fewer image layers.
RUN pip install --upgrade pip && \
    pip install jupyterlab tensorflow==2.12.0 pandas scikit-learn matplotlib keras 'dvc[webdav]'
ENV OS=ubuntu22.04
# Register NVIDIA's apt repository (pin file + signing key) for cuDNN.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin && \
    mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/3bf863cc.pub && \
    add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/ /"
ENV cuda_version=cuda11.8
ENV cudnn_version=8.8.1.*
RUN apt-get update && \
    apt-get install -y libcudnn8=${cudnn_version}-1+${cuda_version} \
                       libcudnn8-dev=${cudnn_version}-1+${cuda_version} && \
    rm -rf /var/lib/apt/lists/*
USER ${USER}
# USER INITIALIZATION
# RUN ...
SHELL ["/bin/bash", "--login", "-i", "-c"]
ENV SHELL=bash
WORKDIR /wd

3
assets/.gitignore vendored

@ -0,0 +1,3 @@
*
!.gitignore
!*.dvc

@ -0,0 +1,4 @@
outs:
- md5: 7af123ad0aff29cd9190416faebc3ac4
size: 101428175
path: All_data_10.csv

@ -0,0 +1,4 @@
outs:
- md5: f91ec886ce5529eb03f2d03c41b61494
size: 225432530
path: All_data_15.csv

@ -0,0 +1,4 @@
outs:
- md5: ffa29427fcc6958fbb3e03b431d7a69c
size: 550562598
path: All_data_24.csv

@ -0,0 +1,4 @@
outs:
- md5: 9105d6fe9f4afc5188d9832ef966902d
size: 3002048
path: X_test_10.npy

@ -0,0 +1,4 @@
outs:
- md5: e4eb94e48930f16a119f3bedeebac91e
size: 6789728
path: X_test_15.npy

@ -0,0 +1,4 @@
outs:
- md5: 61bf48b13dcc89ee34d080f62f6670f6
size: 12734336
path: X_test_24.npy

@ -0,0 +1,4 @@
outs:
- md5: d11c64ea9fc0dabee322ef0951035a1b
size: 578048
path: X_test_5.npy

@ -0,0 +1,4 @@
outs:
- md5: d99bfc149617ec7c17b7730515f76ed7
size: 21344768
path: X_train_10.npy

@ -0,0 +1,4 @@
outs:
- md5: 2ef0dc0aa5a30d555dc5d47c642b4f61
size: 53781248
path: X_train_15.npy

@ -0,0 +1,4 @@
outs:
- md5: fe51d511258bbf2add7ebe1d7fefa66b
size: 134323328
path: X_train_24.npy

@ -0,0 +1,4 @@
outs:
- md5: fb3f9fd488fef79ad926cca08358671f
size: 5336768
path: X_val_10.npy

@ -0,0 +1,4 @@
outs:
- md5: cc3960d22e0a0c551438612f58f2b440
size: 13445408
path: X_val_15.npy

@ -0,0 +1,4 @@
outs:
- md5: c41976bc7c490cc8dee8abd33f460f25
size: 33580928
path: X_val_24.npy

@ -0,0 +1,5 @@
outs:
- md5: 3f66dbbdd8295baedbcbdc2d89783972.dir
size: 1084009
nfiles: 48
path: new_real_data

@ -0,0 +1,4 @@
outs:
- md5: f16ffbef96dc0690262191eb2ff69d0e
size: 3855640
path: new_real_dataset_10.csv

@ -0,0 +1,4 @@
outs:
- md5: 6c5ce353fa9ed13da1a7ce73dfa5488d
size: 6962258
path: new_real_dataset_15.csv

@ -0,0 +1,4 @@
outs:
- md5: c3d997f7481885976f64463432e7d779
size: 10114327
path: new_real_dataset_24.csv

@ -0,0 +1,5 @@
outs:
- md5: a64327d23e339d46f75f235eee366160.dir
size: 870864
nfiles: 30
path: real_data

@ -0,0 +1,4 @@
outs:
- md5: 62ec44432c0e215b01241b36a32ff28a
size: 2593252
path: real_dataset_10.csv

@ -0,0 +1,4 @@
outs:
- md5: 319028b5f20f5307f545d0e7ad388384
size: 5323094
path: real_dataset_15.csv

@ -0,0 +1,4 @@
outs:
- md5: aa9d6a4d0120c2de8a2c6a7eb91f157a
size: 10279685
path: real_dataset_24.csv

@ -0,0 +1,4 @@
outs:
- md5: ac18eb1765604638a78f03920b3f66d1
size: 746718
path: real_mc_data.mat

@ -0,0 +1,5 @@
outs:
- md5: b5ae7af091995a4ba0994f9a026bd5a4.dir
size: 2743
nfiles: 39
path: relabeled

@ -0,0 +1,5 @@
outs:
- md5: db6b2d2517705f008209093fbfd876ec.dir
size: 1945723
nfiles: 39
path: reprocessed

@ -0,0 +1,4 @@
outs:
- md5: 23330b148e272e1d4d5a6edae9a96618
size: 50160
path: y_test_10.npy

@ -0,0 +1,4 @@
outs:
- md5: 399486653e302ca15cf993fd9584ed27
size: 75568
path: y_test_15.npy

@ -0,0 +1,4 @@
outs:
- md5: 0d3ec4a2350c3f1e7f1dbc568d04c406
size: 88560
path: y_test_24.npy

@ -0,0 +1,4 @@
outs:
- md5: 0f114f9d3cf0cf2b103df06201ebf7df
size: 19392
path: y_test_5.npy

@ -0,0 +1,4 @@
outs:
- md5: 002ff83dde477599a4eae963820ab602
size: 355872
path: y_train_10.npy

@ -0,0 +1,4 @@
outs:
- md5: c5c1ce0d1b2cb690b2efc3116567b988
size: 597696
path: y_train_15.npy

@ -0,0 +1,4 @@
outs:
- md5: 49adfa5be3edb676e86d9cc17fa21460
size: 932928
path: y_train_24.npy

@ -0,0 +1,4 @@
outs:
- md5: 11bf2908107644d389e1d8eb121c339a
size: 89072
path: y_val_10.npy

@ -0,0 +1,4 @@
outs:
- md5: bfacb5d89c1ded48c6ee890f057c9b05
size: 149520
path: y_val_15.npy

@ -0,0 +1,4 @@
outs:
- md5: a4238c73582b4ab1c9953be8ee108dac
size: 233328
path: y_val_24.npy

@ -0,0 +1,8 @@
#!/bin/bash
# Build the project image, passing the host user's identity as build args
# so the in-container user matches the host user (same name/UID/GID).
# Abort on any failure and treat unset variables as errors.
set -euo pipefail
CURDIRNAME=${PWD##*/}
# Quote all expansions: directory or user names with spaces must not word-split.
docker build . -t "${USER}_${CURDIRNAME}" \
	--build-arg USER="${USER}" \
	--build-arg GROUP="${USER}" \
	--build-arg UID="$(id -u "${USER}")" \
	--build-arg GID="$(id -g "${USER}")"

@ -0,0 +1,4 @@
#!/bin/bash
# Fetch DVC-tracked data inside a throwaway container (--rm removes it on exit);
# the current directory is mounted at /wd, where `dvc pull` runs.
set -euo pipefail
CURDIRNAME=${PWD##*/}
# Quote all expansions so paths/usernames with spaces do not word-split.
docker run -it --rm -v "$(pwd):/wd" "${USER}_${CURDIRNAME}" bash -l -c "dvc pull"

@ -0,0 +1,5 @@
#!/bin/bash
# Start a detached JupyterLab container on GPU 0, publishing port 8888.
# The working directory is mounted at /wd and the user's home is shared.
set -euo pipefail
CURDIRNAME=${PWD##*/}
# Quote all expansions so paths/usernames with spaces do not word-split.
docker run -d --gpus "device=0" -p 8888:8888 \
	-v "$(pwd):/wd" \
	-v "/home/${USER}/:/home/${USER}/" \
	--name "${USER}_${CURDIRNAME}_jupyter" \
	"${USER}_${CURDIRNAME}" \
	jupyter lab --ip 0.0.0.0

@ -0,0 +1,14 @@
```
git clone https://git.ai.ssau.ru/liav/docker_template myproject
cd myproject
```
Модифицируйте `Dockerfile` для выбора базового образа из https://hub.docker.com/ и установки зависимостей. Отредактируйте `.dockerignore`, если хотите скопировать файлы в образ командой COPY (по умолчанию файлы не отправляются в контекст для ускорения сборки).
```
./build.sh
./start_daemon.sh или ./start_interactively.sh
```
-----------------------------------
При использовании этой схемы будет получен контейнер с пользователем, идентичным пользователю в `host` системе. Новые файлы (логи/модели etc.) и процессы в диспетчере процессов (top/htop) хоста будут принадлежать вашему пользователю.
Параметр `-v $(pwd):/wd ` в `start` скриптах означает, что директория `/wd` в контейнере будет связана с текущей папкой на хосте.

@ -0,0 +1,5 @@
*
!.gitignore
!*.dvc
!open_emotion_distance
!*.csv

@ -0,0 +1,2 @@
harm_ampl_max,harm_ampl_min,noise_std_max,noise_std_min,step_ampl_max,step_ampl_min,best_acc,total_trials
0.09335516712568537,0.002075276740510396,0.06500162177977852,9.010488980514873e-05,0.21633192557128092,6.102973711283474e-05,0.6955548448992644,12000
1 harm_ampl_max harm_ampl_min noise_std_max noise_std_min step_ampl_max step_ampl_min best_acc total_trials
2 0.09335516712568537 0.002075276740510396 0.06500162177977852 9.010488980514873e-05 0.21633192557128092 6.102973711283474e-05 0.6955548448992644 12000

@ -0,0 +1,2 @@
harm_ampl_max,harm_ampl_min,noise_std_max,noise_std_min,step_ampl_max,step_ampl_min,best_acc,total_trials
0.05141408658277873,0.004216894373183093,0.04576914309029287,4.005219535133996e-05,0.2241382196779908,8.371016275499698e-05,0.7329734219269103,12000
1 harm_ampl_max harm_ampl_min noise_std_max noise_std_min step_ampl_max step_ampl_min best_acc total_trials
2 0.05141408658277873 0.004216894373183093 0.04576914309029287 4.005219535133996e-05 0.2241382196779908 8.371016275499698e-05 0.7329734219269103 12000

@ -0,0 +1,4 @@
outs:
- md5: 18e1a696158507a944451aba09f8984b
size: 463632
path: gcaec_10.h5

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/gcaec_10.h5,1.0,1.0,0.9793128260478503,0.9788057500921489,0.627758234729773,0.4294117647058823
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/gcaec_10.h5 1.0 1.0 0.9793128260478503 0.9788057500921489 0.627758234729773 0.4294117647058823

@ -0,0 +1,4 @@
outs:
- md5: 26c1ca3e2780970018fe9baa74db5a74
size: 467216
path: gcaec_15.h5

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/gcaec_15.h5,0.9999866124022705,0.9999865806036045,0.9807754096604905,0.9808829011129453,0.6238600212089077,0.4527079154451474
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/gcaec_15.h5 0.9999866124022705 0.9999865806036045 0.9807754096604905 0.9808829011129453 0.6238600212089077 0.4527079154451474

@ -0,0 +1,4 @@
outs:
- md5: fc9d1a6104cba6fe35c0f834ed8eed02
size: 476432
path: gcaec_24.h5

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/gcaec_24.h5,1.0,1.0,0.9884734133790738,0.9885464957731115,0.6197756468246789,0.45969919012726573
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/gcaec_24.h5 1.0 1.0 0.9884734133790738 0.9885464957731115 0.6197756468246789 0.45969919012726573

@ -0,0 +1,4 @@
outs:
- md5: bf15f089cb40d35831a8a28049f64226
size: 13942784
path: hyperparameter_study_10.db

@ -0,0 +1,4 @@
outs:
- md5: 4c3509f5e54afe5629956d51ed080ecc
size: 749568
path: hyperparameter_study_15.db

@ -0,0 +1,4 @@
outs:
- md5: 59b9f9e9bdd8cfc9d6a096a7cc209086
size: 13893632
path: hyperparameter_study_5.db

@ -0,0 +1,4 @@
outs:
- md5: 36f0060d9f56ee2322b51291e35b6891
size: 1852576
path: simple_10.h5

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/simple_10.h5,1.0,1.0,0.98246087425796,0.9820755584152955,0.6389510713143588,0.45406189555125726
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/simple_10.h5 1.0 1.0 0.98246087425796 0.9820755584152955 0.6389510713143588 0.45406189555125726

@ -0,0 +1,4 @@
outs:
- md5: ffa13e4e4112f6af8ff4cb78f8f8823e
size: 482649
path: simple_10.pth

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/simple_10.pth,0.9999550238373662,0.9999552552686921,0.9131138694009714,0.9113924050632911,0.6579788935081547,0.6241433842909858
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/simple_10.pth 0.9999550238373662 0.9999552552686921 0.9131138694009714 0.9113924050632911 0.6579788935081547 0.6241433842909858

@ -0,0 +1,4 @@
outs:
- md5: 8c67087c6372ab7c4a45a861b8b677a6
size: 2835616
path: simple_15.h5

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/simple_15.h5,1.0,1.0,0.9847381385884117,0.9849022620119723,0.636373276776246,0.47720689129440463
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/simple_15.h5 1.0 1.0 0.9847381385884117 0.9849022620119723 0.636373276776246 0.47720689129440463

@ -0,0 +1,4 @@
outs:
- md5: f16baba118a7936705d8e62216cf13ee
size: 490329
path: simple_15.pth

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/simple_15.pth,1.0,1.0,0.9293134839884332,0.9300402798388806,0.6504772004241781,0.6158508158508158
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/simple_15.pth 1.0 1.0 0.9293134839884332 0.9300402798388806 0.6504772004241781 0.6158508158508158

@ -0,0 +1,4 @@
outs:
- md5: 3234dfb259f27ddbbf279c032114f311
size: 4605088
path: simple_24.h5

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/simple_24.h5,1.0,1.0,0.9827444253859349,0.9828520778645212,0.6468246788492853,0.5168316831683168
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/simple_24.h5 1.0 1.0 0.9827444253859349 0.9828520778645212 0.6468246788492853 0.5168316831683168

@ -0,0 +1,4 @@
outs:
- md5: f7a87cc10bdcbd051ded6dc88eb5b113
size: 504153
path: simple_24.pth

@ -0,0 +1,2 @@
model,acc_train,f1_train,acc_val,f1_val,acc_test,f1_test
./saved_models/simple_24.pth,1.0,1.0,0.9535506003430532,0.9538073144104804,0.6691695313913516,0.6621709006928407
1 model acc_train f1_train acc_val f1_val acc_test f1_test
2 ./saved_models/simple_24.pth 1.0 1.0 0.9535506003430532 0.9538073144104804 0.6691695313913516 0.6621709006928407

@ -0,0 +1,29 @@
import pandas as pd
import numpy as np


def interpolate_between_windows(fmri_scan, anomaly_starts, window_radius):
    """Erase the signal between anomaly windows and fill the gaps by linear interpolation.

    Every sample lying outside a window of ``window_radius`` around an
    anomaly start is discarded; the gaps between consecutive windows are
    then replaced with linearly interpolated values.

    Parameters
    ----------
    fmri_scan : np.ndarray
        Multichannel signal of shape (timesteps, channels), time axis first.
    anomaly_starts : array-like of int
        Time indices at which anomalies begin.
    window_radius : int
        Half-width of the preserved window centred on each anomaly start.

    Returns
    -------
    np.ndarray
        Array of shape (timesteps, channels) with inter-window segments
        replaced by interpolated values.
    """
    anomaly_starts = np.asarray(anomaly_starts)
    # Interval edges: [1, s0-r, s0+r, s1-r, s1+r, ..., T-1]; consecutive
    # pairs (after reshaping below) delimit the gaps BETWEEN anomaly windows.
    edges = np.concatenate((
        [1],
        np.column_stack((anomaly_starts - window_radius,
                         anomaly_starts + window_radius)).ravel(),
        [fmri_scan.shape[0] - 1],
    ))
    # Clamp to the valid range and force a non-decreasing sequence
    # (overlapping windows would otherwise produce inverted intervals).
    edges = np.maximum.accumulate(np.maximum(edges, 1))
    gaps = edges.reshape(-1, 2)
    interpolated_channels = []
    for signal in np.transpose(fmri_scan):
        ts = pd.Series(signal.copy())
        for start, stop in gaps:
            # Positional slice on the default RangeIndex: blank out the gap.
            ts[start:stop] = None
        interpolated_channels.append(ts.interpolate())
    return np.transpose(np.array(interpolated_channels))

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -0,0 +1,482 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b8e97ab8-4f81-4e8b-9b71-7076a11e9280",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/vlpr/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from scipy.io import loadmat\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import cm\n",
"import seaborn as sns\n",
"plt.rcParams['figure.figsize'] = [9, 5]\n",
"from cycler import cycler\n",
"from IPython.display import display, clear_output\n",
"import optuna\n",
"import time \n",
"import ray\n",
"import torch\n",
"\n",
"# import joblib\n",
"# from ray.util.joblib import register_ray\n",
"# register_ray()"
]
},
{
"cell_type": "markdown",
"id": "95472450-29ba-4f4a-8641-a623b6d6d76f",
"metadata": {},
"source": [
"В этой книге подбираются гиперпараметры синтетической модели движения головы в фМРТ-томографе с помощью библиотеки optuna. \n",
"В качестве базовой модели взят классификатор из 3-х свёрточных и двух полносвязных слоёв. \n",
"Решается задача максимизации точности классификатора, рассчитанной для данных, записанных с реального томографа. \n",
"Классификатор обучен на синтетических данных для варьируемых гиперпараметров. "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2b5d4395-2896-4254-9dd2-a00d5b599e5e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"window_size = 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "137610a1-fd50-465c-a366-8727f8172fbe",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from synthetic_dataset_utils import gen_Xy \n",
"\n",
"X_train, y_train, X_val, y_val = gen_Xy(sample_num=20, timesteps=340, channels=6, window_size=window_size)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9413e010-4084-4970-ad6f-abbaba2c7ff8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Use one of \"Prepare\" in generate_real_data.ipynb\n",
"\n",
"X_test = np.load(f\"../assets/X_test_{window_size}.npy\")\n",
"y_test = np.load(f\"../assets/y_test_{window_size}.npy\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7dfd59a0-4b38-4172-977d-0bc9a15575a6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"from torch_train_utils import print_results, train\n",
"\n",
"class Model1D(nn.Module):\n",
" def __init__(self, input_shape):\n",
" super(Model1D, self).__init__()\n",
" self.instance_norm = nn.InstanceNorm1d(input_shape[0])\n",
" self.conv1 = nn.Sequential(nn.Conv1d(input_shape[0], 128, kernel_size=3, padding='same'), nn.ELU())\n",
" self.dense1 = nn.Sequential(nn.Flatten(), nn.Linear(128*input_shape[1], 128), nn.ELU())\n",
" self.dense2 = nn.Sequential(nn.Linear(128, 128), nn.ELU())\n",
" self.classifier = nn.Linear(128, 1)\n",
" self.sigmoid = nn.Sigmoid()\n",
"\n",
" def forward(self, x):\n",
" x = self.instance_norm(x)\n",
" x = self.conv1(x)\n",
" x = self.dense1(x)\n",
" x = self.dense2(x)\n",
" x = self.sigmoid(self.classifier(x))\n",
" return x.squeeze(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "220e60bf-2d43-4bfd-8026-a12e0f448c61",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# check that model trains\n",
"model = Model1D(X_train.shape[1:])\n",
"train(X_train, y_train, X_test, y_test, model, epochs=100, lr=1e-1, verbose=True);"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "69aadbd5-30d5-42d2-8089-f0c75fbeca1f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def objective(trial):\n",
" clear_output(wait=True)\n",
" noise_std_min = trial.suggest_float('noise_std_min', 0.000001, 0.0001)\n",
" noise_std_max = trial.suggest_float('noise_std_max', 0.0001, 0.1)\n",
" noise_std_stp = 6.643190903827471e-05\n",
" harm_ampl_min = trial.suggest_float('harm_ampl_min', 0.001, 0.005)\n",
" harm_ampl_max = trial.suggest_float('harm_ampl_max', 0.005, 0.1)\n",
" harm_ampl_step = 0.0006277917832562148\n",
" probability_steps = 0.01\n",
" step_ampl_min = trial.suggest_float('step_ampl_min', 0.00001, 0.0001)\n",
" step_ampl_max = trial.suggest_float('step_ampl_max', 0.0001, 0.8)\n",
" \n",
" X_train, y_train, X_val, y_val = gen_Xy(\n",
" sample_num=100, \n",
" timesteps=340, \n",
" channels=6, \n",
" window_size=window_size,\n",
" noise_std_min=noise_std_min,\n",
" noise_std_max=noise_std_max,\n",
" noise_std_stp=noise_std_stp,\n",
" harm_ampl_min=harm_ampl_min,\n",
" harm_ampl_max=harm_ampl_max,\n",
" harm_ampl_step=harm_ampl_step,\n",
" probability_steps=probability_steps,\n",
" step_ampl_min=step_ampl_min,\n",
" step_ampl_max=step_ampl_max\n",
" )\n",
" model = Model1D(X_train.shape[1:])\n",
" results, history = train(X_train, y_train, X_test, y_test, model, epochs=100, lr=1e-1)\n",
" \n",
" return results['val_accuracy']\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "df871ce2-01ed-470a-b7fc-2f371af2cbd7",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m[I 2023-04-19 15:01:21,126]\u001b[0m A new study created in RDB with name: ray\u001b[0m\n"
]
}
],
"source": [
"study = optuna.create_study(direction=\"maximize\", storage=f'sqlite:///../results/hyperparameter_study_{window_size}.db', study_name='ray')#, sampler=optuna.samplers.CmaEsSampler())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4bbe0684-702d-4cb3-92bd-86123505c0f0",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-04-19 15:01:23,341\tWARNING services.py:1780 -- WARNING: The object store is using /tmp instead of /dev/shm because /dev/shm has only 67084288 bytes available. This will harm performance! You may be able to free up space by deleting files in /dev/shm. If you are inside a Docker container, you can increase /dev/shm size by passing '--shm-size=10.24gb' to 'docker run' (or add it to the run_options list in a Ray cluster config). Make sure to set this to more than 30% of available RAM.\n",
"2023-04-19 15:01:24,578\tINFO worker.py:1553 -- Started a local Ray instance.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
" <div style=\"margin-left: 50px;display: flex;flex-direction: row;align-items: center\">\n",
" <h3 style=\"color: var(--jp-ui-font-color0)\">Ray</h3>\n",
" <svg version=\"1.1\" id=\"ray\" width=\"3em\" viewBox=\"0 0 144.5 144.6\" style=\"margin-left: 3em;margin-right: 3em\">\n",
" <g id=\"layer-1\">\n",
" <path fill=\"#00a2e9\" class=\"st0\" d=\"M97.3,77.2c-3.8-1.1-6.2,0.9-8.3,5.1c-3.5,6.8-9.9,9.9-17.4,9.6S58,88.1,54.8,81.2c-1.4-3-3-4-6.3-4.1\n",
" c-5.6-0.1-9.9,0.1-13.1,6.4c-3.8,7.6-13.6,10.2-21.8,7.6C5.2,88.4-0.4,80.5,0,71.7c0.1-8.4,5.7-15.8,13.8-18.2\n",
" c8.4-2.6,17.5,0.7,22.3,8c1.3,1.9,1.3,5.2,3.6,5.6c3.9,0.6,8,0.2,12,0.2c1.8,0,1.9-1.6,2.4-2.8c3.5-7.8,9.7-11.8,18-11.9\n",
" c8.2-0.1,14.4,3.9,17.8,11.4c1.3,2.8,2.9,3.6,5.7,3.3c1-0.1,2,0.1,3,0c2.8-0.5,6.4,1.7,8.1-2.7s-2.3-5.5-4.1-7.5\n",
" c-5.1-5.7-10.9-10.8-16.1-16.3C84,38,81.9,37.1,78,38.3C66.7,42,56.2,35.7,53,24.1C50.3,14,57.3,2.8,67.7,0.5\n",
" C78.4-2,89,4.7,91.5,15.3c0.1,0.3,0.1,0.5,0.2,0.8c0.7,3.4,0.7,6.9-0.8,9.8c-1.7,3.2-0.8,5,1.5,7.2c6.7,6.5,13.3,13,19.8,19.7\n",
" c1.8,1.8,3,2.1,5.5,1.2c9.1-3.4,17.9-0.6,23.4,7c4.8,6.9,4.6,16.1-0.4,22.9c-5.4,7.2-14.2,9.9-23.1,6.5c-2.3-0.9-3.5-0.6-5.1,1.1\n",
" c-6.7,6.9-13.6,13.7-20.5,20.4c-1.8,1.8-2.5,3.2-1.4,5.9c3.5,8.7,0.3,18.6-7.7,23.6c-7.9,5-18.2,3.8-24.8-2.9\n",
" c-6.4-6.4-7.4-16.2-2.5-24.3c4.9-7.8,14.5-11,23.1-7.8c3,1.1,4.7,0.5,6.9-1.7C91.7,98.4,98,92.3,104.2,86c1.6-1.6,4.1-2.7,2.6-6.2\n",
" c-1.4-3.3-3.8-2.5-6.2-2.6C99.8,77.2,98.9,77.2,97.3,77.2z M72.1,29.7c5.5,0.1,9.9-4.3,10-9.8c0-0.1,0-0.2,0-0.3\n",
" C81.8,14,77,9.8,71.5,10.2c-5,0.3-9,4.2-9.3,9.2c-0.2,5.5,4,10.1,9.5,10.3C71.8,29.7,72,29.7,72.1,29.7z M72.3,62.3\n",
" c-5.4-0.1-9.9,4.2-10.1,9.7c0,0.2,0,0.3,0,0.5c0.2,5.4,4.5,9.7,9.9,10c5.1,0.1,9.9-4.7,10.1-9.8c0.2-5.5-4-10-9.5-10.3\n",
" C72.6,62.3,72.4,62.3,72.3,62.3z M115,72.5c0.1,5.4,4.5,9.7,9.8,9.9c5.6-0.2,10-4.8,10-10.4c-0.2-5.4-4.6-9.7-10-9.7\n",
" c-5.3-0.1-9.8,4.2-9.9,9.5C115,72.1,115,72.3,115,72.5z M19.5,62.3c-5.4,0.1-9.8,4.4-10,9.8c-0.1,5.1,5.2,10.4,10.2,10.3\n",
" c5.6-0.2,10-4.9,9.8-10.5c-0.1-5.4-4.5-9.7-9.9-9.6C19.6,62.3,19.5,62.3,19.5,62.3z M71.8,134.6c5.9,0.2,10.3-3.9,10.4-9.6\n",
" c0.5-5.5-3.6-10.4-9.1-10.8c-5.5-0.5-10.4,3.6-10.8,9.1c0,0.5,0,0.9,0,1.4c-0.2,5.3,4,9.8,9.3,10\n",
" C71.6,134.6,71.7,134.6,71.8,134.6z\"/>\n",
" </g>\n",
" </svg>\n",
" <table>\n",
" <tr>\n",
" <td style=\"text-align: left\"><b>Python version:</b></td>\n",
" <td style=\"text-align: left\"><b>3.10.6</b></td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left\"><b>Ray version:</b></td>\n",
" <td style=\"text-align: left\"><b> 2.3.1</b></td>\n",
" </tr>\n",
" \n",
" </table>\n",
" </div>\n",
"</div>\n"
],
"text/plain": [
"RayContext(dashboard_url='', python_version='3.10.6', ray_version='2.3.1', ray_commit='5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a', address_info={'node_ip_address': '172.17.0.3', 'raylet_ip_address': '172.17.0.3', 'redis_address': None, 'object_store_address': '/tmp/ray/session_2023-04-19_15-01-21_178597_339730/sockets/plasma_store', 'raylet_socket_name': '/tmp/ray/session_2023-04-19_15-01-21_178597_339730/sockets/raylet', 'webui_url': '', 'session_dir': '/tmp/ray/session_2023-04-19_15-01-21_178597_339730', 'metrics_export_port': 54619, 'gcs_address': '172.17.0.3:41322', 'address': '172.17.0.3:41322', 'dashboard_agent_listen_port': 52365, 'node_id': '57bf2181b750b2a14b11cae95b0e7be64561e3a69a817a7c7645f4b1'})"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ray.init(num_gpus=1, \n",
" ignore_reinit_error=True,\n",
" resources={\"cpu\": 32, \"gpu\": 32} )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21977c0d-29ce-42f6-80c0-942654d5df51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"@ray.remote(num_gpus=0.01)\n",
"def optimize(n_trials):\n",
" study = optuna.load_study(study_name='ray', storage=f'sqlite:///../results/hyperparameter_study_{window_size}.db')\n",
" study.optimize(objective, n_trials=n_trials)\n",
" \n",
"results = []\n",
"for i in range(12):\n",
" time.sleep(1)\n",
" results.append(optimize.remote(1000)) \n",
"\n",
"\n",
"for i in range(10000):\n",
" time.sleep(2)\n",
" clear_output(wait=True)\n",
" \n",
" \n",
"# [ray.get(x) for x in results] \n",
"# study.best_trial, study.best_params "
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "35d60e56-1028-4428-a69d-d4e217efe11c",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(FrozenTrial(number=8316, state=TrialState.COMPLETE, values=[0.7329734219269103], datetime_start=datetime.datetime(2023, 4, 19, 17, 5, 3, 109691), datetime_complete=datetime.datetime(2023, 4, 19, 17, 5, 14, 306688), params={'harm_ampl_max': 0.05141408658277873, 'harm_ampl_min': 0.004216894373183093, 'noise_std_max': 0.04576914309029287, 'noise_std_min': 4.005219535133996e-05, 'step_ampl_max': 0.2241382196779908, 'step_ampl_min': 8.371016275499698e-05}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'harm_ampl_max': FloatDistribution(high=0.1, log=False, low=0.005, step=None), 'harm_ampl_min': FloatDistribution(high=0.005, log=False, low=0.001, step=None), 'noise_std_max': FloatDistribution(high=0.1, log=False, low=0.0001, step=None), 'noise_std_min': FloatDistribution(high=0.0001, log=False, low=1e-06, step=None), 'step_ampl_max': FloatDistribution(high=0.8, log=False, low=0.0001, step=None), 'step_ampl_min': FloatDistribution(high=0.0001, log=False, low=1e-05, step=None)}, trial_id=8317, value=None),\n",
" {'harm_ampl_max': 0.05141408658277873,\n",
" 'harm_ampl_min': 0.004216894373183093,\n",
" 'noise_std_max': 0.04576914309029287,\n",
" 'noise_std_min': 4.005219535133996e-05,\n",
" 'step_ampl_max': 0.2241382196779908,\n",
" 'step_ampl_min': 8.371016275499698e-05})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"study.best_trial, study.best_params "
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "a04877b4-defa-4730-b3ce-fa275e709ae7",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>harm_ampl_max</th>\n",
" <th>harm_ampl_min</th>\n",
" <th>noise_std_max</th>\n",
" <th>noise_std_min</th>\n",
" <th>step_ampl_max</th>\n",
" <th>step_ampl_min</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.051414</td>\n",
" <td>0.004217</td>\n",
" <td>0.045769</td>\n",
" <td>0.00004</td>\n",
" <td>0.224138</td>\n",
" <td>0.000084</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" harm_ampl_max harm_ampl_min noise_std_max noise_std_min step_ampl_max \\\n",
"0 0.051414 0.004217 0.045769 0.00004 0.224138 \n",
"\n",
" step_ampl_min \n",
"0 0.000084 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(data={k:[v] for k,v in study.best_params.items()})"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "44564237-c95b-4e58-9646-b76059482cc9",
"metadata": {},
"outputs": [],
"source": [
"data = {k:[v] for k,v in study.best_params.items()}\n",
"data['best_acc'] = study.best_trial.values\n",
"data['total_trials'] = 12*1000\n",
"pd.DataFrame(data=data).to_csv(f\"../results/{window_size}_synth_model_best_params.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4042fb3d-7cda-4fa4-b333-fa31122e1661",
"metadata": {},
"outputs": [],
"source": [
"# 'noise_std_min': 9.502179883627595e-05,\n",
"# 'noise_std_max': 0.0018858019888194724,\n",
"# 'noise_std_stp': 6.643190903827471e-05,\n",
"# 'harm_ampl_min': 0.0038549419185346817,\n",
"# 'harm_ampl_max': 0.005978867746555492,\n",
"# 'harm_ampl_step': 0.0006277917832562148,\n",
"# 'probability_steps': 0.035397264960945196,\n",
"# 'step_ampl_min': 0.004726791192229224,\n",
"# 'step_ampl_max': 0.17299860153013677}"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "d1e416f5-6724-4b33-bc07-7d3dfa1c32da",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[None, None, None, None, None, None, None, None, None, None, None, None]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[ray.cancel(a) for a in results]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04220a72-0917-4306-9ab8-b2783dbf44ad",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -0,0 +1,447 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "61c0d6a0-ca1e-463a-a0c0-70cc2a7defbf",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os"
]
},
{
"cell_type": "markdown",
"id": "0468bda8-1cf7-47d0-8ac1-311ca2d93bfc",
"metadata": {},
"source": [
"### Метрики качества моделей, обученных на синтетических данных "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e0f7d0ae-ba4f-4900-9dc2-fc9a2f66d897",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>model</th>\n",
" <th>acc_train</th>\n",
" <th>f1_train</th>\n",
" <th>acc_val</th>\n",
" <th>f1_val</th>\n",
" <th>acc_test</th>\n",
" <th>f1_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/gcaec_10.h5</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.979313</td>\n",
" <td>0.978806</td>\n",
" <td>0.627758</td>\n",
" <td>0.429412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/gcaec_15.h5</td>\n",
" <td>0.999987</td>\n",
" <td>0.999987</td>\n",
" <td>0.980775</td>\n",
" <td>0.980883</td>\n",
" <td>0.623860</td>\n",
" <td>0.452708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/gcaec_24.h5</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.988473</td>\n",
" <td>0.988546</td>\n",
" <td>0.619776</td>\n",
" <td>0.459699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/simple_24.pth</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.953551</td>\n",
" <td>0.953807</td>\n",
" <td>0.669170</td>\n",
" <td>0.662171</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/simple_24.h5</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.982744</td>\n",
" <td>0.982852</td>\n",
" <td>0.646825</td>\n",
" <td>0.516832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/simple_15.pth</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.929313</td>\n",
" <td>0.930040</td>\n",
" <td>0.650477</td>\n",
" <td>0.615851</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/simple_15.h5</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.984738</td>\n",
" <td>0.984902</td>\n",
" <td>0.636373</td>\n",
" <td>0.477207</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/simple_10.pth</td>\n",
" <td>0.999955</td>\n",
" <td>0.999955</td>\n",
" <td>0.913114</td>\n",
" <td>0.911392</td>\n",
" <td>0.657979</td>\n",
" <td>0.624143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>./saved_models/simple_10.h5</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.982461</td>\n",
" <td>0.982076</td>\n",
" <td>0.638951</td>\n",
" <td>0.454062</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" model acc_train f1_train acc_val f1_val \\\n",
"0 ./saved_models/gcaec_10.h5 1.000000 1.000000 0.979313 0.978806 \n",
"0 ./saved_models/gcaec_15.h5 0.999987 0.999987 0.980775 0.980883 \n",
"0 ./saved_models/gcaec_24.h5 1.000000 1.000000 0.988473 0.988546 \n",
"0 ./saved_models/simple_24.pth 1.000000 1.000000 0.953551 0.953807 \n",
"0 ./saved_models/simple_24.h5 1.000000 1.000000 0.982744 0.982852 \n",
"0 ./saved_models/simple_15.pth 1.000000 1.000000 0.929313 0.930040 \n",
"0 ./saved_models/simple_15.h5 1.000000 1.000000 0.984738 0.984902 \n",
"0 ./saved_models/simple_10.pth 0.999955 0.999955 0.913114 0.911392 \n",
"0 ./saved_models/simple_10.h5 1.000000 1.000000 0.982461 0.982076 \n",
"\n",
" acc_test f1_test \n",
"0 0.627758 0.429412 \n",
"0 0.623860 0.452708 \n",
"0 0.619776 0.459699 \n",
"0 0.669170 0.662171 \n",
"0 0.646825 0.516832 \n",
"0 0.650477 0.615851 \n",
"0 0.636373 0.477207 \n",
"0 0.657979 0.624143 \n",
"0 0.638951 0.454062 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows = []\n",
"for f in os.listdir(\"../results/\"):\n",
" if \"stats.csv\" in f:\n",
" rows.append(pd.read_csv(f\"../results/{f}\"))\n",
" \n",
"results = pd.concat(rows)\n",
"results"
]
},
{
"cell_type": "markdown",
"id": "55d2d295-d2cb-44a4-9e6c-f95a5c9157bf",
"metadata": {},
"source": [
"### Размеры тренировочных и тестовых датасетов"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9f0130ad-3da0-4e67-89d0-a85bff70f6de",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>train_shape</th>\n",
" <th>val_shape</th>\n",
" <th>test_shape</th>\n",
" <th>train_classes_num</th>\n",
" <th>val_classes_num</th>\n",
" <th>test_classes_num</th>\n",
" </tr>\n",
" <tr>\n",
" <th>window_size</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>(44468, 10, 6)</td>\n",
" <td>(11118, 10, 6)</td>\n",
" <td>(6254, 10, 6)</td>\n",
" <td>(22118, 22350)</td>\n",
" <td>(5675, 5443)</td>\n",
" <td>(3127, 3127)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>(74696, 15, 6)</td>\n",
" <td>(18674, 15, 6)</td>\n",
" <td>(9430, 15, 6)</td>\n",
" <td>(37436, 37260)</td>\n",
" <td>(9249, 9425)</td>\n",
" <td>(4715, 4715)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>(116600, 24, 6)</td>\n",
" <td>(29150, 24, 6)</td>\n",
" <td>(11054, 24, 6)</td>\n",
" <td>(58422, 58178)</td>\n",
" <td>(14453, 14697)</td>\n",
" <td>(5527, 5527)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" train_shape val_shape test_shape \\\n",
"window_size \n",
"10 (44468, 10, 6) (11118, 10, 6) (6254, 10, 6) \n",
"15 (74696, 15, 6) (18674, 15, 6) (9430, 15, 6) \n",
"24 (116600, 24, 6) (29150, 24, 6) (11054, 24, 6) \n",
"\n",
" train_classes_num val_classes_num test_classes_num \n",
"window_size \n",
"10 (22118, 22350) (5675, 5443) (3127, 3127) \n",
"15 (37436, 37260) (9249, 9425) (4715, 4715) \n",
"24 (58422, 58178) (14453, 14697) (5527, 5527) "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows = []\n",
"for window in [10, 15, 24]:\n",
" X_train = np.load(f\"../assets/X_train_{window}.npy\")\n",
" X_val = np.load(f\"../assets/X_val_{window}.npy\")\n",
" X_test = np.load(f\"../assets/X_test_{window}.npy\")\n",
" y_train = np.load(f\"../assets/y_train_{window}.npy\")\n",
" y_val = np.load(f\"../assets/y_val_{window}.npy\")\n",
" y_test = np.load(f\"../assets/y_test_{window}.npy\")\n",
" a = ((y_train==0).sum(),(y_train==1).sum())\n",
" b = ((y_val==0).sum(),(y_val==1).sum())\n",
" c = ((y_test==0).sum(),(y_test==1).sum())\n",
" rows.append([X_train.shape, X_val.shape, X_test.shape, a, b, c])\n",
"\n",
"dataset_shapes = pd.DataFrame(rows, columns=['train_shape', 'val_shape', 'test_shape', 'train_classes_num', 'val_classes_num', 'test_classes_num'], index=[10,15,24])\n",
"dataset_shapes.index.rename(\"window_size\", inplace=True)\n",
"dataset_shapes"
]
},
{
"cell_type": "markdown",
"id": "bad7ce4d-63de-49ea-81c9-4e3319a89892",
"metadata": {},
"source": [
"### Результаты подбора гиперпараметров синтетической модели с условием максимизации accuracy на реальных данных"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c638fd5a-f061-4f4e-b30f-95aba502a1ff",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>harm_ampl_max</th>\n",
" <th>harm_ampl_min</th>\n",
" <th>noise_std_max</th>\n",
" <th>noise_std_min</th>\n",
" <th>step_ampl_max</th>\n",
" <th>step_ampl_min</th>\n",
" <th>best_acc</th>\n",
" <th>total_trials</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.051414</td>\n",
" <td>0.004217</td>\n",
" <td>0.045769</td>\n",
" <td>0.00004</td>\n",
" <td>0.224138</td>\n",
" <td>0.000084</td>\n",
" <td>0.732973</td>\n",
" <td>12000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.093355</td>\n",
" <td>0.002075</td>\n",
" <td>0.065002</td>\n",
" <td>0.00009</td>\n",
" <td>0.216332</td>\n",
" <td>0.000061</td>\n",
" <td>0.695555</td>\n",
" <td>12000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" harm_ampl_max harm_ampl_min noise_std_max noise_std_min step_ampl_max \\\n",
"0 0.051414 0.004217 0.045769 0.00004 0.224138 \n",
"0 0.093355 0.002075 0.065002 0.00009 0.216332 \n",
"\n",
" step_ampl_min best_acc total_trials \n",
"0 0.000084 0.732973 12000 \n",
"0 0.000061 0.695555 12000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aa = []\n",
"for window_size in [5,10]:\n",
" aa.append(pd.read_csv(f\"../results/{window_size}_synth_model_best_params.csv\"))\n",
"pd.concat(aa)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87520bd6-8743-44bc-86ef-5fb05df3b255",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,250 @@
import pandas as pd
import numpy as np
def gen_steps(
    timesteps=340,
    channels=6,
    p_steps=0.01,
    step_ampl_min=0.0002,
    step_ampl_max=0.05,
    channel_corr_matrix=None
):
    """Generate random step (shift) artifacts for a multichannel signal.

    Each timestep independently starts a new step with probability ``p_steps``;
    started steps accumulate over time (cumulative sum) with a random sign per
    entry, then are scaled by a per-channel amplitude and a random row of the
    channel correlation matrix.

    Returns:
        (steps, onset_indexes): ``steps`` has shape (timesteps, channels);
        ``onset_indexes`` are the timesteps where a step begins.
    """
    if channel_corr_matrix is None:
        channel_corr_matrix = np.ones((channels, channels))
    # 1 marks a timestep where a new step artifact starts.
    start = np.random.binomial(n=1, p=p_steps, size=(timesteps, 1))
    # BUG FIX: previously `random()*step_ampl_max + step_ampl_min`, i.e. uniform
    # on [min, max + min); draw uniformly on [step_ampl_min, step_ampl_max].
    ampl_params = step_ampl_min + np.random.random(size=(1, channels)) * (step_ampl_max - step_ampl_min)
    sign_params = np.random.choice([-1, 1], size=(timesteps, channels), replace=True)
    # Couple the channels via a random row of the correlation matrix.
    corr_params = channel_corr_matrix[np.random.randint(channels)]
    steps = np.cumsum(start * sign_params, axis=0) * ampl_params * corr_params
    return steps, np.nonzero(start)[0]
def gen_trend(
    timesteps=340,
    channels=6,
    step_trend_min=5,
    step_trend_max=10,
    channel_corr_matrix=None
):
    """Generate a triangular (rise-then-fall) baseline drift per channel.

    A per-channel slope is drawn from ``np.arange(step_trend_min, step_trend_max)``
    (the epsilon guarantees at least one candidate), scaled by ``1/timesteps``.
    The linear ramp is folded into a triangle and rolled by a random offset
    along the time axis.

    Returns an array of shape (timesteps, channels).
    """
    trend_params = np.random.choice(np.arange(step_trend_min, max(step_trend_min + 1e-14, step_trend_max)), size=channels, replace=True) / timesteps
    if channel_corr_matrix is None:
        # No correlation structure given: independent random sign per channel.
        trend_params = trend_params * np.random.choice([1, -1], size=channels)
    else:
        # Couple channel slopes via a random row of the correlation matrix.
        trend_params = trend_params * channel_corr_matrix[np.random.randint(channel_corr_matrix.shape[0])]
    trend_params = trend_params[np.newaxis, :]
    # Linear ramp 0..timesteps, one column per channel, scaled by the slopes.
    trend = np.transpose(np.tile(np.linspace(0, timesteps, timesteps), reps=(channels, 1))) * trend_params
    # Fold the ramp into a triangle (first half rising, second half falling).
    trend = np.concatenate([trend[:timesteps//2], trend[timesteps//2:0:-1]], axis=0)
    # BUG FIX: for odd `timesteps` the triangle came out one row short
    # (2*(timesteps//2) rows), making it impossible to add to a
    # (timesteps, channels) signal; pad with the last row.
    if trend.shape[0] < timesteps:
        trend = np.concatenate([trend, trend[-1:]], axis=0)
    # BUG FIX: np.roll without axis= rolls the FLATTENED array, shifting values
    # across channel boundaries; roll along the time axis only.
    trend = np.roll(trend, shift=np.random.randint(trend.shape[0]), axis=0)
    return trend
def gen_noise(
    timesteps=340,
    channels=6,
    noise_mean=0,
    noise_std_min=0.00001,
    noise_std_max=0.00003,
    noise_std_stp=0.00001/50
):
    """Generate Gaussian channel noise of shape (timesteps, channels).

    A single std value is drawn from the grid [noise_std_min, noise_std_max]
    with step ``noise_std_stp``, then each channel is independently amplified
    100x with probability 0.01 to simulate occasionally bad channels.

    NOTE(review): the covariance is built as ``eye(channels) * noise_std``,
    i.e. the drawn value is used directly as the variance rather than being
    squared — confirm this is intended.
    """
    # (Removed a dead `noise_std = np.full(...)` assignment that was
    # immediately overwritten; no RNG draws were involved, so the random
    # stream is unchanged.)
    # Rare (1%) per-channel 100x amplification models sporadic noisy channels.
    amplification = np.random.choice([1, 100], size=channels, p=[0.99, 0.01])
    noise_std = np.random.choice(np.arange(noise_std_min, noise_std_max + noise_std_stp, noise_std_stp)) * amplification
    noise_cov = np.eye(channels)*noise_std
    signal = np.random.multivariate_normal(np.repeat(noise_mean, channels), noise_cov, timesteps)
    return signal
def gen_harmon_signal(
timesteps=340,
harm_ampl_min=0.02,
harm_ampl_max=0.05,
harm_ampl_step=0.005
):
harm_ampl = np.full(shape=(timesteps,1), fill_value=harm_ampl_min)
harm_ampl_choices = np.arange(start=harm_ampl_min, stop=harm_ampl_max + harm_ampl_step, step=harm_ampl_step)
for i in range(timesteps):
harm_ampl[i] = np.random.choice(harm_ampl_choices)
cos_arg = np.zeros((timesteps, 3))
cos_arg[:, 0] = np.linspace(0, 2 * np.pi, timesteps)
cos_arg[:, 1] = np.linspace(0, 4 * np.pi, timesteps)
if np.random.random() < 0.95:
cos_arg[:, 2] = np.linspace(0, 8 * np.pi, timesteps)
else:
# print("Curvy signal appears!")
cos_arg[:, 2] = np.linspace(0, 64 * np.pi, timesteps)
cos_arg[:, 0] *= 0
cos_arg[:, 1] *= 0
cos_harms = np.cos(cos_arg)
flip = 1 if np.random.random() < 0.5 else -1
cos_harms[:, 0] *= flip
flip = 1 if np.random.random() < 0.5 else -1
cos_harms[:, 1] *= flip
flip = 1 if np.random.random() < 0.5 else -1
cos_harms[:, 2] *= flip
cos_add = cos_harms.sum(axis=1)[:, np.newaxis]
cos_add *= harm_ampl
return cos_add
def gen_scan_motion_signal(
    timesteps,
    channels,
    scale=None,
    corr=None,
    noise_mean=0,
    noise_std_min=0.00001,
    noise_std_max=0.00002,
    noise_std_stp=0.00001/50,
    harm_ampl_min=0.005,
    harm_ampl_max=0.01,
    harm_ampl_step=0.0001,
    probability_steps=0.03,
    step_ampl_min=0.008,
    step_ampl_max=0.2,
    step_trend_min=None,
    step_trend_max=None
):
    """
    Compose a synthetic fMRI motion signal: noise + harmonic + steps + trend.

    Returns (signal, step_indexes): ``signal`` has shape (timesteps, channels);
    ``step_indexes`` are the timesteps where step anomalies start.

    Example of generated signal
    ```
    import matplotlib.pyplot as plt
    from synthetic_dataset_utils import gen_scan_motion_signal
    new_signal_fmri_scan, new_signal_fmri_steps = gen_scan_motion_signal(340, 6)
    plt.plot(new_signal_fmri_scan)
    plt.vlines(new_signal_fmri_steps, ymin=new_signal_fmri_scan.min(), ymax=new_signal_fmri_scan.max(), color='black', linewidth=1)
    ```
    """
    # BUG FIX: the trend-range defaults were `np.random.random()`, which Python
    # evaluates ONCE at import time, so every default call reused the same two
    # fixed "random" values.  Use None sentinels and draw fresh per call.
    # (Callers passing explicit values, e.g. gen_Xy, are unaffected.)
    if step_trend_min is None:
        step_trend_min = np.random.random()
    if step_trend_max is None:
        step_trend_max = np.random.random()
    scale = np.ones((1, channels)) if scale is None else scale
    signal = np.zeros([timesteps, channels])
    # Baseline Gaussian channel noise.
    signal += gen_noise(
        timesteps=timesteps,
        channels=channels,
        noise_mean=noise_mean,
        noise_std_min=noise_std_min,
        noise_std_max=noise_std_max,
        noise_std_stp=noise_std_stp
    )
    # Quasi-periodic (breathing-like) harmonic component, same for all channels.
    signal += gen_harmon_signal(
        timesteps=timesteps,
        harm_ampl_min=harm_ampl_min,
        harm_ampl_max=harm_ampl_max,
        harm_ampl_step=harm_ampl_step
    )
    # Step (shift) anomalies; their onset timesteps are returned to the caller.
    step_signal, step_indexes = gen_steps(
        timesteps=timesteps,
        channels=channels,
        p_steps=probability_steps,
        step_ampl_min=step_ampl_min,
        step_ampl_max=step_ampl_max,
        channel_corr_matrix=corr
    )
    signal += step_signal
    # Slow triangular drift.
    signal += gen_trend(
        timesteps=timesteps,
        channels=channels,
        step_trend_min=step_trend_min,
        step_trend_max=step_trend_max,
        channel_corr_matrix=None
    )
    signal *= scale
    return signal, step_indexes
def gen_Xy(
    sample_num,
    timesteps,
    channels,
    window_size,
    noise_mean=0,
    noise_std_min=0.0001,
    noise_std_max=0.002,
    noise_std_stp=0.00001,
    harm_ampl_min=0.005,
    harm_ampl_max=0.01,
    harm_ampl_step=0.0001,
    probability_steps=0.03,
    step_ampl_min=0.008,
    step_ampl_max=0.2,
    channel_corr_matrix=None,
    scale = None
):
    """
    Uses gen_scan_motion_signal to generate `sample_num` synthetic fMRI recordings.
    Shift (step) anomalies are injected into every recording.
    Each signal is chopped into pieces of length `window_size` and the pieces
    are labelled with two classes: normal (0), anomaly (1).
    Pieces are shuffled twice - within each class before balancing the class
    counts, and then across the whole dataset.
    Returns (X_train, y_train, X_val, y_val) with an 80/20 split, where X_*
    have shape (n, window_size, channels).

    NOTE(review): `channel_corr_matrix` is accepted but never forwarded
    (corr=None below) — confirm whether that is intended.
    """
    scale = np.ones((1,channels)) if scale is None else scale
    # ---------------------------- Generate fmri signals ---------------------------------------------
    signal_fmri_scans = []
    signal_fmri_steps = []
    for i in range(sample_num):
        # Trend range is re-drawn per recording so each scan drifts differently.
        signal, step_indexes = gen_scan_motion_signal(
            timesteps,
            channels,
            scale=scale,
            corr=None,
            noise_mean=noise_mean,
            noise_std_min=noise_std_min,
            noise_std_max=noise_std_max,
            noise_std_stp=noise_std_stp,
            harm_ampl_min=harm_ampl_min,
            harm_ampl_max=harm_ampl_max,
            harm_ampl_step=harm_ampl_step,
            probability_steps=probability_steps,
            step_ampl_min=step_ampl_min,
            step_ampl_max=step_ampl_max,
            step_trend_min=np.random.random(),
            step_trend_max=np.random.random()*0.5)
        signal_fmri_scans.append(signal)
        # indexes can be merged like in detect_shifts with window_merge(step_indexes, window_size=window_size)
        signal_fmri_steps.append(step_indexes)
    signal_fmri_scans = np.stack(signal_fmri_scans)
    # -------- Chop singals into short signals with sliding window and divide into two categories: normal and anomaly -----------
    normal_indexes = []
    anomaly_indexes = []
    normal_windows = []
    anomaly_windows = []
    for scan_idx, (fmri_scan, anomaly_window_starts) in enumerate(zip(signal_fmri_scans, signal_fmri_steps)):
        # Stride-1 sliding window over the time axis; trailing short windows
        # (shape[0] < window_size) are discarded.
        for start in range(fmri_scan.shape[0]):
            window_values = fmri_scan[start:start+window_size,:]
            if window_values.shape[0] == window_size:
                anomaly = False
                for anomaly_window_start in anomaly_window_starts:
                    # A window is "anomaly" only if a step onset lies strictly
                    # inside it with a 2-sample margin on both sides.
                    if (start <= anomaly_window_start-2) and (anomaly_window_start+2 < start+window_size):
                        anomaly = True
                        break
                if anomaly:
                    anomaly_windows.append(window_values)
                    anomaly_indexes.append([scan_idx, start])
                else:
                    normal_windows.append(window_values)
                    normal_indexes.append([scan_idx, start])
    normal_windows = np.array(normal_windows)
    anomaly_windows = np.array(anomaly_windows)
    # ----- Prepare dataset and labels ------
    # To get balanced dataset first shuffle across time and take only number equal to minimal presented class
    np.random.shuffle(normal_windows)
    np.random.shuffle(anomaly_windows)
    # After the first truncation len(normal_windows) may shrink, but the result
    # is still min(original counts) for both classes.
    normal_windows = normal_windows[:min(len(normal_windows),len(anomaly_windows))]
    anomaly_windows = anomaly_windows[:min(len(normal_windows),len(anomaly_windows))]
    X = np.concatenate((normal_windows, anomaly_windows), axis=0)
    y = np.concatenate([np.repeat(0, normal_windows.shape[0]), np.repeat(1, anomaly_windows.shape[0])])
    # Shuffle normal and anomaly examples
    # (X and y are permuted with the same index array to keep labels aligned).
    shuffled_index = np.arange(X.shape[0])
    np.random.shuffle(shuffled_index)
    X = X[shuffled_index]
    y = y[shuffled_index]
    # 80/20 train/validation split after shuffling.
    break_point = int(X.shape[0]*0.8)
    X_train = X[:break_point]
    y_train = y[:break_point]
    X_val = X[break_point:]
    y_val = y[break_point:]
    return X_train, y_train, X_val, y_val

@ -0,0 +1,63 @@
import pandas as pd
import numpy as np
from IPython.display import display, clear_output
import torch
import torch.nn as nn
def print_results(epoch, logs):
    """Render a 2x2 train/val loss-accuracy table for the current epoch.

    `logs` must hold exactly four values in insertion order
    (train loss, train accuracy, val loss, val accuracy); the table caption
    is the epoch number.  The previous output is cleared so the table updates
    in place inside a notebook cell.
    """
    values = np.array(list(logs.values())).reshape(2, 2)
    table = pd.DataFrame(data=values, columns=['loss', 'accuracy'], index=['train', 'val'])
    styled = table.style.set_caption(f"{epoch}")
    # Replace the previous epoch's table instead of appending a new one.
    clear_output(wait=True)
    display(styled)
def train(X_train, y_train, X_val, y_val, model, epochs=50, lr=1e-2, verbose=False):
    """Train a binary classifier with BCE loss on the GPU.

    Parameters:
        X_train, y_train, X_val, y_val: arrays convertible to torch tensors;
            the model's forward output is fed straight to BCELoss, so it must
            already be probabilities in [0, 1].
        model: a torch module; it is wrapped with torch.compile and moved to CUDA.
        epochs: number of passes over the training data (must be >= 1, since
            the returned metrics come from the last completed epoch).
        lr: Adam learning rate.
        verbose: if True, display a live metrics table each epoch.

    Returns:
        (last_epoch_metrics, history) — both keyed by 'loss', 'accuracy',
        'val_loss', 'val_accuracy'; history holds per-epoch lists.
    """
    X_train = torch.tensor(X_train, dtype=torch.float32).cuda()
    X_val = torch.tensor(X_val, dtype=torch.float32).cuda()
    history = {'loss': [], 'accuracy': [], 'val_loss': [], 'val_accuracy': []}
    model = torch.compile(model).cuda()
    criterion = nn.BCELoss()
    # BUG FIX: the learning rate was hard-coded to 1e-3 here, silently
    # ignoring the `lr` argument; honor the parameter.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    train_dataset = torch.utils.data.TensorDataset(X_train, torch.tensor(y_train))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4096, shuffle=True)
    val_dataset = torch.utils.data.TensorDataset(X_val, torch.tensor(y_val))
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4096, shuffle=False)
    for i in range(epochs):
        running_loss = 0.0
        running_accuracy = 0.0
        # Train
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch.cuda()).cpu()
            loss = criterion(y_pred, y_batch.float())
            loss.backward()
            optimizer.step()
            # Losses are weighted by batch size so partial batches average correctly.
            running_loss += loss.item() * X_batch.size(0)
            # NOTE(review): if the model outputs shape (B, 1) while y_batch is
            # (B,), `(y_pred > 0.5) == y_batch` broadcasts to (B, B) and this
            # count is wrong — confirm the model returns a (B,) tensor.
            running_accuracy += ((y_pred > 0.5) == y_batch).sum().item()
        running_loss /= X_train.shape[0]
        running_accuracy /= X_train.shape[0]
        val_running_loss = 0.0
        val_running_accuracy = 0.0
        # Validate
        model.eval()
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred = model(X_batch.cuda()).cpu()
                loss = criterion(y_pred, y_batch.float())
                val_running_loss += loss.item() * X_batch.size(0)
                val_running_accuracy += ((y_pred > 0.5) == y_batch).sum().item()
        val_running_loss /= X_val.shape[0]
        val_running_accuracy /= X_val.shape[0]
        if verbose:
            print_results(i, {'loss': running_loss, 'accuracy': running_accuracy, 'val_loss': val_running_loss, 'val_accuracy': val_running_accuracy})
        history['loss'].append(running_loss)
        history['accuracy'].append(running_accuracy)
        history['val_loss'].append(val_running_loss)
        history['val_accuracy'].append(val_running_accuracy)
    # Final display duplicates the last in-loop one; kept for parity with callers.
    if verbose:
        print_results(i, {'loss': running_loss, 'accuracy': running_accuracy, 'val_loss': val_running_loss, 'val_accuracy': val_running_accuracy})
    return {'loss': running_loss, 'accuracy': running_accuracy, 'val_loss': val_running_loss, 'val_accuracy': val_running_accuracy}, history

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save