From 064b9e14c8a03dd270fb40050cc4a920e4c6848a Mon Sep 17 00:00:00 2001 From: Vladimir Protsenko Date: Tue, 10 Feb 2026 14:35:31 +0000 Subject: [PATCH] added new logs --- src/bert_optica_koef.py | 2 ++ src/bert_optica_koef_newf.py | 2 ++ src/bert_optica_nokoef.py | 2 ++ src/bert_optica_nokoef_newf.py | 2 ++ src/optical_matrix_multiplication/config.py | 5 +---- src/optical_matrix_multiplication/optical_mul.py | 2 +- src/optical_matrix_multiplication/propagator.py | 1 - src/train_gpt2.py | 5 +++-- src/train_optics_trainable_focal_dist_lens_64.py | 3 ++- src/train_optics_trainable_lens_128.py | 3 +++ src/train_optics_trainable_lens_256.py | 6 +++++- src/train_optics_trainable_lens_512.py | 6 +++++- src/train_optics_trainable_lens_64.py | 4 +++- 13 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/bert_optica_koef.py b/src/bert_optica_koef.py index 634c671..9098b80 100644 --- a/src/bert_optica_koef.py +++ b/src/bert_optica_koef.py @@ -379,6 +379,8 @@ model = BertClassifier( ).to(device) print(f'Parameters model - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}') +model_description = str(model) + f'\nParameters count - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}' +writer.add_text('model', model_description, 0) optimizer = schedulefree.AdamWScheduleFree(list(model.parameters()) + list(encoder.parameters())) if checkpoint_file is not None: diff --git a/src/bert_optica_koef_newf.py b/src/bert_optica_koef_newf.py index 9e5fb1d..a46e5e3 100644 --- a/src/bert_optica_koef_newf.py +++ b/src/bert_optica_koef_newf.py @@ -387,6 +387,8 @@ model = BertClassifier( ).to(device) print(f'Parameters model - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}') +model_description = str(model) + f'\nParameters count - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}' +writer.add_text('model', model_description, 0) optimizer = schedulefree.AdamWScheduleFree(list(model.parameters()) + list(encoder.parameters())) if checkpoint_file is not None: diff --git a/src/bert_optica_nokoef.py b/src/bert_optica_nokoef.py index 2c5b249..4f83cc6 100644 --- a/src/bert_optica_nokoef.py +++ b/src/bert_optica_nokoef.py @@ -377,6 +377,8 @@ model = BertClassifier( ).to(device) print(f'Parameters model - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}') +model_description = str(model) + f'\nParameters count - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}' +writer.add_text('model', model_description, 0) optimizer = schedulefree.AdamWScheduleFree(list(model.parameters()) + list(encoder.parameters())) if checkpoint_file is not None: diff --git a/src/bert_optica_nokoef_newf.py b/src/bert_optica_nokoef_newf.py index dcfede5..7728aa0 100644 --- a/src/bert_optica_nokoef_newf.py +++ b/src/bert_optica_nokoef_newf.py @@ -387,6 +387,8 @@ model = BertClassifier( ).to(device) print(f'Parameters model - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}') +model_description = str(model) + f'\nParameters count - {sum(p.numel() for p in model.parameters())}, parameters encoder - {sum(p.numel() for p in encoder.parameters())}' +writer.add_text('model', model_description, 0) optimizer = schedulefree.AdamWScheduleFree(list(model.parameters()) + list(encoder.parameters())) if checkpoint_file is not None: diff --git a/src/optical_matrix_multiplication/config.py b/src/optical_matrix_multiplication/config.py index 7b84265..26c8589 100644 --- a/src/optical_matrix_multiplication/config.py +++ b/src/optical_matrix_multiplication/config.py @@ -274,8 +274,7 @@ class Config(ConfigOpticBase, ConfigModelBase): wavelength: float = 532e-9, distance: float = 0.03, lens_pixel_size: float = 1.8e-6, - lens_size: int = 8192, - trainable_cylind_lens = False): + lens_size: int = 8192): """ Конструктор класса. @@ -295,7 +294,6 @@ class Config(ConfigOpticBase, ConfigModelBase): distance: дистанция в метрах распространения светового поля между плоскостями. lens_pixel_size: размер пикселя в метрах скрещенных линз в оптической системе (нужен исключительно для моделирования). lens_size: размер скрещенных линз в метрах в оптической системе (нужен исключительно для моделирования). - trainable_cylind_lens: обучаемые диагональные матрицы, линза перед фурье плоскостью """ ConfigOpticBase.__init__(self, wavelength, distance) @@ -322,7 +320,6 @@ class Config(ConfigOpticBase, ConfigModelBase): self._input_vector_split_x: int = left_matrix_split_x self._input_vector_split_y: int = left_matrix_split_y self._result_vector_split: int = result_matrix_split - self._trainable_cylind_lens = trainable_cylind_lens @property def matrix_split_x(self) -> int: diff --git a/src/optical_matrix_multiplication/optical_mul.py b/src/optical_matrix_multiplication/optical_mul.py index cbe5f57..d7759d1 100644 --- a/src/optical_matrix_multiplication/optical_mul.py +++ b/src/optical_matrix_multiplication/optical_mul.py @@ -371,7 +371,7 @@ class TrainableLensOpticalMul(_nn.Module): phase_row = phase_normalized.unsqueeze(0).unsqueeze(0) writer.add_image(f"{tag}/phase_row", phase_row, global_step, dataformats='CHW') - fig, ax = plt.subplots(figsize=(6, 4)) + fig, ax = plt.subplots(figsize=(12, 4)) ax.plot(wrapped_phase.detach().cpu().numpy(), label=f'Step {global_step}') ax.set_title(f"Cylindrical Lens Phase Profile (x) {tag}") ax.set_xlabel("Pixel Index") diff --git a/src/optical_matrix_multiplication/propagator.py b/src/optical_matrix_multiplication/propagator.py index 3079d13..66111a6 100644 --- a/src/optical_matrix_multiplication/propagator.py +++ b/src/optical_matrix_multiplication/propagator.py @@ -111,7 +111,6 @@ class Propagator(_ABC, _nn.Module): Распределение комплексной амплитуды светового поля, после распространения. """ - if (resul_shape is not None): field_shape = field.shape[-2:] operator_Y_shape = self.operator_Y.shape[-2:] diff --git a/src/train_gpt2.py b/src/train_gpt2.py index edee343..a5aa3d7 100644 --- a/src/train_gpt2.py +++ b/src/train_gpt2.py @@ -218,8 +218,9 @@ m = MODEL_CLASS( layers_num=layers_num ) m = m.to(device) -writer.add_text('model', str(m), 0) - +model_description = str(m) + f'\nParameters count - {sum(p.numel() for p in m.parameters())}' +writer.add_text('model', model_description, 0) +# TODO for all experiments optimizer = torch.optim.AdamW(m.parameters(), lr=learning_rate, betas=(0.90, 0.95), weight_decay=0.01) #################################### Checkpoint Function ######################################### diff --git a/src/train_optics_trainable_focal_dist_lens_64.py b/src/train_optics_trainable_focal_dist_lens_64.py index a0aac71..87a9558 100644 --- a/src/train_optics_trainable_focal_dist_lens_64.py +++ b/src/train_optics_trainable_focal_dist_lens_64.py @@ -329,7 +329,8 @@ m = MODEL_CLASS( layers_num=layers_num ) m = m.to(device) -writer.add_text('model', str(m), 0) +model_description = str(m) + f'\nParameters count - {sum(p.numel() for p in m.parameters())}' +writer.add_text('model', model_description, 0) #################################### Train ######################################### diff --git a/src/train_optics_trainable_lens_128.py b/src/train_optics_trainable_lens_128.py index 799f312..8c3f2dc 100644 --- a/src/train_optics_trainable_lens_128.py +++ b/src/train_optics_trainable_lens_128.py @@ -338,6 +338,8 @@ m = MODEL_CLASS( ) m = m.to(device) writer.add_text('model', str(m), 0) +model_description = str(m) + f'\nParameters count - {sum(p.numel() for p in m.parameters())}' +writer.add_text('model', model_description, 0) #################################### Train ######################################### @@ -450,6 +452,7 @@ task_results = "\n".join([complete(m, encode(task_prompt), 32) for task_prompt i print(task_results) writer.add_text('completions/task', task_results, i+1) +m.log_trainable_optic_params(writer, max_iters) # Save final checkpoint save_checkpoint( model=m, diff --git a/src/train_optics_trainable_lens_256.py b/src/train_optics_trainable_lens_256.py index d7df7f1..59e5f31 100644 --- a/src/train_optics_trainable_lens_256.py +++ b/src/train_optics_trainable_lens_256.py @@ -238,7 +238,7 @@ class OpticGPT2TrainableScalarAndLens(nn.Module): ################################################################################################### batch_size = 50 -gradient_accumulation_steps = 5 # check this impl for correctness https://unsloth.ai/blog/gradient +gradient_accumulation_steps = 1 # check this impl for correctness https://unsloth.ai/blog/gradient max_iters = int(4e4) #40000 eval_interval = 300 learning_rate = 1e-3 @@ -338,6 +338,8 @@ m = MODEL_CLASS( ) m = m.to(device) writer.add_text('model', str(m), 0) +model_description = str(m) + f'\nParameters count - {sum(p.numel() for p in m.parameters())}' +writer.add_text('model', model_description, 0) #################################### Train ######################################### @@ -450,6 +452,8 @@ task_results = "\n".join([complete(m, encode(task_prompt), 32) for task_prompt i print(task_results) writer.add_text('completions/task', task_results, i+1) +m.log_trainable_optic_params(writer, max_iters) + # Save final checkpoint save_checkpoint( model=m, diff --git a/src/train_optics_trainable_lens_512.py b/src/train_optics_trainable_lens_512.py index 16bd46e..9d081e2 100644 --- a/src/train_optics_trainable_lens_512.py +++ b/src/train_optics_trainable_lens_512.py @@ -238,7 +238,7 @@ class OpticGPT2TrainableScalarAndLens(nn.Module): ################################################################################################### batch_size = 50 -gradient_accumulation_steps = 10 # check this impl for correctness https://unsloth.ai/blog/gradient +gradient_accumulation_steps = 1 # check this impl for correctness https://unsloth.ai/blog/gradient max_iters = int(4e4) #40000 eval_interval = 300 learning_rate = 1e-3 @@ -338,6 +338,8 @@ m = MODEL_CLASS( ) m = m.to(device) writer.add_text('model', str(m), 0) +model_description = str(m) + f'\nParameters count - {sum(p.numel() for p in m.parameters())}' +writer.add_text('model', model_description, 0) #################################### Train ######################################### @@ -450,6 +452,8 @@ task_results = "\n".join([complete(m, encode(task_prompt), 32) for task_prompt i print(task_results) writer.add_text('completions/task', task_results, i+1) +m.log_trainable_optic_params(writer, max_iters) + # Save final checkpoint save_checkpoint( model=m, diff --git a/src/train_optics_trainable_lens_64.py b/src/train_optics_trainable_lens_64.py index b3f4c3c..bbe5a73 100644 --- a/src/train_optics_trainable_lens_64.py +++ b/src/train_optics_trainable_lens_64.py @@ -337,7 +337,8 @@ m = MODEL_CLASS( layers_num=layers_num ) m = m.to(device) -writer.add_text('model', str(m), 0) +model_description = str(m) + f'\nParameters count - {sum(p.numel() for p in m.parameters())}' +writer.add_text('model', model_description, 0) #################################### Train ######################################### @@ -450,6 +451,7 @@ task_results = "\n".join([complete(m, encode(task_prompt), 32) for task_prompt i print(task_results) writer.add_text('completions/task', task_results, i+1) +m.log_trainable_optic_params(writer, max_iters) # Save final checkpoint save_checkpoint( model=m,