diff --git a/src/train_char_gpt2_128.py b/src/train_char_gpt2_128.py index a407bee..7207915 100644 --- a/src/train_char_gpt2_128.py +++ b/src/train_char_gpt2_128.py @@ -221,8 +221,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_char_gpt2_256.py b/src/train_char_gpt2_256.py index b862708..48b0cf7 100644 --- a/src/train_char_gpt2_256.py +++ b/src/train_char_gpt2_256.py @@ -221,8 +221,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_char_gpt2_512.py b/src/train_char_gpt2_512.py index b6d7137..d368aac 100644 --- a/src/train_char_gpt2_512.py +++ b/src/train_char_gpt2_512.py @@ -221,8 +221,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_char_gpt2_64.py b/src/train_char_gpt2_64.py index 6a12c5a..e5c5276 100644 --- a/src/train_char_gpt2_64.py +++ b/src/train_char_gpt2_64.py @@ -221,8 +221,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_char_gpt2_koef_128.py b/src/train_char_gpt2_koef_128.py index ee5814c..1bfab84 100644 --- a/src/train_char_gpt2_koef_128.py +++ b/src/train_char_gpt2_koef_128.py @@ -223,8 +223,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_char_gpt2_koef_256.py b/src/train_char_gpt2_koef_256.py index d4aa19e..e58f8e9 100644 --- a/src/train_char_gpt2_koef_256.py +++ b/src/train_char_gpt2_koef_256.py @@ -223,8 +223,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_char_gpt2_koef_512.py b/src/train_char_gpt2_koef_512.py index 093084a..e55283d 100644 --- a/src/train_char_gpt2_koef_512.py +++ b/src/train_char_gpt2_koef_512.py @@ -223,8 +223,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_char_gpt2_koef_64.py b/src/train_char_gpt2_koef_64.py index 6b68bba..47bd8c3 100644 --- a/src/train_char_gpt2_koef_64.py +++ b/src/train_char_gpt2_koef_64.py @@ -223,8 +223,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_128.py b/src/train_optics_char_gpt2_128.py index 1933153..aa2056d 100644 --- a/src/train_optics_char_gpt2_128.py +++ b/src/train_optics_char_gpt2_128.py @@ -327,8 +327,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_256.py b/src/train_optics_char_gpt2_256.py index 2634f9b..367c77b 100644 --- a/src/train_optics_char_gpt2_256.py +++ b/src/train_optics_char_gpt2_256.py @@ -327,8 +327,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_512.py b/src/train_optics_char_gpt2_512.py index e4eb00d..d9a1344 100644 --- a/src/train_optics_char_gpt2_512.py +++ b/src/train_optics_char_gpt2_512.py @@ -327,8 +327,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_64.py b/src/train_optics_char_gpt2_64.py index 877c41c..b158037 100644 --- a/src/train_optics_char_gpt2_64.py +++ b/src/train_optics_char_gpt2_64.py @@ -327,8 +327,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_ff.py b/src/train_optics_char_gpt2_ff.py index ac21ec3..5e6f8d3 100644 --- a/src/train_optics_char_gpt2_ff.py +++ b/src/train_optics_char_gpt2_ff.py @@ -326,8 +326,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_nokoef_128.py b/src/train_optics_char_gpt2_nokoef_128.py index b1d0977..ebaccff 100644 --- a/src/train_optics_char_gpt2_nokoef_128.py +++ b/src/train_optics_char_gpt2_nokoef_128.py @@ -325,8 +325,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_nokoef_256.py b/src/train_optics_char_gpt2_nokoef_256.py index 905408c..ff8613a 100644 --- a/src/train_optics_char_gpt2_nokoef_256.py +++ b/src/train_optics_char_gpt2_nokoef_256.py @@ -325,8 +325,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_nokoef_512.py b/src/train_optics_char_gpt2_nokoef_512.py index bd9b26c..5b500df 100644 --- a/src/train_optics_char_gpt2_nokoef_512.py +++ b/src/train_optics_char_gpt2_nokoef_512.py @@ -325,8 +325,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_char_gpt2_nokoef_64.py b/src/train_optics_char_gpt2_nokoef_64.py index 7dc825a..ff1a557 100644 --- a/src/train_optics_char_gpt2_nokoef_64.py +++ b/src/train_optics_char_gpt2_nokoef_64.py @@ -325,8 +325,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_trainable_focal_dist_lens_64.py b/src/train_optics_trainable_focal_dist_lens_64.py index 883e705..5f4779c 100644 --- a/src/train_optics_trainable_focal_dist_lens_64.py +++ b/src/train_optics_trainable_focal_dist_lens_64.py @@ -343,8 +343,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model ######################################### diff --git a/src/train_optics_trainable_lens_128.py b/src/train_optics_trainable_lens_128.py index 9a551d3..4192f39 100644 --- a/src/train_optics_trainable_lens_128.py +++ b/src/train_optics_trainable_lens_128.py @@ -341,8 +341,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_trainable_lens_256.py b/src/train_optics_trainable_lens_256.py index c802f4d..84275d1 100644 --- a/src/train_optics_trainable_lens_256.py +++ b/src/train_optics_trainable_lens_256.py @@ -341,8 +341,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_trainable_lens_512.py b/src/train_optics_trainable_lens_512.py index b090e0c..03963bd 100644 --- a/src/train_optics_trainable_lens_512.py +++ b/src/train_optics_trainable_lens_512.py @@ -341,8 +341,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################mo diff --git a/src/train_optics_trainable_lens_64.py b/src/train_optics_trainable_lens_64.py index 751147b..9f3f0f9 100644 --- a/src/train_optics_trainable_lens_64.py +++ b/src/train_optics_trainable_lens_64.py @@ -341,8 +341,6 @@ def perplexity(model, data, batch_size=32): # Progress update processed = min(i + batch_size, total_sequences) print(f"\rppl {processed}/{total_sequences} ({processed/total_sequences*100:.1f}%)", end="", flush=True) - - print() # Final newline return np.exp(total_loss_sum / total_tokens_count) #################################### Model #########################################