main
Vladimir 1 month ago
parent fd482cc559
commit d66af64937

@ -269,13 +269,13 @@ def test(start_time, epoch, batches_per_epoch, batch_size, model, optimizer, cre
######################################### Training ################################################################
h_dim = 32
h_dim = 64
category_feature_dim = 8
layers_num = 6
num_heads = 2
class_num = 1
dataset_dropout_rate = 0.4
classifier_dropout_date = 0.4
features_dropout_rate = 0.4
model_dropout_date = 0.4
epochs = 500
batch_size = 30000
datasets_per_epoch = 1
@ -285,7 +285,7 @@ comment = sys.argv[1]
logs_dir = f'runs/{datetime.now().date()}_{datetime.now().hour:02d}_{datetime.now().minute:02d}_{datetime.now().second:02d}_{comment}/'
writer = SummaryWriter(logs_dir)
сheсkpoints_dir = f'checkpoints/{datetime.now().date()}_{datetime.now().hour:02d}_{datetime.now().minute:02d}_{datetime.now().second:02d}_{comment}/'
script_snapshot_path = Path(logs_dir + "bert_training_ddp.py")
script_snapshot_path = Path(logs_dir + Path(sys.argv[0]).name)
Path(сheсkpoints_dir).mkdir(parents=True, exist_ok=True)
print("Logs dir:", logs_dir)
print("Chekpoints dir:", сheсkpoints_dir)
@ -298,7 +298,7 @@ credit_train_dataset = CreditProductsDataset(
targets_path="/wd/data/train_target.csv",
train_uniq_client_ids_path=f"/wd/fold3_train_ids.csv",
test_uniq_client_ids_path=f"/wd/fold3_test_ids.csv",
dropout_rate=dataset_dropout_rate
dropout_rate=features_dropout_rate
)
print(f"Dataset preparation time: {datetime.now() - start_prep_time}")
@ -316,7 +316,7 @@ classifier = BertClassifier(
h_dim=h_dim,
class_num=class_num,
max_seq_len=credit_train_dataset.max_user_history,
dropout_rate = classifier_dropout_date
dropout_rate = model_dropout_date
)
model = Model(encoder=encoder, classifier=classifier).to("cuda")
@ -424,4 +424,4 @@ finally:
rocauc=rocauc,
сheсkpoints_dir=сheсkpoints_dir
)
writer.close()
writer.close()

@ -329,7 +329,7 @@ if __name__ == "__main__":
logs_dir = f'runs/{datetime.now().date()}_{datetime.now().hour:02d}_{datetime.now().minute:02d}_{datetime.now().second:02d}_{comment}/'
writer = SummaryWriter(logs_dir)
сheсkpoints_dir = f'checkpoints/{datetime.now().date()}_{datetime.now().hour:02d}_{datetime.now().minute:02d}_{datetime.now().second:02d}_{comment}/'
script_snapshot_path = Path(logs_dir + "bert_training_ddp.py")
script_snapshot_path = Path(logs_dir + Path(sys.argv[0]).name)
Path(сheсkpoints_dir).mkdir(parents=True, exist_ok=True)
print("Logs dir:", logs_dir)
print("Chekpoints dir:", сheсkpoints_dir)
@ -449,4 +449,4 @@ if __name__ == "__main__":
if rank == 0:
writer.close()
torch.distributed.destroy_process_group()

@ -24,7 +24,7 @@ Path(сheсkpoints_dir).mkdir(parents=True, exist_ok=True)
print("Logs dir:", logs_dir)
print("Chekpoints dir:", сheсkpoints_dir)
writer = SummaryWriter(logs_dir)
script_snapshot_path = Path(logs_dir + "bert_training.py")
script_snapshot_path = Path(logs_dir + Path(sys.argv[0]).name)
script_snapshot_path.write_bytes(Path(sys.argv[0]).read_bytes()) # copy this version of script
script_snapshot_path.chmod(0o400) # with read-only permission
@ -366,4 +366,4 @@ finally:
credit_dataset=credit_train_dataset,
encoder = encoder, model=model, optimizer=optimizer, epoch=epoch+1,
loss=loss.item(), rocauc=test_auroc.compute().item(), сheсkpoints_dir=сheсkpoints_dir)
writer.close()
writer.close()

Loading…
Cancel
Save