From dc81cb368ce3e5761f298726fb3fade1b0ca5901 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 07:35:08 +0000 Subject: [PATCH] fix: ensure non-zero learning rate during warmup at iteration 0 The warmup learning rate calculation has been modified to use (it + 1)/(warmup_iters + 1) instead of it/warmup_iters. This ensures a non-zero learning rate at iteration 0 while keeping the warmup linear (the per-step values shift slightly, but the ramp shape is unchanged). Fixes #443 --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 951bda99..de578505 100644 --- a/train.py +++ b/train.py @@ -231,7 +231,7 @@ def estimate_loss(): def get_lr(it): # 1) linear warmup for warmup_iters steps if it < warmup_iters: - return learning_rate * it / warmup_iters + return learning_rate * (it + 1) / (warmup_iters + 1) # 2) if it > lr_decay_iters, return min learning rate if it > lr_decay_iters: return min_lr