mirror of
https://github.com/karpathy/nanoGPT.git
synced 2026-04-17 15:13:23 +02:00
fix: ensure non-zero learning rate during warmup at iteration 0
The warmup learning rate calculation has been modified to use (it + 1)/(warmup_iters + 1) instead of it/warmup_iters. This ensures a non-zero learning rate at iteration 0 while maintaining the same linear warmup behavior. Fixes #443
This commit is contained in:
2
train.py
2
train.py
@@ -231,7 +231,7 @@ def estimate_loss():
 def get_lr(it):
     # 1) linear warmup for warmup_iters steps
     if it < warmup_iters:
-        return learning_rate * it / warmup_iters
+        return learning_rate * (it + 1) / (warmup_iters + 1)
     # 2) if it > lr_decay_iters, return min learning rate
     if it > lr_decay_iters:
         return min_lr
Reference in New Issue
Block a user