From ca03bd75df4a810eef26d1e58b2cef8cfbfffb24 Mon Sep 17 00:00:00 2001 From: Moshe Island Date: Tue, 3 Oct 2023 19:18:22 +0300 Subject: [PATCH] deepspeed-chat: fix incorrect lr when using lora only (#756) When training with LoRA only, get_optimizer_grouped_parameters() returns a list of 3 parameter groups, of which only the second is non-empty. DeepSpeed then removes the empty parameter groups from the optimizer [ref: DeepSpeedEngine._configure_optimizer(), DeepSpeed v0.10.3]. However, the lr_scheduler still holds 3 groups, so its per-group learning rates no longer line up with the optimizer's remaining groups. This causes the lr scheduler to update the LoRA params with the wrong learning rate. Fix it by removing all empty groups in get_optimizer_grouped_parameters(). Change-Id: I520841312bdedd6a572cf4c827e0bbf06f983575 Signed-off-by: Moshe Island Co-authored-by: Moshe Island --- applications/DeepSpeed-Chat/training/utils/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/applications/DeepSpeed-Chat/training/utils/utils.py b/applications/DeepSpeed-Chat/training/utils/utils.py index 99106393a..56adc5d38 100644 --- a/applications/DeepSpeed-Chat/training/utils/utils.py +++ b/applications/DeepSpeed-Chat/training/utils/utils.py @@ -209,9 +209,12 @@ def get_optimizer_grouped_parameters( 0.0, }, ] - if not optimizer_grouped_parameters[1]["params"]: - optimizer_grouped_parameters.pop(1) - return optimizer_grouped_parameters + + non_empty_groups = [] + for group in optimizer_grouped_parameters: + if group["params"]: + non_empty_groups.append(group) + return non_empty_groups def _z3_params_to_fetch(param_list):