Modify codes so that different accelerators can be called according to specific device conditions (#863)

* use get_accelerator to get device

* bfloat16 --> bf16

---------

Co-authored-by: Olatunji Ruwase <[email protected]>
foin6 and tjruwase authored Feb 26, 2024
1 parent 0ac02da commit 6540db6
Showing 3 changed files with 9 additions and 6 deletions.
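
The pattern all three diffs apply, as a minimal self-contained sketch (assumes DeepSpeed is installed; the local_rank value here is illustrative): get_accelerator() returns DeepSpeed's abstraction over the active backend, and device_name(local_rank) yields the matching device string, so the scripts no longer hard-code f'cuda:{local_rank}'.

import torch
from deepspeed.accelerator import get_accelerator

local_rank = 0  # normally read from the LOCAL_RANK environment variable
# device_name() maps a rank to the active backend's device string,
# e.g. 'cuda:0' on NVIDIA GPUs or 'xpu:0' on Intel GPUs.
device = torch.device(get_accelerator().device_name(local_rank))

On a CUDA machine this resolves to the same device as before; on other accelerators it picks the correct backend without further code changes.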
9 changes: 5 additions & 4 deletions inference/huggingface/fill-mask/test-bert.py
@@ -4,13 +4,14 @@
 import torch
 import os
 import argparse
+from deepspeed.accelerator import get_accelerator
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--model", "-m", type=str, help="hf model name")
-parser.add_argument("--dtype", type=str, default="fp16", help="fp16 or fp32")
+parser.add_argument("--dtype", type=str, default="fp16", help="fp16 or fp32 or bf16")
 parser.add_argument("--local_rank", type=int, default=0, help="local rank")
 parser.add_argument("--trials", type=int, default=8, help="number of trials")
-parser.add_argument("--kernel-inject", action="store_true", help="inject kernels on")
+parser.add_argument("--kernel_inject", action="store_true", help="inject kernels on")
 parser.add_argument("--graphs", action="store_true", help="CUDA Graphs on")
 parser.add_argument("--triton", action="store_true", help="triton kernels on")
 parser.add_argument("--deepspeed", action="store_true", help="use deepspeed inference")
@@ -26,11 +27,11 @@
     pipe.model,
     mp_size=world_size,
     dtype=torch.float16 if args.triton else torch.float,
-    replace_with_kernel_inject=True,
+    replace_with_kernel_inject=args.kernel_inject,
     use_triton=args.triton,
 )
 
-pipe.device = torch.device(f'cuda:{local_rank}')
+pipe.device = torch.device(get_accelerator().device_name(local_rank))
 output = pipe("In Autumn the [MASK] fall from the trees.")
 
 if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
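Two behavioral changes above: replace_with_kernel_inject now follows the renamed --kernel_inject flag instead of being hard-coded to True, and the --dtype help string now lists bf16 (the commit's "bfloat16 --> bf16" rename). Below is a hypothetical helper showing the mapping such a flag implies; resolve_dtype and DTYPE_MAP are illustrative names, not code from the scripts.

import torch

# Hypothetical mapping from the --dtype flag's strings to torch dtypes.
DTYPE_MAP = {
    "fp16": torch.float16,
    "fp32": torch.float32,
    "bf16": torch.bfloat16,
}

def resolve_dtype(name: str) -> torch.dtype:
    return DTYPE_MAP[name]
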
3 changes: 2 additions & 1 deletion inference/huggingface/fill-mask/test-electra.py
@@ -4,6 +4,7 @@
 import torch
 import os
 from transformers.models.electra.modeling_electra import ElectraLayer
+from deepspeed.accelerator import get_accelerator
 
 local_rank = int(os.getenv('LOCAL_RANK', '0'))
 world_size = int(os.getenv('WORLD_SIZE', '4'))
@@ -21,7 +22,7 @@
     dtype=torch.float,
     injection_policy={ElectraLayer: ('output.dense')}
 )
-pipe.device = torch.device(f'cuda:{local_rank}')
+pipe.device = torch.device(get_accelerator().device_name(local_rank))
 output = pipe(f"HuggingFace is creating a {pipe.tokenizer.mask_token} that the community uses to solve NLP tasks.")
 
 if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
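test-electra.py (and test-roberta.py below) shard the model through an injection policy rather than kernel injection. A condensed, self-contained sketch of that pattern under single-process assumptions; the checkpoint name and WORLD_SIZE default are illustrative, not taken from the diffs.

import os

import deepspeed
import torch
from deepspeed.accelerator import get_accelerator
from transformers import pipeline
from transformers.models.electra.modeling_electra import ElectraLayer

local_rank = int(os.getenv('LOCAL_RANK', '0'))
world_size = int(os.getenv('WORLD_SIZE', '1'))

pipe = pipeline('fill-mask', model='google/electra-base-generator')
# injection_policy names the submodule output(s) inside each ElectraLayer
# that DeepSpeed should all-reduce under tensor parallelism.
pipe.model = deepspeed.init_inference(
    pipe.model,
    mp_size=world_size,
    dtype=torch.float,
    injection_policy={ElectraLayer: ('output.dense')},
)
# Same accelerator-agnostic device selection as in test-bert.py.
pipe.device = torch.device(get_accelerator().device_name(local_rank))
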
3 changes: 2 additions & 1 deletion inference/huggingface/fill-mask/test-roberta.py
@@ -4,6 +4,7 @@
 import torch
 import os
 from transformers.models.roberta.modeling_roberta import RobertaLayer
+from deepspeed.accelerator import get_accelerator
 
 local_rank = int(os.getenv('LOCAL_RANK', '0'))
 world_size = int(os.getenv('WORLD_SIZE', '4'))
@@ -22,7 +23,7 @@
     injection_policy={RobertaLayer: ('output.dense')}
 )
 
-pipe.device = torch.device(f'cuda:{local_rank}')
+pipe.device = torch.device(get_accelerator().device_name(local_rank))
 output = pipe("The invention of the <mask> revolutionized the way we communicate with each other.")
 
 if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0: