Modify codes so that different accelerators can be called according to specific device conditions (#863)

* use get_accelerator to get device

* bfloat16 --> bf16

---------

Co-authored-by: Olatunji Ruwase <[email protected]>
foin6 and tjruwase authored Feb 26, 2024
1 parent 0ac02da commit 6540db6
Showing 3 changed files with 9 additions and 6 deletions.
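
The pattern all three diffs apply, as a minimal self-contained sketch (assumes DeepSpeed is installed; the local_rank value here is illustrative): get_accelerator() returns DeepSpeed's abstraction over the active backend, and device_name(local_rank) yields the matching device string, so the scripts no longer hard-code f'cuda:{local_rank}'.

import torch
from deepspeed.accelerator import get_accelerator

local_rank = 0  # normally read from the LOCAL_RANK environment variable
# device_name() maps a rank to the active backend's device string,
# e.g. 'cuda:0' on NVIDIA GPUs or 'xpu:0' on Intel GPUs.
device = torch.device(get_accelerator().device_name(local_rank))

On a CUDA machine this resolves to the same device as before; on other accelerators it picks the correct backend without further code changes.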
9 changes: 5 additions & 4 deletions inference/huggingface/fill-mask/test-bert.py
@@ -4,13 +4,14 @@
 import torch
 import os
 import argparse
+from deepspeed.accelerator import get_accelerator
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--model", "-m", type=str, help="hf model name")
-parser.add_argument("--dtype", type=str, default="fp16", help="fp16 or fp32")
+parser.add_argument("--dtype", type=str, default="fp16", help="fp16 or fp32 or bf16")
 parser.add_argument("--local_rank", type=int, default=0, help="local rank")
 parser.add_argument("--trials", type=int, default=8, help="number of trials")
-parser.add_argument("--kernel-inject", action="store_true", help="inject kernels on")
+parser.add_argument("--kernel_inject", action="store_true", help="inject kernels on")
 parser.add_argument("--graphs", action="store_true", help="CUDA Graphs on")
 parser.add_argument("--triton", action="store_true", help="triton kernels on")
 parser.add_argument("--deepspeed", action="store_true", help="use deepspeed inference")
@@ -26,11 +27,11 @@
     pipe.model,
     mp_size=world_size,
     dtype=torch.float16 if args.triton else torch.float,
-    replace_with_kernel_inject=True,
+    replace_with_kernel_inject=args.kernel_inject,
     use_triton=args.triton,
 )
 
-pipe.device = torch.device(f'cuda:{local_rank}')
+pipe.device = torch.device(get_accelerator().device_name(local_rank))
 output = pipe("In Autumn the [MASK] fall from the trees.")
 
 if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
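Two behavioral changes above: replace_with_kernel_inject now follows the renamed --kernel_inject flag instead of being hard-coded to True, and the --dtype help string now lists bf16 (the commit's "bfloat16 --> bf16" rename). Below is a hypothetical helper showing the mapping such a flag implies; resolve_dtype and DTYPE_MAP are illustrative names, not code from the scripts.

import torch

# Hypothetical mapping from the --dtype flag's strings to torch dtypes.
DTYPE_MAP = {
    "fp16": torch.float16,
    "fp32": torch.float32,
    "bf16": torch.bfloat16,
}

def resolve_dtype(name: str) -> torch.dtype:
    return DTYPE_MAP[name]
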
3 changes: 2 additions & 1 deletion inference/huggingface/fill-mask/test-electra.py
@@ -4,6 +4,7 @@
 import torch
 import os
 from transformers.models.electra.modeling_electra import ElectraLayer
+from deepspeed.accelerator import get_accelerator
 
 local_rank = int(os.getenv('LOCAL_RANK', '0'))
 world_size = int(os.getenv('WORLD_SIZE', '4'))
@@ -21,7 +22,7 @@
     dtype=torch.float,
     injection_policy={ElectraLayer: ('output.dense')}
 )
-pipe.device = torch.device(f'cuda:{local_rank}')
+pipe.device = torch.device(get_accelerator().device_name(local_rank))
 output = pipe(f"HuggingFace is creating a {pipe.tokenizer.mask_token} that the community uses to solve NLP tasks.")
 
 if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
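test-electra.py (and test-roberta.py below) shard the model through an injection policy rather than kernel injection. A condensed, self-contained sketch of that pattern under single-process assumptions; the checkpoint name and WORLD_SIZE default are illustrative, not taken from the diffs.

import os

import deepspeed
import torch
from deepspeed.accelerator import get_accelerator
from transformers import pipeline
from transformers.models.electra.modeling_electra import ElectraLayer

local_rank = int(os.getenv('LOCAL_RANK', '0'))
world_size = int(os.getenv('WORLD_SIZE', '1'))

pipe = pipeline('fill-mask', model='google/electra-base-generator')
# injection_policy names the submodule output(s) inside each ElectraLayer
# that DeepSpeed should all-reduce under tensor parallelism.
pipe.model = deepspeed.init_inference(
    pipe.model,
    mp_size=world_size,
    dtype=torch.float,
    injection_policy={ElectraLayer: ('output.dense')},
)
# Same accelerator-agnostic device selection as in test-bert.py.
pipe.device = torch.device(get_accelerator().device_name(local_rank))
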
3 changes: 2 additions & 1 deletion inference/huggingface/fill-mask/test-roberta.py
@@ -4,6 +4,7 @@
 import torch
 import os
 from transformers.models.roberta.modeling_roberta import RobertaLayer
+from deepspeed.accelerator import get_accelerator
 
 local_rank = int(os.getenv('LOCAL_RANK', '0'))
 world_size = int(os.getenv('WORLD_SIZE', '4'))
@@ -22,7 +23,7 @@
     injection_policy={RobertaLayer: ('output.dense')}
 )
 
-pipe.device = torch.device(f'cuda:{local_rank}')
+pipe.device = torch.device(get_accelerator().device_name(local_rank))
 output = pipe("The invention of the <mask> revolutionized the way we communicate with each other.")
 
 if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0: