Update Human Eval example script
lekurile committed Feb 22, 2024
1 parent 605e060 commit b55daca
Showing 1 changed file with 17 additions and 13 deletions.
evaluation/inference/human_eval/run_human_eval.py (+17 -13)
@@ -9,9 +9,9 @@
from human_eval.evaluation import evaluate_functional_correctness

parser = argparse.ArgumentParser()
parser.add_argument("--model", "-m", type=str, help="evaluation model name")
parser.add_argument("--max-tokens", type=int, default=256, help="max new tokens")
parser.add_argument("--num-samples-per-task", type=int, default=10, help="number of samples to gen/eval per task")
parser.add_argument("--model", "-m", type=str, default="codellama/CodeLlama-7b-Python-hf", help="evaluation model name")
parser.add_argument("--max-tokens", type=int, default=512, help="max new tokens")
parser.add_argument("--num-samples-per-task", type=int, default=20, help="number of samples to gen/eval per task")
parser.add_argument("--local_rank", type=int, default=int(os.getenv("LOCAL_RANK", "0")), help="local rank")
args = parser.parse_args()

@@ -27,7 +27,9 @@ def generate_samples(generation_function):
        for _ in range(args.num_samples_per_task)
    ]
    return samples
-# TODO (lekurile): Move these functions to utils in DSE repo (TBD)

print("Loading Problems")
problems = read_problems("human-eval/data/HumanEval.jsonl.gz")

print("Initializing HuggingFace Pipeline")
device = torch.device(get_accelerator().device_name(args.local_rank))
@@ -36,18 +38,22 @@ def generate_samples(generation_function):
max_length=args.max_tokens,
return_full_text=False)

print("Initializing DeepSpeed-MII Pipeline")
mii_pipe = mii.pipeline(args.model)

print("Loading Problems")
problems = read_problems("human-eval/data/HumanEval.jsonl.gz")

print("Generating Base Samples")
base_samples = generate_samples(generate_base_completion)

print("Base Pipeline Teardown")
del base_pipe
torch.cuda.empty_cache()

print("Initializing DeepSpeed-MII Pipeline")
mii_pipe = mii.pipeline(args.model)

print("Generating MII Samples")
mii_samples = generate_samples(generate_mii_completion)

print("MII Pipeline Teardown")
mii_pipe.destroy()

print("Writing Samples")
write_jsonl("base_samples.jsonl", base_samples)
write_jsonl("mii_samples.jsonl", mii_samples)
@@ -59,7 +65,5 @@ def generate_samples(generation_function):
print(f"Base Results = {base_results}")
print(f"MII Results = {mii_results}")

print("Executing Assertions")
for key in base_results.keys():
assert numpy.allclose(base_results[key], mii_results[key], rtol=0.2), \
f"Base result: {base_results[key]}, MII result: {mii_results[key]}, outside of rtol."
print(f"{key} - Base Result: {base_results[key]}, MII result: {mii_results[key]}")
