forked from ggerganov/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 387
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
# Conflicts: # .github/workflows/build.yml # .gitignore # CMakeLists.txt # Makefile
- Loading branch information
Showing
28 changed files
with
1,087 additions
and
727 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// swift-tools-version:5.3 | ||
|
||
import PackageDescription | ||
|
||
let package = Package( | ||
name: "llama", | ||
products: [ | ||
.library(name: "llama", targets: ["llama"]), | ||
], | ||
targets: [ | ||
.target( | ||
name: "llama", | ||
path: ".", | ||
sources: ["ggml.c", "llama.cpp"], | ||
publicHeadersPath: "spm-headers", | ||
cSettings: [.unsafeFlags(["-Wno-shorten-64-to-32"])] | ||
), | ||
], | ||
cxxLanguageStandard: .cxx11 | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#!/usr/bin/env python3 | ||
# Original by https://github.com/eiz | ||
# https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818 | ||
import argparse | ||
import glob | ||
import os | ||
import struct | ||
import sys | ||
from sentencepiece import SentencePieceProcessor | ||
|
||
HPARAMS = keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"] | ||
|
||
def parse_args(): | ||
parser = argparse.ArgumentParser(description='Upgrade old ggml model files to the current format') | ||
parser.add_argument('dir_model', help='directory containing ggml .bin files') | ||
parser.add_argument('tokenizer_model', help='path to LLaMA tokenizer.model file') | ||
return parser.parse_args() | ||
|
||
def read_header(f_in): | ||
struct_fmt = "i" * (3 + len(HPARAMS)) | ||
struct_size = struct.calcsize(struct_fmt) | ||
buf = f_in.read(struct_size) | ||
return struct.unpack(struct_fmt, buf) | ||
|
||
def write_header(f_out, header): | ||
(magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype) = header | ||
|
||
if magic != 0x67676d6c: | ||
raise Exception('Invalid file magic. Must be an old style ggml file.') | ||
|
||
values = [ | ||
0x67676d66, # magic: ggml in hex | ||
1, # file version | ||
vocab_size, | ||
dim, | ||
multiple_of, | ||
n_heads, | ||
n_layers, | ||
rot, | ||
ftype | ||
] | ||
f_out.write(struct.pack("i" * len(values), *values)) | ||
|
||
def write_tokens(fout, tokenizer): | ||
for i in range(tokenizer.vocab_size()): | ||
if tokenizer.is_unknown(i): | ||
text = " \u2047 ".encode("utf-8") | ||
elif tokenizer.is_control(i): | ||
text = b"" | ||
elif tokenizer.is_byte(i): | ||
piece = tokenizer.id_to_piece(i) | ||
if len(piece) != 6: | ||
print(f"Invalid token: {piece}") | ||
sys.exit(1) | ||
byte_value = int(piece[3:-1], 16) | ||
text = struct.pack("B", byte_value) | ||
else: | ||
text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") | ||
fout.write(struct.pack("i", len(text))) | ||
fout.write(text) | ||
fout.write(struct.pack("f", tokenizer.get_score(i))) | ||
|
||
def read_tokens(f_in, tokenizer): | ||
for i in range(tokenizer.vocab_size()): | ||
len_b = f_in.read(4) | ||
(length,) = struct.unpack("i", len_b) | ||
f_in.read(length) | ||
|
||
def copy_all_data(f_out, f_in): | ||
while True: | ||
buf = f_in.read(1024 * 1024) | ||
if not buf: | ||
break | ||
f_out.write(buf) | ||
|
||
def convert_one_file(path_in, tokenizer): | ||
path_tmp = f"{path_in}.tmp" | ||
path_orig= f"{path_in}.orig" | ||
print(f"converting {path_in}") | ||
with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out: | ||
write_header(f_out, read_header(f_in)) | ||
read_tokens(f_in, tokenizer) | ||
write_tokens(f_out, tokenizer) | ||
copy_all_data(f_out, f_in) | ||
os.rename(path_in, path_orig) | ||
os.rename(path_tmp, path_in) | ||
|
||
def main(): | ||
args = parse_args() | ||
files = [] | ||
files.extend(glob.glob(f"{args.dir_model}/*.bin")) | ||
files.extend(glob.glob(f"{args.dir_model}/*.bin.*")) | ||
|
||
tokenizer = SentencePieceProcessor(args.tokenizer_model) | ||
|
||
for file in files: | ||
convert_one_file(file, tokenizer) | ||
|
||
if __name__ == "__main__": | ||
main() |
Oops, something went wrong.