Merge branch 'master' into concedo
# Conflicts:
#	.github/workflows/build.yml
#	.gitignore
#	CMakeLists.txt
#	Makefile
LostRuins committed Mar 29, 2023
2 parents 2713072 + 5a5f8b1 commit 49c4c22
Showing 28 changed files with 1,087 additions and 727 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -5,6 +5,7 @@
.vscode/
.DS_Store

.build/
build/
build-em/
build-debug/
@@ -20,12 +21,17 @@ models/*
/quantize
/result
/perplexity
/embedding

arm_neon.h
compile_commands.json

.envrc
.direnv/

.venv
__pycache__
.swiftpm

dist/
llama_for_kobold.spec
5 changes: 5 additions & 0 deletions Makefile
@@ -35,6 +35,11 @@ CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
LDFLAGS =

# warnings
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function


#lets try enabling everything
CFLAGS += -pthread -mf16c -mfma -mavx2 -mavx -msse3
CXXFLAGS += -pthread
20 changes: 20 additions & 0 deletions Package.swift
@@ -0,0 +1,20 @@
// swift-tools-version:5.3

import PackageDescription

let package = Package(
    name: "llama",
    products: [
        .library(name: "llama", targets: ["llama"]),
    ],
    targets: [
        .target(
            name: "llama",
            path: ".",
            sources: ["ggml.c", "llama.cpp"],
            publicHeadersPath: "spm-headers",
            cSettings: [.unsafeFlags(["-Wno-shorten-64-to-32"])]
        ),
    ],
    cxxLanguageStandard: .cxx11
)
2 changes: 0 additions & 2 deletions convert-pth-to-ggml.py
@@ -145,13 +145,11 @@ def main():

print(f"Extracting only the vocab from '{fname_model}'\n")

model = torch.load(fname_model, map_location="cpu")

with open(fname_out, "wb") as fout:
write_header(fout, hparams, ftype)
write_tokens(fout, tokenizer)

del model

print(f"Done. Output file: {fname_out}\n")

100 changes: 100 additions & 0 deletions convert-unversioned-ggml-to-ggml.py
@@ -0,0 +1,100 @@
#!/usr/bin/env python3
# Original by https://github.com/eiz
# https://github.com/ggerganov/llama.cpp/issues/324#issuecomment-1476227818
import argparse
import glob
import os
import struct
import sys
from sentencepiece import SentencePieceProcessor

HPARAMS = keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"]

def parse_args():
    parser = argparse.ArgumentParser(description='Upgrade old ggml model files to the current format')
    parser.add_argument('dir_model', help='directory containing ggml .bin files')
    parser.add_argument('tokenizer_model', help='path to LLaMA tokenizer.model file')
    return parser.parse_args()

def read_header(f_in):
    struct_fmt = "i" * (3 + len(HPARAMS))
    struct_size = struct.calcsize(struct_fmt)
    buf = f_in.read(struct_size)
    return struct.unpack(struct_fmt, buf)

def write_header(f_out, header):
    (magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype) = header

    if magic != 0x67676d6c:
        raise Exception('Invalid file magic. Must be an old style ggml file.')

    values = [
        0x67676d66,  # magic: ggmf in hex
        1,           # file version
        vocab_size,
        dim,
        multiple_of,
        n_heads,
        n_layers,
        rot,
        ftype
    ]
    f_out.write(struct.pack("i" * len(values), *values))

def write_tokens(fout, tokenizer):
    for i in range(tokenizer.vocab_size()):
        if tokenizer.is_unknown(i):
            text = " \u2047 ".encode("utf-8")
        elif tokenizer.is_control(i):
            text = b""
        elif tokenizer.is_byte(i):
            piece = tokenizer.id_to_piece(i)
            if len(piece) != 6:
                print(f"Invalid token: {piece}")
                sys.exit(1)
            byte_value = int(piece[3:-1], 16)
            text = struct.pack("B", byte_value)
        else:
            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
        fout.write(struct.pack("i", len(text)))
        fout.write(text)
        fout.write(struct.pack("f", tokenizer.get_score(i)))

def read_tokens(f_in, tokenizer):
    for i in range(tokenizer.vocab_size()):
        len_b = f_in.read(4)
        (length,) = struct.unpack("i", len_b)
        f_in.read(length)

def copy_all_data(f_out, f_in):
    while True:
        buf = f_in.read(1024 * 1024)
        if not buf:
            break
        f_out.write(buf)

def convert_one_file(path_in, tokenizer):
    path_tmp = f"{path_in}.tmp"
    path_orig = f"{path_in}.orig"
    print(f"converting {path_in}")
    with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out:
        write_header(f_out, read_header(f_in))
        read_tokens(f_in, tokenizer)
        write_tokens(f_out, tokenizer)
        copy_all_data(f_out, f_in)
    os.rename(path_in, path_orig)
    os.rename(path_tmp, path_in)

def main():
    args = parse_args()
    files = []
    files.extend(glob.glob(f"{args.dir_model}/*.bin"))
    files.extend(glob.glob(f"{args.dir_model}/*.bin.*"))

    tokenizer = SentencePieceProcessor(args.tokenizer_model)

    for file in files:
        convert_one_file(file, tokenizer)

if __name__ == "__main__":
    main()
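
For context on the header rewrite above: the script accepts files with the old unversioned magic 0x67676d6c ("ggml"), re-emits the header with the versioned magic 0x67676d66 ("ggmf") plus a file-version field of 1, regenerates the token table from tokenizer.model, and copies the tensor data unchanged. The snippet below is a small stand-alone sketch, not part of this commit, that peeks at a .bin header using the same struct layout to report whether a file still needs converting; the helper name and command-line interface are assumptions made for illustration.

#!/usr/bin/env python3
# Hypothetical companion sketch (not in this commit): report whether a ggml
# .bin file still uses the old unversioned header that the converter expects.
import struct
import sys

OLD_MAGIC = 0x67676d6c  # 'ggml' - old unversioned files, handled by the converter
NEW_MAGIC = 0x67676d66  # 'ggmf' - versioned files written by the converter

def describe(path):
    # Read the first two little-endian int32 fields: magic, then (for new
    # files) the file version; for old files the second int is vocab_size.
    with open(path, "rb") as f:
        magic, second = struct.unpack("ii", f.read(8))
    if magic == OLD_MAGIC:
        return "old unversioned ggml - needs conversion"
    if magic == NEW_MAGIC:
        return f"versioned ggmf, file version {second}"
    return f"unrecognized magic 0x{magic & 0xffffffff:08x}"

if __name__ == "__main__":
    for path in sys.argv[1:]:
        print(f"{path}: {describe(path)}")

The converter itself is invoked per its argparse definition, e.g. python convert-unversioned-ggml-to-ggml.py <dir_model> <tokenizer.model>; it renames each original file to *.orig and writes the upgraded file in its place.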
