Skip to content

Commit

Permalink
count tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffery9 committed Mar 8, 2023
1 parent 6c72c29 commit 30b34a2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
22 changes: 22 additions & 0 deletions book_maker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,25 @@
"sinhalese": "si",
"castilian": "es",
}

import tiktoken

def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Return the number of tokens used by a list of chat messages.

    Args:
        messages: Iterable of dicts, one per chat message. Every value in
            each dict is tokenized and counted; a "name" key additionally
            reduces the count by one token. Values are presumably strings
            (they are passed straight to ``encoding.encode``) -- confirm
            against callers.
        model: Model name. Only "gpt-3.5-turbo" is currently supported.

    Returns:
        The total token count: 4 tokens of framing per message, the encoded
        length of every value, -1 per "name" key, plus 2 tokens for the
        reply priming.

    Raises:
        NotImplementedError: If ``model`` is not "gpt-3.5-turbo".
    """
    # Validate the model before touching the tokenizer so that unsupported
    # models fail fast instead of first resolving an encoding they never use.
    if model != "gpt-3.5-turbo":  # note: future models may deviate from this
        raise NotImplementedError(
            f"num_tokens_from_messages() is not presently implemented for model {model}.\n"
            "See https://github.com/openai/openai-python/blob/main/chatml.md"
            " for information on how messages are converted to tokens."
        )

    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken has no mapping for this model name; fall back to cl100k_base.
        encoding = tiktoken.get_encoding("cl100k_base")

    num_tokens = 0
    for message in messages:
        # every message follows <im_start>{role/name}\n{content}<im_end>\n
        num_tokens += 4
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":  # if there's a name, the role is omitted
                num_tokens -= 1  # role is always required and always 1 token
    num_tokens += 2  # every reply is primed with <im_start>assistant
    return num_tokens
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
bs4
openai
tiktoken
requests
ebooklib
rich
Expand Down

0 comments on commit 30b34a2

Please sign in to comment.