Commit
refactor code and resolve conflicts with upstream
jeffery9 committed Mar 12, 2023
1 parent 9d242a7 commit 7eeb4d2
Showing 2 changed files with 53 additions and 56 deletions.
107 changes: 52 additions & 55 deletions book_maker/translator/chatgptapi_translator.py
@@ -1,10 +1,10 @@
import time

import openai
from ..utils import num_tokens_from_messages
from os import environ

from .base_translator import Base
from ..utils import num_tokens_from_messages


PROMPT_ENV_MAP = {
@@ -59,8 +59,9 @@ def get_translation(self, text):
text=text, language=self.language
),
}
]
count_tokens = num_tokens_from_messages(message_log)
)

count_tokens = num_tokens_from_messages(messages)
consumed_tokens = 0
t_text = ""
if count_tokens > 4000:
@@ -75,71 +76,67 @@ def get_translation(self, text):
text_segment = text_list[n * splits : (n + 1) * splits]
sub_text = ".".join(text_segment)
print(sub_text)
message_log = []

if self.prompt_sys_msg:
message_log.append(
{"role": "system", "content": self.prompt_sys_msg},
)

message_log.append(
{
"role": "user",
"content": self.prompt_template.format(
text=sub_text, language=self.language
),
}
)

t_sub_text, completion = self.call_chatgpt(message_log)
print(t_sub_text)
consumed_tokens += completion["usage"]["prompt_tokens"]

t_text = t_text + t_sub_text

else:

t_sub_text, completion = self.call_chatgpt(messages)
consumed_tokens += completion["usage"]["prompt_tokens"]

print(f"{consumed_tokens} prompt tokens used.")
return t_text

def call_chatgpt(self, message_log):
completion = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=messages,
model="gpt-3.5-turbo", messages=message_log
)
t_text = (
t_sub_text = (
completion["choices"][0]
.get("message")
.get("content")
.encode("utf8")
.decode()
)
return t_text
consumed_tokens += completion["usage"]["prompt_tokens"]

return t_sub_text, completion

def translate(self, text):
# todo: Determine whether to print according to the cli option
print(text)

else:
try:
completion = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
# english prompt here to save tokens
"content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
}
],
)
t_text = (
completion["choices"][0]
.get("message")
.get("content")
.encode("utf8")
.decode()
)
consumed_tokens += completion["usage"]["prompt_tokens"]

except Exception as e:
# TIME LIMIT for open api please pay
key_len = self.key.count(",") + 1
sleep_time = int(60 / key_len)
time.sleep(sleep_time)
print(e, f"will sleep {sleep_time} seconds")
self.rotate_key()
completion = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
}
],
)
t_text = (
completion["choices"][0]
.get("message")
.get("content")
.encode("utf8")
.decode()
)
consumed_tokens += completion["usage"]["prompt_tokens"]
try:
t_text = self.get_translation(text)
except Exception as e:
# todo: better sleep time? why is the sleep always tied to key_len?
# 1. openai server error or local network interruption: sleep for a fixed time
# 2. an apikey is out of credit or hits its limit: don't sleep, just switch to another apikey
# 3. all apikeys hit their limits: then the current sleep applies
sleep_time = int(60 / self.key_len)
print(e, f"will sleep {sleep_time} seconds")
time.sleep(sleep_time)

print(t_text)
print(f"{consumed_tokens} prompt tokens used.")
t_text = self.get_translation(text)

# todo: Determine whether to print according to the cli option
print(t_text.strip())
return t_text
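
Taken together, the refactor funnels every request through one helper: get_translation builds the message list and splits over-long inputs, call_chatgpt owns the API round-trip, and translate wraps the whole thing in a sleep-and-retry. A condensed, module-level sketch of that shape follows (the real code is methods on the translator class; prompt_sys_msg handling and the exact splits/part arithmetic are elided, and the 10-sentence chunk size below is a placeholder, not the commit's value):

import time

import openai  # the pre-1.0 openai package, matching the ChatCompletion API in this diff

from book_maker.utils import num_tokens_from_messages


def call_chatgpt(message_log):
    # single API round-trip; also hand back the raw completion so the caller
    # can accumulate completion["usage"]["prompt_tokens"]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=message_log
    )
    t_sub_text = completion["choices"][0]["message"]["content"]
    return t_sub_text, completion


def get_translation(text, prompt_template, language):
    messages = [
        {
            "role": "user",
            "content": prompt_template.format(text=text, language=language),
        }
    ]
    if num_tokens_from_messages(messages) <= 4000:
        return call_chatgpt(messages)[0]
    # over budget: split on "." and translate piecewise, as the loop in the hunk does
    pieces = text.split(".")
    chunks = [".".join(pieces[i : i + 10]) for i in range(0, len(pieces), 10)]
    t_text = ""
    for sub_text in chunks:
        sub_messages = [
            {
                "role": "user",
                "content": prompt_template.format(text=sub_text, language=language),
            }
        ]
        t_sub_text, _completion = call_chatgpt(sub_messages)
        t_text += t_sub_text
    return t_text


def translate(text, prompt_template, language, key_len=1):
    try:
        return get_translation(text, prompt_template, language)
    except Exception as e:
        # back off (shorter when several comma-separated keys share the load), retry once
        sleep_time = int(60 / key_len)
        print(e, f"will sleep {sleep_time} seconds")
        time.sleep(sleep_time)
        return get_translation(text, prompt_template, language)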
2 changes: 1 addition & 1 deletion book_maker/utils.py
@@ -140,7 +140,7 @@ def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
num_tokens += 2 # every reply is primed with <im_start>assistant
return num_tokens
else:
raise NotImplementedError(
print(
f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
)
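
One behavioral note on this change: because nothing is raised or returned after the print, num_tokens_from_messages now implicitly returns None for any model other than gpt-3.5-turbo, and a caller comparing the result against a limit (as get_translation does with count_tokens > 4000) would fail with a TypeError. A defensive pattern on the calling side could look like this (a sketch, not part of this commit):

count_tokens = num_tokens_from_messages(messages) or 0  # None -> 0 for unknown models
if count_tokens > 4000:
    print("text too long, will split before translating")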