From 2515379aed192f8709c6fabacc0b8ca3bf2a31c2 Mon Sep 17 00:00:00 2001
From: Jeffery CHEN Fan
Date: Wed, 8 Mar 2023 12:45:00 +0800
Subject: [PATCH 1/5] count tokens

---
 book_maker/utils.py | 22 ++++++++++++++++++++++
 requirements.txt    |  1 +
 2 files changed, 23 insertions(+)

diff --git a/book_maker/utils.py b/book_maker/utils.py
index cfa74a41..9e593cdc 100644
--- a/book_maker/utils.py
+++ b/book_maker/utils.py
@@ -118,6 +118,28 @@
     "castilian": "es",
 }
 
+import tiktoken
+
+def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
+    """Returns the number of tokens used by a list of messages."""
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        encoding = tiktoken.get_encoding("cl100k_base")
+    if model == "gpt-3.5-turbo":  # note: future models may deviate from this
+        num_tokens = 0
+        for message in messages:
+            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            for key, value in message.items():
+                num_tokens += len(encoding.encode(value))
+                if key == "name":  # if there's a name, the role is omitted
+                    num_tokens += -1  # role is always required and always 1 token
+        num_tokens += 2  # every reply is primed with <im_start>assistant
+        return num_tokens
+    else:
+        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
+See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
+
 
 def prompt_config_to_kwargs(prompt_config):
     prompt_config = prompt_config or {}
diff --git a/requirements.txt b/requirements.txt
index 53a9d591..a38f5493 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 bs4
 openai
+tiktoken
 requests
 ebooklib
 rich
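A minimal usage sketch for the helper this patch adds (hypothetical driver code, not part of the patch; it assumes the patch is applied and tiktoken is installed):

    from book_maker.utils import num_tokens_from_messages

    messages = [
        {
            "role": "user",
            "content": "Please help me to translate `Hello world.` to Simplified Chinese",
        }
    ]
    tokens = num_tokens_from_messages(messages)
    print(f"{tokens} prompt tokens")
    # gpt-3.5-turbo has a 4096-token context window shared by the prompt and
    # the reply, so a guard threshold below 4096 leaves headroom for the reply
    if tokens > 4000:
        print("prompt too long, needs splitting")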
From 084b7a617fa7e4a1f67d2f9745ff35274ce9d380 Mon Sep 17 00:00:00 2001
From: Jeffery CHEN Fan
Date: Wed, 8 Mar 2023 14:38:00 +0800
Subject: [PATCH 2/5] translate in splits for tokens larger than 4096

---
 book_maker/translator/chatgptapi_translator.py |  1 +
 book_maker/utils.py                            | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 0850b941..47abe201 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -1,6 +1,7 @@
 import time
 
 import openai
+from ..utils import num_tokens_from_messages
 from os import environ
 
 from .base_translator import Base
diff --git a/book_maker/utils.py b/book_maker/utils.py
index 9e593cdc..da5e3506 100644
--- a/book_maker/utils.py
+++ b/book_maker/utils.py
@@ -120,6 +120,7 @@
 
 import tiktoken
 
+
 def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
     """Returns the number of tokens used by a list of messages."""
     try:
@@ -129,7 +130,9 @@ def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
     if model == "gpt-3.5-turbo":  # note: future models may deviate from this
         num_tokens = 0
         for message in messages:
-            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            num_tokens += (
+                4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            )
             for key, value in message.items():
                 num_tokens += len(encoding.encode(value))
                 if key == "name":  # if there's a name, the role is omitted
@@ -137,8 +140,10 @@ def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
         num_tokens += 2  # every reply is primed with <im_start>assistant
         return num_tokens
     else:
-        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
-See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
+        raise NotImplementedError(
+            f"""num_tokens_from_messages() is not presently implemented for model {model}.
+See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
+        )
 
 
 def prompt_config_to_kwargs(prompt_config):
     prompt_config = prompt_config or {}
From 9d242a7f6268d8c4d9e6bf6825e854d327f70762 Mon Sep 17 00:00:00 2001
From: jeffery
Date: Wed, 8 Mar 2023 15:31:27 +0800
Subject: [PATCH 3/5] count tokens used.

---
 .../translator/chatgptapi_translator.py       | 77 +++++++++++++++----
 1 file changed, 63 insertions(+), 14 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index 47abe201..adfcb619 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -59,7 +59,22 @@ def get_translation(self, text):
                     text=text, language=self.language
                 ),
             }
-        )
+        ]
+        count_tokens = num_tokens_from_messages(message_log)
+        consumed_tokens = 0
+        t_text = ""
+        if count_tokens > 4000:
+            print("too long!")
+
+            splits = count_tokens // 4000 + 1
+
+            text_list = text.split(".")
+            sub_text = ""
+            t_sub_text = ""
+            for n in range(splits):
+                text_segment = text_list[n * splits : (n + 1) * splits]
+                sub_text = ".".join(text_segment)
+                print(sub_text)
 
         completion = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
@@ -73,24 +88,58 @@
             .decode()
         )
         return t_text
+        consumed_tokens += completion["usage"]["prompt_tokens"]
 
     def translate(self, text):
         # todo: Determine whether to print according to the cli option
         print(text)
-        try:
-            t_text = self.get_translation(text)
-        except Exception as e:
-            # todo: better sleep time? why sleep always about key_len
-            # 1. openai server error or own network interruption, sleep for a fixed time
-            # 2. an apikey has no money or reach limit, don’t sleep, just replace it with another apikey
-            # 3. all apikey reach limit, then use current sleep
-            sleep_time = int(60 / self.key_len)
-            print(e, f"will sleep {sleep_time} seconds")
-            time.sleep(sleep_time)
+        else:
+            try:
+                completion = openai.ChatCompletion.create(
+                    model="gpt-3.5-turbo",
+                    messages=[
+                        {
+                            "role": "user",
+                            # english prompt here to save tokens
+                            "content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
+                        }
+                    ],
+                )
+                t_text = (
+                    completion["choices"][0]
+                    .get("message")
+                    .get("content")
+                    .encode("utf8")
+                    .decode()
+                )
+                consumed_tokens += completion["usage"]["prompt_tokens"]
+
+            except Exception as e:
+                # TIME LIMIT for open api please pay
+                key_len = self.key.count(",") + 1
+                sleep_time = int(60 / key_len)
+                time.sleep(sleep_time)
+                print(e, f"will sleep {sleep_time} seconds")
+                self.rotate_key()
+                completion = openai.ChatCompletion.create(
+                    model="gpt-3.5-turbo",
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
+                        }
+                    ],
+                )
+                t_text = (
+                    completion["choices"][0]
+                    .get("message")
+                    .get("content")
+                    .encode("utf8")
+                    .decode()
+                )
+                consumed_tokens += completion["usage"]["prompt_tokens"]
 
-        t_text = self.get_translation(text)
-
-        # todo: Determine whether to print according to the cli option
-        print(t_text.strip())
+        print(t_text)
+        print(f"{consumed_tokens} prompt tokens used.")
         return t_text
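For reference, a standalone sketch of the chunking idea this patch introduces (assumed names, not code from the patch): split the text on "." and translate it in roughly equal sentence batches, with the batch size derived from the sentence count so the whole text is covered.

    def split_by_sentences(text, splits):
        """Yield `splits` roughly equal batches of sentences from `text`."""
        sentences = text.split(".")
        size = len(sentences) // splits + 1  # sentences per batch, rounded up
        for n in range(splits):
            yield ".".join(sentences[n * size : (n + 1) * size])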
From 7eeb4d25a2dab92e9e7a356a7afbccd5edfb3782 Mon Sep 17 00:00:00 2001
From: jeffery
Date: Sun, 12 Mar 2023 17:35:44 +0800
Subject: [PATCH 4/5] refactor code and resolve conflicts with upstream

---
 .../translator/chatgptapi_translator.py       | 107 +++++++++---------
 book_maker/utils.py                           |   2 +-
 2 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py
index adfcb619..c973098b 100644
--- a/book_maker/translator/chatgptapi_translator.py
+++ b/book_maker/translator/chatgptapi_translator.py
@@ -1,10 +1,10 @@
 import time
 
 import openai
-from ..utils import num_tokens_from_messages
 from os import environ
 
 from .base_translator import Base
+from ..utils import num_tokens_from_messages
 
 
 PROMPT_ENV_MAP = {
@@ -59,8 +59,9 @@ def get_translation(self, text):
                     text=text, language=self.language
                 ),
             }
-        ]
-        count_tokens = num_tokens_from_messages(message_log)
+        )
+
+        count_tokens = num_tokens_from_messages(messages)
         consumed_tokens = 0
         t_text = ""
         if count_tokens > 4000:
             print("too long!")
 
             splits = count_tokens // 4000 + 1
 
             text_list = text.split(".")
             sub_text = ""
             t_sub_text = ""
             for n in range(splits):
                 text_segment = text_list[n * splits : (n + 1) * splits]
                 sub_text = ".".join(text_segment)
                 print(sub_text)
+                message_log = []
+
+                if self.prompt_sys_msg:
+                    message_log.append(
+                        {"role": "system", "content": self.prompt_sys_msg},
+                    )
+
+                message_log.append(
+                    {
+                        "role": "user",
+                        "content": self.prompt_template.format(
+                            text=sub_text, language=self.language
+                        ),
+                    }
+                )
+
+                t_sub_text, completion = self.call_chatgpt(message_log)
+                print(t_sub_text)
+                consumed_tokens += completion["usage"]["prompt_tokens"]
+
+                t_text = t_text + t_sub_text
+
+        else:
+
+            t_sub_text, completion = self.call_chatgpt(messages)
+            consumed_tokens += completion["usage"]["prompt_tokens"]
+
+        print(f"{consumed_tokens} prompt tokens used.")
+        return t_text
+
+    def call_chatgpt(self, message_log):
         completion = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo",
-            messages=messages,
+            model="gpt-3.5-turbo", messages=message_log
         )
-        t_text = (
+        t_sub_text = (
             completion["choices"][0]
             .get("message")
             .get("content")
             .encode("utf8")
             .decode()
         )
-        return t_text
-        consumed_tokens += completion["usage"]["prompt_tokens"]
+
+        return t_sub_text, completion
 
     def translate(self, text):
         # todo: Determine whether to print according to the cli option
         print(text)
-        else:
-            try:
-                completion = openai.ChatCompletion.create(
-                    model="gpt-3.5-turbo",
-                    messages=[
-                        {
-                            "role": "user",
-                            # english prompt here to save tokens
-                            "content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
-                        }
-                    ],
-                )
-                t_text = (
-                    completion["choices"][0]
-                    .get("message")
-                    .get("content")
-                    .encode("utf8")
-                    .decode()
-                )
-                consumed_tokens += completion["usage"]["prompt_tokens"]
-
-            except Exception as e:
-                # TIME LIMIT for open api please pay
-                key_len = self.key.count(",") + 1
-                sleep_time = int(60 / key_len)
-                time.sleep(sleep_time)
-                print(e, f"will sleep {sleep_time} seconds")
-                self.rotate_key()
-                completion = openai.ChatCompletion.create(
-                    model="gpt-3.5-turbo",
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
-                        }
-                    ],
-                )
-                t_text = (
-                    completion["choices"][0]
-                    .get("message")
-                    .get("content")
-                    .encode("utf8")
-                    .decode()
-                )
-                consumed_tokens += completion["usage"]["prompt_tokens"]
+        try:
+            t_text = self.get_translation(text)
+        except Exception as e:
+            # todo: better sleep time? why sleep always about key_len
+            # 1. openai server error or own network interruption, sleep for a fixed time
+            # 2. an apikey has no money or reach limit, don’t sleep, just replace it with another apikey
+            # 3. all apikey reach limit, then use current sleep
+            sleep_time = int(60 / self.key_len)
+            print(e, f"will sleep {sleep_time} seconds")
+            time.sleep(sleep_time)
 
-        print(t_text)
-        print(f"{consumed_tokens} prompt tokens used.")
+            t_text = self.get_translation(text)
+
+        # todo: Determine whether to print according to the cli option
+        print(t_text.strip())
         return t_text
diff --git a/book_maker/utils.py b/book_maker/utils.py
index da5e3506..2265c0d6 100644
--- a/book_maker/utils.py
+++ b/book_maker/utils.py
@@ -140,7 +140,7 @@ def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
         num_tokens += 2  # every reply is primed with <im_start>assistant
         return num_tokens
     else:
-        raise NotImplementedError(
+        print(
             f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" ) From f43c778379f7822ce91072737400e9cf1eb3a7f2 Mon Sep 17 00:00:00 2001 From: jeffery Date: Sun, 12 Mar 2023 17:41:35 +0800 Subject: [PATCH 5/5] fix var --- book_maker/translator/chatgptapi_translator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py index c973098b..50ad906d 100644 --- a/book_maker/translator/chatgptapi_translator.py +++ b/book_maker/translator/chatgptapi_translator.py @@ -110,7 +110,7 @@ def call_chatgpt(self, message_log): completion = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=message_log ) - t_sub_text = ( + t_text = ( completion["choices"][0] .get("message") .get("content") @@ -118,7 +118,7 @@ def call_chatgpt(self, message_log): .decode() ) - return t_sub_text, completion + return t_text, completion def translate(self, text): # todo: Determine whether to print according to the cli option
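After this series, translate() delegates to get_translation(), which counts prompt tokens with num_tokens_from_messages() and, above 4000 tokens, splits the text on "." and sends each piece through call_chatgpt(), summing the prompt tokens consumed. A hypothetical end-to-end sketch (the class name and constructor arguments are assumptions based on the file layout, not taken from these patches):

    from book_maker.translator.chatgptapi_translator import ChatGPTAPI

    translator = ChatGPTAPI(key="sk-...", language="Simplified Chinese")
    # prints the source text, the per-split output, and the token total,
    # then returns the translated string
    print(translator.translate("A very long chapter ..."))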