Skip to content

Commit

Permalink
Feat/add target lang choices (#12)
Browse files Browse the repository at this point in the history
* Feat: add multiple language choices and a new `--language` argument to set the target language

Co-authored-by: yihong0618 <[email protected]>
  • Loading branch information
2 people authored and jeffery9 committed Mar 4, 2023
1 parent 9c890a8 commit 059a372
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/make_test_ebook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ jobs:
- name: make test ebook
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: python3 make_book.py --book_name test_books/animal_farm.epub --no_limit --test --test_num 2
run: python3 make_book.py --book_name test_books/animal_farm.epub --no_limit --test --test_num 2 --language zh-hans
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,19 @@ Make bilingual epub books Using AI translate
3. 本地放了一个 animal_farm.epub 给大家测试
4. 默认用了 ChatGPT 模型,用 `--model gpt3` 来使用 gpt3 模型
5. 加了 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢)
6. Set the target language like `--language "Simplified Chinese"`.
Other supported values include `"Japanese" / "Traditional Chinese" / "German" / "French" / "Korean"`.
The default target language is `"Simplified Chinese"`. For the full list of supported languages, see `LANGUAGES` in [utils.py](./utils.py).

e.g.
```shell
# 如果你想快速测一下
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --no_limit --test
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --no_limit --test --language "Simplified Chinese"
# or do it
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key}
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --language "Simplified Chinese"
# or 用 gpt3 模型
export OPENAI_API_KEY=${your_api_key}
python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --no_limit
python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --no_limit --language "Simplified Chinese"
```

## 注意
Expand Down
35 changes: 26 additions & 9 deletions make_book.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
from bs4 import BeautifulSoup as bs
from ebooklib import epub
from rich import print
from utils import LANGUAGES, TO_LANGUAGE_CODE

NO_LIMIT = False
IS_TEST = False
RESUME = False


class Base:
def __init__(self, key):
def __init__(self, key, language):
pass

@abstractmethod
Expand All @@ -27,7 +28,7 @@ def translate(self, text):


class GPT3(Base):
def __init__(self, key):
def __init__(self, key, language):
self.api_key = key
self.api_url = "https://api.openai.com/v1/completions"
self.headers = {
Expand All @@ -43,10 +44,11 @@ def __init__(self, key):
"top_p": 1,
}
self.session = requests.session()
self.language = language

def translate(self, text):
print(text)
self.data["prompt"] = f"Please help me to translate,`{text}` to Chinese"
self.data["prompt"] = f"Please help me to translate,`{text}` to {self.language}"
r = self.session.post(self.api_url, headers=self.headers, json=self.data)
if not r.ok:
return text
Expand All @@ -64,12 +66,14 @@ def translate(self, text):


class ChatGPT(Base):
def __init__(self, key):
super().__init__(key)
def __init__(self, key, language):
super().__init__(key, language)
self.key = key
self.language = language

def translate(self, text):
print(text)
print(self.language, "!!!")
openai.api_key = self.key
try:
completion = openai.ChatCompletion.create(
Expand All @@ -78,7 +82,7 @@ def translate(self, text):
{
"role": "user",
# english prompt here to save tokens
"content": f"Please help me to translate,`{text}` to Chinese, please return only translated content not include the origin text",
"content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
}
],
)
Expand Down Expand Up @@ -117,10 +121,10 @@ def translate(self, text):


class BEPUB:
def __init__(self, epub_name, model, key, resume):
def __init__(self, epub_name, model, key, resume, language):
self.epub_name = epub_name
self.new_epub = epub.EpubBook()
self.translate_model = model(key)
self.translate_model = model(key, language)
self.origin_book = epub.read_epub(self.epub_name)
self.p_to_save = []
self.resume = resume
Expand Down Expand Up @@ -236,6 +240,14 @@ def save_progress(self):
choices=["chatgpt", "gpt3"], # support DeepL later
help="Use which model",
)
parser.add_argument(
"--language",
type=str,
choices=sorted(LANGUAGES.keys())
+ sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
default="zh-hans",
help="language to translate to",
)
parser.add_argument(
"--resume",
dest="resume",
Expand All @@ -253,5 +265,10 @@ def save_progress(self):
if not options.book_name.endswith(".epub"):
raise Exception("please use epub file")
model = MODEL_DICT.get(options.model, "chatgpt")
e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME)
language = options.language
if options.language in LANGUAGES:
# use the value for prompt
language = LANGUAGES.get(language, language)

e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME, language=language)
e.make_bilingual_book()
119 changes: 119 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Borrowed from : https://github.com/openai/whisper
#
# Mapping of short language code -> lowercase English language name.
# make_book.py offers the keys of this table as accepted values for the
# `--language` option and, when a code is given, substitutes the matching
# value (the language name) into the translation prompt.
# Chinese is split into two entries, "zh-hans" (simplified) and
# "zh-hant" (traditional), so the target script can be selected explicitly.
LANGUAGES = {
"en": "english",
"zh-hans": "simplified chinese",
"zh-hant": "traditional chinese",
"de": "german",
"es": "spanish",
"ru": "russian",
"ko": "korean",
"fr": "french",
"ja": "japanese",
"pt": "portuguese",
"tr": "turkish",
"pl": "polish",
"ca": "catalan",
"nl": "dutch",
"ar": "arabic",
"sv": "swedish",
"it": "italian",
"id": "indonesian",
"hi": "hindi",
"fi": "finnish",
"vi": "vietnamese",
"he": "hebrew",
"uk": "ukrainian",
"el": "greek",
"ms": "malay",
"cs": "czech",
"ro": "romanian",
"da": "danish",
"hu": "hungarian",
"ta": "tamil",
"no": "norwegian",
"th": "thai",
"ur": "urdu",
"hr": "croatian",
"bg": "bulgarian",
"lt": "lithuanian",
"la": "latin",
"mi": "maori",
"ml": "malayalam",
"cy": "welsh",
"sk": "slovak",
"te": "telugu",
"fa": "persian",
"lv": "latvian",
"bn": "bengali",
"sr": "serbian",
"az": "azerbaijani",
"sl": "slovenian",
"kn": "kannada",
"et": "estonian",
"mk": "macedonian",
"br": "breton",
"eu": "basque",
"is": "icelandic",
"hy": "armenian",
"ne": "nepali",
"mn": "mongolian",
"bs": "bosnian",
"kk": "kazakh",
"sq": "albanian",
"sw": "swahili",
"gl": "galician",
"mr": "marathi",
"pa": "punjabi",
"si": "sinhala",
"km": "khmer",
"sn": "shona",
"yo": "yoruba",
"so": "somali",
"af": "afrikaans",
"oc": "occitan",
"ka": "georgian",
"be": "belarusian",
"tg": "tajik",
"sd": "sindhi",
"gu": "gujarati",
"am": "amharic",
"yi": "yiddish",
"lo": "lao",
"uz": "uzbek",
"fo": "faroese",
"ht": "haitian creole",
"ps": "pashto",
"tk": "turkmen",
"nn": "nynorsk",
"mt": "maltese",
"sa": "sanskrit",
"lb": "luxembourgish",
"my": "myanmar",
"bo": "tibetan",
"tl": "tagalog",
"mg": "malagasy",
"as": "assamese",
"tt": "tatar",
"haw": "hawaiian",
"ln": "lingala",
"ha": "hausa",
"ba": "bashkir",
"jw": "javanese",
"su": "sundanese",
}

# Reverse lookup table: language name -> language code.
# It starts as the inverse of LANGUAGES; a handful of alternative names
# (aliases) are then layered on top so they resolve to the same codes.
TO_LANGUAGE_CODE = {name: code for code, name in LANGUAGES.items()}
TO_LANGUAGE_CODE.update(
    burmese="my",
    valencian="ca",
    flemish="nl",
    haitian="ht",
    letzeburgesch="lb",
    pushto="ps",
    panjabi="pa",
    moldavian="ro",
    moldovan="ro",
    sinhalese="si",
    castilian="es",
)

0 comments on commit 059a372

Please sign in to comment.