diff --git a/.github/workflows/make_test_ebook.yaml b/.github/workflows/make_test_ebook.yaml index 8565ae83..90d0f421 100644 --- a/.github/workflows/make_test_ebook.yaml +++ b/.github/workflows/make_test_ebook.yaml @@ -37,6 +37,10 @@ jobs: run: | python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --test_num 20 --model google + - name: make txt book test with batch_size + run: | + python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model google + - name: make openai key ebook test if: env.OPENAI_API_KEY != null diff --git a/README-CN.md b/README-CN.md index 349c1f64..028139fe 100644 --- a/README-CN.md +++ b/README-CN.md @@ -15,7 +15,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 ## 使用 -1. `pip install -r requirements.txt` +1. `pip install -r requirements.txt` 或 `pip install -U bbook_maker` 2. 使用 `--openai_key` 指定 OpenAI API key,如果有多个可以用英文逗号分隔(xxx,xxx,xxx),可以减少接口调用次数限制带来的错误。 或者,指定环境变量 `BMM_OPENAI_API_KEY` 来略过这个选项。 3. 本地放了一个 `test_books/animal_farm.epub` 给大家测试 @@ -42,9 +42,11 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 16. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 17. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 18. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** - +19. 
使用`--batch_size` 参数,指定批量翻译的行数(默认行数为10,目前只对txt生效) ### 示范用例 +**如果使用 `pip install bbook_maker` 安装,以下命令都可以改成 `bbook_maker args` 的形式** + ```shell # 如果你想快速测一下 python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test @@ -70,6 +72,9 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo # 翻译 txt 文件 python3 make_book.py --book_name test_books/the_little_prince.txt --test +# 聚合多行翻译 txt 文件 +python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 + ``` 更加小白的示例 diff --git a/README.md b/README.md index f30d0007..0150d4f8 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,3 @@ -This forked added Google Translate support, only supported translate to `zh-CN`. -Usage: make sure to add `--model google` in the command. - - **[中文](./README-CN.md) | English** # bilingual_book_maker @@ -19,7 +15,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u ## Use -1. `pip install -r requirements.txt` +1. `pip install -r requirements.txt` or `pip install -U bbook_maker` (either one works) 2. Use `--openai_key` option to specify OpenAI API key. If you have multiple keys, separate them by commas (xxx,xxx,xxx) to reduce errors caused by API call limits. Or, just set environment variable `BMM_OPENAI_API_KEY` instead. 3. A sample book, `test_books/animal_farm.epub`, is provided for testing purposes. @@ -45,9 +41,12 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u 16. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. 17. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. 18. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. 
**Note that it's best to look for e-books that are more standardized if possible.** +19. Use the `--batch_size` parameter to specify the number of lines for batch translation (default is 10, currently only effective for txt files). ### Examples +**Note: if you installed the package with `pip install bbook_maker`, every command below can be run as `bbook_maker args` instead of `python3 make_book.py args`.** + ```shell # Test quickly python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test --language zh-hans @@ -76,6 +75,8 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo # translate txt file python3 make_book.py --book_name test_books/the_little_prince.txt --test --language zh-hans +# translate a txt file in batches of 20 lines +python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 ``` More understandable example diff --git a/book_maker/cli.py b/book_maker/cli.py index aa6a3d43..8314a099 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -1,12 +1,12 @@ import argparse +import json import os from os import environ as env -import json +import book_maker.obok as obok from book_maker.loader import BOOK_LOADER_DICT from book_maker.translator import MODEL_DICT from book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE -import book_maker.obok as obok def parse_prompt_arg(prompt_arg): @@ -156,6 +156,13 @@ def main(): metavar="PROMPT_ARG", help="used for customizing the prompt. It can be the prompt template string, or a path to the template file. 
The valid placeholders are `{text}` and `{language}`.", ) + parser.add_argument( + "--batch_size", + dest="batch_size", + type=int, + default=10, + help="how many lines will be translated by aggregated translation(This options currently only applies to txt files)", + ) options = parser.parse_args() PROXY = options.proxy @@ -219,6 +226,7 @@ def main(): translate_tags=options.translate_tags, allow_navigable_strings=options.allow_navigable_strings, prompt_config=parse_prompt_arg(options.prompt_arg), + batch_size=options.batch_size, ) e.make_bilingual_book() diff --git a/book_maker/loader/__init__.py b/book_maker/loader/__init__.py index 98441ea2..8b55bf08 100644 --- a/book_maker/loader/__init__.py +++ b/book_maker/loader/__init__.py @@ -1,5 +1,4 @@ from book_maker.loader.epub_loader import EPUBBookLoader - from book_maker.loader.txt_loader import TXTBookLoader BOOK_LOADER_DICT = { diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index eb81b45b..3587ae7a 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -9,9 +9,10 @@ from rich import print from tqdm import tqdm -from .base_loader import BaseBookLoader from book_maker.utils import prompt_config_to_kwargs +from .base_loader import BaseBookLoader + class EPUBBookLoader(BaseBookLoader): def __init__( @@ -21,6 +22,7 @@ def __init__( key, resume, language, + batch_size, model_api_base=None, is_test=False, test_num=5, diff --git a/book_maker/loader/txt_loader.py b/book_maker/loader/txt_loader.py index 37eceb92..f5238a73 100644 --- a/book_maker/loader/txt_loader.py +++ b/book_maker/loader/txt_loader.py @@ -1,9 +1,10 @@ import sys from pathlib import Path -from .base_loader import BaseBookLoader from book_maker.utils import prompt_config_to_kwargs +from .base_loader import BaseBookLoader + class TXTBookLoader(BaseBookLoader): def __init__( @@ -13,6 +14,7 @@ def __init__( key, resume, language, + batch_size, translate_tags, allow_navigable_strings, 
model_api_base=None, @@ -32,6 +34,7 @@ def __init__( self.bilingual_result = [] self.bilingual_temp_result = [] self.test_num = test_num + self.batch_size = batch_size try: with open(f"{txt_name}", "r", encoding="utf-8") as f: @@ -57,17 +60,22 @@ def make_bilingual_book(self): p_to_save_len = len(self.p_to_save) try: - for i in self.origin_book: - if self._is_special_text(i): + sliced_list = [ + self.origin_book[i : i + self.batch_size] + for i in range(0, len(self.origin_book), self.batch_size) + ] + for i in sliced_list: + batch_text = "".join(i) + if self._is_special_text(batch_text): continue if self.resume and index < p_to_save_len: pass else: - temp = self.translate_model.translate(i) + temp = self.translate_model.translate(batch_text) self.p_to_save.append(temp) - self.bilingual_result.append(i) + self.bilingual_result.append(batch_text) self.bilingual_result.append(temp) - index += 1 + index += self.batch_size if self.is_test and index > self.test_num: break @@ -85,8 +93,14 @@ def make_bilingual_book(self): def _save_temp_book(self): index = 0 - for i in range(0, len(self.origin_book)): - self.bilingual_temp_result.append(self.origin_book[i]) + sliced_list = [ + self.origin_book[i : i + self.batch_size] + for i in range(0, len(self.origin_book), self.batch_size) + ] + + for i in range(0, len(sliced_list)): + batch_text = "".join(sliced_list[i]) + self.bilingual_temp_result.append(batch_text) if self._is_special_text(self.origin_book[i]): continue if index < len(self.p_to_save): diff --git a/book_maker/obok.py b/book_maker/obok.py index c945a992..350def08 100644 --- a/book_maker/obok.py +++ b/book_maker/obok.py @@ -164,19 +164,19 @@ __version__ = "4.0.0" __about__ = "Obok v{0}\nCopyright © 2012-2020 Physisticated et al.".format(__version__) -import sys -import os -import subprocess -import sqlite3 import base64 import binascii -import re -import zipfile import hashlib -import xml.etree.ElementTree as ET -import string +import os +import re import shutil 
+import sqlite3 +import string +import subprocess +import sys import tempfile +import xml.etree.ElementTree as ET +import zipfile can_parse_xml = True try: @@ -199,14 +199,14 @@ def _load_crypto_libcrypto(): from ctypes import ( CDLL, POINTER, - c_void_p, + Structure, c_char_p, c_int, c_long, - Structure, c_ulong, - create_string_buffer, + c_void_p, cast, + create_string_buffer, ) from ctypes.util import find_library diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py index 50ad906d..7d442d45 100644 --- a/book_maker/translator/chatgptapi_translator.py +++ b/book_maker/translator/chatgptapi_translator.py @@ -1,12 +1,11 @@ import time +from os import environ import openai -from os import environ from .base_translator import Base from ..utils import num_tokens_from_messages - PROMPT_ENV_MAP = { "user": "BBM_CHATGPTAPI_USER_MSG_TEMPLATE", "system": "BBM_CHATGPTAPI_SYS_MSG", diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..8912e245 --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +from setuptools import find_packages, setup + +setup( + name="bbook_maker", + description="The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.", + version="0.1.0", + license="MIT", + author="yihong0618", + author_email="zouzou0208@gmail.com", + packages=find_packages(), + url="https://github.com/yihong0618/bilingual_book_maker", + python_requires=">=3.7", + install_requires=["bs4", "openai", "requests", "ebooklib", "rich", "tqdm"], + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + entry_points={ + "console_scripts": ["bbook_maker = book_maker.cli:main"], + }, +)