Skip to content

Commit

Permalink
fix(#92): add a arguments to allow NavigableStrings translate (#126)
Browse files Browse the repository at this point in the history
* fix(#92): add a arguments to allow NavigableStrings


---------

Co-authored-by: yihong0618 <[email protected]>
  • Loading branch information
InzamZ and yihong0618 authored Mar 10, 2023
1 parent dfcf078 commit cbe165d
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
1 change: 1 addition & 0 deletions README-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制
**请注意,此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹**
11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
13. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**

e.g.
```shell
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
**Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**
11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` will be generated.
12. If an error occurs, or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` will be generated. You can simply rename it to any desired name.
13. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**

### Examples

Expand Down
23 changes: 18 additions & 5 deletions book_maker/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,25 +89,37 @@ def main():
default="p",
help="example --translate-tags p,blockquote",
)
parser.add_argument(
"--allow_navigable_strings",
dest="allow_navigable_strings",
action="store_true",
default=False,
help="allow NavigableStrings to be translated",
)

options = parser.parse_args()
PROXY = options.proxy
if PROXY != "":
os.environ["http_proxy"] = PROXY
os.environ["https_proxy"] = PROXY

OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
raise Exception("OpenAI API key not provided, please google how to obtain it")
translate_model = MODEL_DICT.get(options.model)
assert translate_model is not None, "unsupported model"
if translate_model in ["gpt3", "chatgptapi"]:
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
raise Exception(
"OpenAI API key not provided, please google how to obtain it"
)
else:
OPENAI_API_KEY = ""

book_type = options.book_name.split(".")[-1]
support_type_list = list(BOOK_LOADER_DICT.keys())
if book_type not in support_type_list:
raise Exception(
f"now only support files of these formats: {','.join(support_type_list)}"
)
translate_model = MODEL_DICT.get(options.model)
assert translate_model is not None, "unsupported model"

book_loader = BOOK_LOADER_DICT.get(book_type)
assert book_loader is not None, "unsupported loader"
Expand All @@ -129,6 +141,7 @@ def main():
is_test=options.test,
test_num=options.test_num,
translate_tags=options.translate_tags,
allow_navigable_strings=options.allow_navigable_strings,
)
e.make_bilingual_book()

Expand Down
10 changes: 10 additions & 0 deletions book_maker/loader/epub_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ def __init__(
is_test=False,
test_num=5,
translate_tags="p",
allow_navigable_strings=False,
):
self.epub_name = epub_name
self.new_epub = epub.EpubBook()
self.translate_model = model(key, language, model_api_base)
self.is_test = is_test
self.test_num = test_num
self.translate_tags = translate_tags
self.allow_navigable_strings = allow_navigable_strings

try:
self.origin_book = epub.read_epub(self.epub_name)
Expand Down Expand Up @@ -77,6 +79,12 @@ def make_bilingual_book(self):
else len(bs(i.content, "html.parser").findAll(trans_taglist))
for i in all_items
)
all_p_length += self.allow_navigable_strings * sum(
0
if i.get_type() != ITEM_DOCUMENT
else len(bs(i.content, "html.parser").findAll(text=True))
for i in all_items
)
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
index = 0
p_to_save_len = len(self.p_to_save)
Expand All @@ -85,6 +93,8 @@ def make_bilingual_book(self):
if item.get_type() == ITEM_DOCUMENT:
soup = bs(item.content, "html.parser")
p_list = soup.findAll(trans_taglist)
if self.allow_navigable_strings:
p_list.extend(soup.findAll(text=True))
is_test_done = self.is_test and index > self.test_num
for p in p_list:
if is_test_done or not p.text or self._is_special_text(p.text):
Expand Down

0 comments on commit cbe165d

Please sign in to comment.