Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
linyuan0213 committed Apr 20, 2024
2 parents ae3fcaa + 194a9a3 commit 5c4a966
Show file tree
Hide file tree
Showing 7 changed files with 249 additions and 75 deletions.
52 changes: 52 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,58 @@

馒头模拟登录需要添加 cookie,并通过 cookie 访问接口;使用该方式存在账户被禁用的风险。

### 插件

- 自定义识别词

  TMDB ID 获取:在 [TMDB](https://www.themoviedb.org/?language=zh-CN) 网站搜索关键词,打开对应影片页面,复制 URL 中的数字 ID。例如 `https://www.themoviedb.org/movie/693134-dune-part-two?language=zh-CN` 的 TMDB ID 为 693134。


- 通用识别词维护:

​ 编辑 [通用识别词](https://pad.xcreal.cc/p/通用识别词) 添加关键词

​ 格式如下:

​ 屏蔽:被替换词

​ 替换:被替换词@@替换词

​ 替换+集偏移:被替换词@@替换词@@前定位词@@后定位词@@集偏移

​ 集偏移:前定位词@@后定位词@@集偏移

- 电影识别词维护:

​ 编辑 [电影识别词](https://pad.xcreal.cc/p/电影识别词) 添加关键词

​ 格式如下:

​ 屏蔽:tmdb id@@被替换词

​ 替换:tmdb id@@被替换词@@替换词

​ 替换+集偏移:tmdb id@@被替换词@@替换词@@前定位词@@后定位词@@集偏移

​ 集偏移:tmdb id@@前定位词@@后定位词@@集偏移

- 电视识别词维护:

​ 编辑 [电视识别词](https://pad.xcreal.cc/p/电视识别词) 添加关键词

格式同电影识别词

- 动漫识别词维护:

​ 编辑 [动漫识别词](https://pad.xcreal.cc/p/动漫识别词) 添加关键词

格式同电影识别词



**如果有好用的识别词,请共同维护**


### 开启公开站点

在 config.yaml 的 laboratory 下添加 `show_more_sites: true`
Expand Down
87 changes: 76 additions & 11 deletions app/helper/ocr_helper.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,83 @@
import base64
import ddddocr
import cv2
import numpy as np
from PIL import Image
import log

from app.utils import RequestUtils
from app.utils import RequestUtils, StringUtils


class OcrHelper:

_ocr_b64_url = "https://nastool.cn/captcha/base64"
@staticmethod
def around_white(img):
    """
    Paint the outer frame of a 2-D image white (255), in place.

    Along each axis, indices ``0..5`` and the last five indices
    (``size - 5 .. size - 1``) are overwritten with 255. An image smaller
    than the frame is whitened entirely.

    :param img: 2-D numpy array (``img.shape`` must unpack into two values)
    :return: the same array object, modified in place
    """
    rows, cols = img.shape
    # Slice assignment replaces the per-pixel ndarray.itemset() calls;
    # itemset() was removed in NumPy 2.0 and the slices cover the same cells.
    img[:6, :] = 255
    img[:, :6] = 255
    # max(..., 0) keeps tiny images from turning the start into a
    # negative (count-from-the-end) index.
    img[max(rows - 5, 0):, :] = 255
    img[:, max(cols - 5, 0):] = 255
    return img

@staticmethod
def noise_unsome_piexl(img):
    """
    Neighbourhood de-noising: whiten isolated pixels of a 2-D image, in place.

    Every interior pixel is compared with its four direct neighbours. When
    none of them matches the pixel, it is set to 255. Pixels on the outermost
    rows and columns are never modified.

    NOTE(review): "matches" here means "same zero / non-zero state", which is
    what the original ``x.all() == y.all()`` comparison evaluated. On a
    strictly binary 0/255 image this equals colour equality; confirm whether
    exact-value comparison was intended for non-binary input.

    :param img: 2-D numpy array (``img.shape`` must unpack into two values)
    :return: the same array object, modified in place
    """
    rows, cols = img.shape
    # The scan is sequential on purpose: a pixel whitened earlier is seen in
    # its new state by the pixels visited after it.
    for r in range(1, rows - 1):
        for c in range(1, cols - 1):
            center_on = bool(img[r, c])
            neighbours = (
                img[r, c + 1],
                img[r, c - 1],
                img[r - 1, c],
                img[r + 1, c],
            )
            if not any(bool(value) == center_on for value in neighbours):
                # Plain index assignment: ndarray.itemset() was removed in
                # NumPy 2.0.
                img[r, c] = 255
    return img

def image_pre_process(self, image):
    """
    Prepare a captcha image for OCR.

    Steps, in order: convert to grayscale, apply a fixed binary threshold
    (127 -> 255), run OpenCV non-local-means denoising, whiten isolated
    pixels, then whiten the outer frame.

    :param image: colour image array accepted by ``cv2.cvtColor`` with
                  ``COLOR_BGR2GRAY``
    :return: the processed image
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (retval, image); only the image is needed.
    binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
    smoothed = cv2.fastNlMeansDenoising(
        binary,
        h=30,
        templateWindowSize=11,
        searchWindowSize=21,
    )
    return OcrHelper.around_white(OcrHelper.noise_unsome_piexl(smoothed))

def recognize_captcha(self, image_content):
    """
    Recognise the text of a captcha image.

    The raw bytes are decoded with OpenCV, pre-processed by
    ``image_pre_process``, handed to ddddocr, passed through a replacement
    table for two known misreads, and upper-cased.

    :param image_content: encoded image bytes
    :return: upper-cased recognised text; on any failure the error is logged
             and whatever was recognised so far (possibly "") is returned
    """
    res = ""
    try:
        ocr = ddddocr.DdddOcr(show_ad=False)
        image = np.asarray(bytearray(image_content), dtype="uint8")
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        op_image = self.image_pre_process(image)
        # The pre-processed image is single-channel. COLOR_BGR2RGB only
        # accepts 3/4-channel input and raised on every call, so the method
        # always fell through to the except branch and returned "".
        if op_image.ndim == 2:
            rgb_image = cv2.cvtColor(op_image, cv2.COLOR_GRAY2RGB)
        else:
            rgb_image = cv2.cvtColor(op_image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_image)
        res = ocr.classification(pil_image)
        # Map characters the OCR commonly confuses back to digits.
        res = StringUtils.replace_strings(res, {'之': '2', '>': '7'})
        return res.upper()
    except Exception as e:
        # Best-effort: callers get a string back instead of an exception.
        log.error(f"{str(e)}{res}")
    return res

def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None):
"""
Expand All @@ -22,12 +94,5 @@ def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None)
image_bin = ret.content
if not image_bin:
return ""
image_b64 = base64.b64encode(image_bin).decode()
if not image_b64:
return ""
ret = RequestUtils(content_type="application/json").post_res(
url=self._ocr_b64_url,
json={"base64_img": image_b64})
if ret:
return ret.json().get("result")
return self.recognize_captcha(image_bin)
return ""
Loading

0 comments on commit 5c4a966

Please sign in to comment.