Commit

fix: m-team brushing (刷流)
linyuan0213 committed Mar 29, 2024
1 parent f40361f commit 4ac3eca
Showing 10 changed files with 396 additions and 93 deletions.
10 changes: 9 additions & 1 deletion app/helper/site_helper.py
@@ -2,10 +2,11 @@
 from datetime import datetime
 import os
 import re
+import json
 
 from lxml import etree
 
-from app.utils import SystemUtils
+from app.utils import SystemUtils, JsonUtils
 from config import RMT_SUBEXT

@@ -18,6 +19,13 @@ def is_logged_in(cls, html_text):
         :param html_text:
         :return:
         """
+        if JsonUtils.is_valid_json(html_text):
+            json_data = json.loads(html_text)
+            if 'message' in json_data and json_data['message'] == 'SUCCESS':
+                return True
+            else:
+                return False
+
         html = etree.HTML(html_text)
         if not html:
             return False
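
The new early-return branch depends on JsonUtils.is_valid_json, which is not part of this diff. A minimal sketch of what such a helper might look like, assuming it only needs to distinguish JSON API responses from HTML pages (hypothetical implementation; the real helper lives in app/utils and may differ):

import json


class JsonUtils:

    @staticmethod
    def is_valid_json(text):
        # Return True only when the text parses as JSON, guarding the
        # json.loads call in is_logged_in against ordinary HTML pages.
        if not text:
            return False
        try:
            json.loads(text)
            return True
        except (ValueError, TypeError):
            return False
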
94 changes: 54 additions & 40 deletions app/sites/site_userinfo.py
@@ -79,56 +79,70 @@ def build(self, url, site_id, site_name,
             html_text = chrome.get_html()
         else:
             proxies = Config().get_proxies() if proxy else None
-            res = RequestUtils(cookies=site_cookie,
-                               session=session,
-                               headers=ua,
-                               proxies=proxies
-                               ).get_res(url=url)
+            if 'm-team' in url:
+                profile_url = url + '/api/member/profile'
+                res = RequestUtils(cookies=site_cookie,
+                                   session=session,
+                                   headers=ua,
+                                   proxies=proxies
+                                   ).post_res(url=profile_url, data={})
+            else:
+                res = RequestUtils(cookies=site_cookie,
+                                   session=session,
+                                   headers=ua,
+                                   proxies=proxies
+                                   ).get_res(url=url)
             if res and res.status_code == 200:
                 if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
                     res.encoding = "UTF-8"
                 else:
                     res.encoding = res.apparent_encoding
                 html_text = res.text
-                # anti-crawl check on first login
-                if html_text.find("title") == -1:
-                    i = html_text.find("window.location")
-                    if i == -1:
-                        return None
-                    tmp_url = url + html_text[i:html_text.find(";")] \
-                        .replace("\"", "").replace("+", "").replace(" ", "").replace("window.location=", "")
-                    res = RequestUtils(cookies=site_cookie,
-                                       session=session,
-                                       headers=ua,
-                                       proxies=proxies
-                                       ).get_res(url=tmp_url)
-                    if res and res.status_code == 200:
-                        if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
-                            res.encoding = "UTF-8"
-                        else:
-                            res.encoding = res.apparent_encoding
-                        html_text = res.text
-                        if not html_text:
-                            return None
-                    else:
-                        log.error("【Sites】Site %s blocked by anti-crawl protection: %s, status code: %s" % (site_name, url, res.status_code))
-                        return None
-
-                # handle fake homepages, which usually lack the <link rel="search" attribute
-                if '"search"' not in html_text and '"csrf-token"' not in html_text:
-                    res = RequestUtils(cookies=site_cookie,
-                                       session=session,
-                                       headers=ua,
-                                       proxies=proxies
-                                       ).get_res(url=url + "/index.php")
-                    if res and res.status_code == 200:
-                        if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
-                            res.encoding = "UTF-8"
-                        else:
-                            res.encoding = res.apparent_encoding
-                        html_text = res.text
-                        if not html_text:
-                            return None
+                # handle m-team separately
+                if 'm-team' in url:
+                    json_data = json.loads(html_text)
+                    if 'message' in json_data and json_data['message'] != "SUCCESS":
+                        return None
+                else:
+                    # anti-crawl check on first login
+                    if html_text.find("title") == -1:
+                        i = html_text.find("window.location")
+                        if i == -1:
+                            return None
+                        tmp_url = url + html_text[i:html_text.find(";")] \
+                            .replace("\"", "").replace("+", "").replace(" ", "").replace("window.location=", "")
+                        res = RequestUtils(cookies=site_cookie,
+                                           session=session,
+                                           headers=ua,
+                                           proxies=proxies
+                                           ).get_res(url=tmp_url)
+                        if res and res.status_code == 200:
+                            if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
+                                res.encoding = "UTF-8"
+                            else:
+                                res.encoding = res.apparent_encoding
+                            html_text = res.text
+                            if not html_text:
+                                return None
+                        else:
+                            log.error("【Sites】Site %s blocked by anti-crawl protection: %s, status code: %s" % (site_name, url, res.status_code))
+                            return None
+
+                    # handle fake homepages, which usually lack the <link rel="search" attribute
+                    if '"search"' not in html_text and '"csrf-token"' not in html_text:
+                        res = RequestUtils(cookies=site_cookie,
+                                           session=session,
+                                           headers=ua,
+                                           proxies=proxies
+                                           ).get_res(url=url + "/index.php")
+                        if res and res.status_code == 200:
+                            if "charset=utf-8" in res.text or "charset=UTF-8" in res.text:
+                                res.encoding = "UTF-8"
+                            else:
+                                res.encoding = res.apparent_encoding
+                            html_text = res.text
+                            if not html_text:
+                                return None
             elif res is not None:
                 log.error(f"【Sites】Site {site_name} connection failed, status code: {res.status_code}")
                 return None
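
What this hunk changes: for m-team the builder no longer scrapes an HTML profile page; it POSTs an empty body to /api/member/profile and treats any JSON whose message field is not "SUCCESS" as a failed login. A minimal standalone sketch of that probe, assuming plain requests and cookie-string auth (the response shape is inferred from the checks in the diff; the real code goes through RequestUtils):

import requests


def fetch_mteam_profile(base_url, cookie, ua, proxies=None):
    # POST an empty body to the profile endpoint; m-team answers with JSON.
    res = requests.post(base_url + "/api/member/profile",
                        headers={"User-Agent": ua, "Cookie": cookie},
                        data={},
                        proxies=proxies,
                        timeout=15)
    if res.status_code != 200:
        return None
    payload = res.json()
    # Mirror the diff's check: anything other than SUCCESS means not logged in.
    return payload if payload.get("message") == "SUCCESS" else None
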
135 changes: 88 additions & 47 deletions app/sites/siteconf.py
@@ -2,9 +2,12 @@
 import pickle
 import random
 import time
+import re
+import json
 from functools import lru_cache
 
 from lxml import etree
+from urllib.parse import urlsplit
 
 from app.helper import ChromeHelper
 from app.utils import ExceptionUtils, StringUtils, RequestUtils

@@ -115,66 +118,104 @@ def check_torrent_attr(self, torrent_url, cookie, ua=None, proxy=False):
         :return: torrent attributes, including FREE, 2XFREE, HR, PEER_COUNT, etc.
         """
         ret_attr = {
             "free": False,
             "2xfree": False,
             "hr": False,
             "peer_count": 0
         }
-        if not torrent_url:
-            return ret_attr
-        xpath_strs = self.get_grap_conf(torrent_url)
-        if not xpath_strs:
-            return ret_attr
-        html_text = self.__get_site_page_html(url=torrent_url,
-                                              cookie=cookie,
-                                              ua=ua,
-                                              render=xpath_strs.get('RENDER'),
-                                              proxy=proxy)
-        if not html_text:
-            return ret_attr
-        try:
-            html = etree.HTML(html_text)
-            # detect 2XFREE
-            for xpath_str in xpath_strs.get("2XFREE"):
-                if html.xpath(xpath_str):
-                    ret_attr["free"] = True
-                    ret_attr["2xfree"] = True
-            # detect FREE
-            for xpath_str in xpath_strs.get("FREE"):
-                if html.xpath(xpath_str):
-                    ret_attr["free"] = True
-            # detect HR
-            for xpath_str in xpath_strs.get("HR"):
-                if html.xpath(xpath_str):
-                    ret_attr["hr"] = True
-            # detect PEER_COUNT (current number of seeders)
-            for xpath_str in xpath_strs.get("PEER_COUNT"):
-                peer_count_dom = html.xpath(xpath_str)
-                if peer_count_dom:
-                    peer_count_str = ''.join(peer_count_dom[0].itertext())
-                    peer_count_digit_str = ""
-                    for m in peer_count_str:
-                        if m.isdigit():
-                            peer_count_digit_str = peer_count_digit_str + m
-                        if m == " ":
-                            break
-                    ret_attr["peer_count"] = int(peer_count_digit_str) if len(peer_count_digit_str) > 0 else 0
-        except Exception as err:
-            ExceptionUtils.exception_traceback(err)
+        if 'm-team' in torrent_url:
+            split_url = urlsplit(torrent_url)
+            base_url = f"{split_url.scheme}://{split_url.netloc}"
+            detail_url = f"{base_url}/api/torrent/detail"
+            res = re.findall(r'\d+', torrent_url)
+            param = res[0]
+            json_text = self.__get_site_page_html(url=detail_url,
+                                                  cookie=cookie,
+                                                  ua=ua,
+                                                  proxy=proxy,
+                                                  param=param)
+            json_data = json.loads(json_text)
+            if json_data['message'] != "SUCCESS":
+                return ret_attr
+            discount = json_data['data']['status']['discount']
+            seeders = json_data['data']['status']['seeders']
+            if discount == 'FREE':
+                ret_attr["free"] = True
+            ret_attr['peer_count'] = int(seeders)
+
+        else:
+            if not torrent_url:
+                return ret_attr
+            xpath_strs = self.get_grap_conf(torrent_url)
+            if not xpath_strs:
+                return ret_attr
+            html_text = self.__get_site_page_html(url=torrent_url,
+                                                  cookie=cookie,
+                                                  ua=ua,
+                                                  render=xpath_strs.get('RENDER'),
+                                                  proxy=proxy)
+            if not html_text:
+                return ret_attr
+            try:
+                html = etree.HTML(html_text)
+                # detect 2XFREE
+                for xpath_str in xpath_strs.get("2XFREE"):
+                    if html.xpath(xpath_str):
+                        ret_attr["free"] = True
+                        ret_attr["2xfree"] = True
+                # detect FREE
+                for xpath_str in xpath_strs.get("FREE"):
+                    if html.xpath(xpath_str):
+                        ret_attr["free"] = True
+                # detect HR
+                for xpath_str in xpath_strs.get("HR"):
+                    if html.xpath(xpath_str):
+                        ret_attr["hr"] = True
+                # detect PEER_COUNT (current number of seeders)
+                for xpath_str in xpath_strs.get("PEER_COUNT"):
+                    peer_count_dom = html.xpath(xpath_str)
+                    if peer_count_dom:
+                        peer_count_str = ''.join(peer_count_dom[0].itertext())
+                        peer_count_digit_str = ""
+                        for m in peer_count_str:
+                            if m.isdigit():
+                                peer_count_digit_str = peer_count_digit_str + m
+                            if m == " ":
+                                break
+                        ret_attr["peer_count"] = int(peer_count_digit_str) if len(peer_count_digit_str) > 0 else 0
+            except Exception as err:
+                ExceptionUtils.exception_traceback(err)
         # sleep for a random interval before returning
         time.sleep(round(random.uniform(1, 5), 1))
         return ret_attr
 
     @staticmethod
     @lru_cache(maxsize=128)
-    def __get_site_page_html(url, cookie, ua, render=False, proxy=False):
+    def __get_site_page_html(url, cookie, ua, render=False, proxy=False, param=None):
         chrome = ChromeHelper(headless=True)
         if render and chrome.get_status():
             # rendering enabled
             if chrome.visit(url=url, cookie=cookie, ua=ua, proxy=proxy):
                 # wait for the page to finish loading
                 time.sleep(10)
                 return chrome.get_html()
+        elif 'm-team' in url:
+            param = {'id': param}
+            headers = {}
+            headers.update({
+                "User-Agent": f"{ua}"
+            })
+            headers.update({
+                "contentType": 'application/json;charset=UTF-8'
+            })
+            res = RequestUtils(
+                cookies=cookie,
+                headers=headers,
+                proxies=Config().get_proxies() if proxy else None
+            ).post_res(url=url, data=param)
+            if res and res.status_code == 200:
+                res.encoding = res.apparent_encoding
+                return res.text
         else:
             res = RequestUtils(
                 cookies=cookie,
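
Two things are worth noting here. Because __get_site_page_html is wrapped in lru_cache, the new param argument becomes part of the cache key, so detail lookups for different torrent ids are cached as separate entries. Also, json.loads(json_text) in check_torrent_attr will raise if __get_site_page_html returns None (for example on a non-200 response), so a guard may be warranted. A hedged sketch of the m-team parsing step in isolation, with the JSON shape inferred from the fields the diff reads (data.status.discount and data.status.seeders):

import json
import re


def parse_mteam_detail(torrent_url, json_text):
    # The torrent id is taken as the first run of digits in the URL,
    # mirroring re.findall(r'\d+', torrent_url)[0] in the diff above.
    torrent_id = re.findall(r"\d+", torrent_url)[0]
    attrs = {"free": False, "2xfree": False, "hr": False, "peer_count": 0}
    if not json_text:
        return torrent_id, attrs
    data = json.loads(json_text)
    if data.get("message") != "SUCCESS":
        return torrent_id, attrs
    status = data["data"]["status"]
    attrs["free"] = status.get("discount") == "FREE"
    attrs["peer_count"] = int(status.get("seeders", 0))
    return torrent_id, attrs

Note that a URL whose path contains other digit runs would defeat the first-digits heuristic; a stricter pattern anchored to the detail-page path would be safer.
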
16 changes: 12 additions & 4 deletions app/sites/sites.py
@@ -288,10 +288,18 @@ def test_connection(self, site_id):
         else:
             # start timing
             start_time = datetime.now()
-            res = RequestUtils(cookies=site_cookie,
-                               headers=ua,
-                               proxies=Config().get_proxies() if site_info.get("proxy") else None
-                               ).get_res(url=site_url)
+            # m-team handling
+            if 'm-team' in site_url:
+                url = site_url + '/api/member/profile'
+                res = RequestUtils(cookies=site_cookie,
+                                   headers=ua,
+                                   proxies=Config().get_proxies() if site_info.get("proxy") else None
+                                   ).post_res(url=url, data={})
+            else:
+                res = RequestUtils(cookies=site_cookie,
+                                   headers=ua,
+                                   proxies=Config().get_proxies() if site_info.get("proxy") else None
+                                   ).get_res(url=site_url)
             seconds = int((datetime.now() - start_time).microseconds / 1000)
             if res and res.status_code == 200:
                 if not SiteHelper.is_logged_in(res.text):
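
One caveat visible in the context lines: (datetime.now() - start_time).microseconds carries only the sub-second part of the elapsed time, so responses slower than one second are under-reported; total_seconds() avoids that truncation. A standalone sketch of the m-team branch of this check under that fix (the endpoint and the SUCCESS check come from the diff; header handling and the timeout are assumptions):

from datetime import datetime

import requests


def test_mteam_connection(site_url, cookie, ua, proxies=None):
    # Time the profile POST the way test_connection does, but report the
    # full elapsed time in milliseconds via total_seconds().
    start_time = datetime.now()
    res = requests.post(site_url + "/api/member/profile",
                        headers={"User-Agent": ua, "Cookie": cookie},
                        data={},
                        proxies=proxies,
                        timeout=15)
    millis = int((datetime.now() - start_time).total_seconds() * 1000)
    ok = res.status_code == 200 and res.json().get("message") == "SUCCESS"
    return ok, millis
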
(diffs for the remaining 6 changed files are not shown)
