From 80396cd9fb2f30a4adc10ee44e5b7c4e9bfb4f6e Mon Sep 17 00:00:00 2001 From: Aziteee <113450723+Aziteee@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:27:13 +0800 Subject: [PATCH] feat: add qq music and bilibili parser (#16) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 支持解析QQ音乐和哔哩哔哩的分享链接 ![image](https://github.com/user-attachments/assets/2bf50ea0-c1f3-4eee-9770-c341d08ba260) 由于原parser功能比较单一,所以有对代码进行重构,把请求的逻辑放到了parser里面 ```release-note 支持解析QQ音乐和哔哩哔哩的分享链接 ``` --- .../editor/hyperlink/HyperLinkRequest.java | 37 ----- .../handler/HyperLinkBilibiliParser.java | 71 +++++++++ .../handler/HyperLinkDefaultParser.java | 146 +++++++++++++----- .../hyperlink/handler/HyperLinkParser.java | 4 +- .../handler/HyperLinkParserConfiguration.java | 19 ++- .../handler/HyperLinkQQMusicParser.java | 83 ++++++++++ .../editor/hyperlink/handler/ParserType.java | 7 +- .../service/HyperLinkCardServiceImpl.java | 21 +-- 8 files changed, 289 insertions(+), 99 deletions(-) create mode 100644 src/main/java/run/halo/editor/hyperlink/handler/HyperLinkBilibiliParser.java create mode 100644 src/main/java/run/halo/editor/hyperlink/handler/HyperLinkQQMusicParser.java diff --git a/src/main/java/run/halo/editor/hyperlink/HyperLinkRequest.java b/src/main/java/run/halo/editor/hyperlink/HyperLinkRequest.java index d3450eb..e77fe20 100644 --- a/src/main/java/run/halo/editor/hyperlink/HyperLinkRequest.java +++ b/src/main/java/run/halo/editor/hyperlink/HyperLinkRequest.java @@ -21,43 +21,6 @@ @RequiredArgsConstructor public class HyperLinkRequest { - private final HttpClientFactory clientFactory; - - public Mono getHyperLinkDetail(URI linkURI) { - AtomicReference resourceUrl = new AtomicReference<>(linkURI.toString()); - return clientFactory.createHttpClientBuilder(linkURI.getHost()) - .map(httpClient -> httpClient.followRedirect(true, (clientRequest) -> { - if (StringUtils.hasText(clientRequest.resourceUrl())) { - resourceUrl.set(clientRequest.resourceUrl()); - } - })) - .map(httpClient -> WebClient.builder() - .clientConnector(new ReactorClientHttpConnector(httpClient)) - .build()) - .flatMap(webClient -> webClient.get() - .uri(linkURI) - .accept(MediaType.TEXT_HTML) - .headers(httpHeaders -> { - httpHeaders.set(HttpHeaders.USER_AGENT, - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, " - + "like Gecko) Chrome/58.0.3029.110 Safari/537.3"); - httpHeaders.set(HttpHeaders.REFERER, - linkURI.getScheme() + "://" + linkURI.getHost()); - }) - .retrieve() - .bodyToFlux(DataBuffer.class) - .flatMap(dataBuffer -> { - String content = dataBuffer.toString(StandardCharsets.UTF_8); - DataBufferUtils.release(dataBuffer); - return Mono.just(content); - }) - .reduce(new StringBuilder(), StringBuilder::append) - .filter(stringBuilder -> !stringBuilder.isEmpty()) - .map(StringBuilder::toString) - .map(htmlContent -> new HyperLinkResponse(htmlContent, resourceUrl.get())) - ); - } - public record HyperLinkResponse(String htmlContent, String url) { } diff --git a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkBilibiliParser.java b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkBilibiliParser.java new file mode 100644 index 0000000..294fe7f --- /dev/null +++ b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkBilibiliParser.java @@ -0,0 +1,71 @@ +package run.halo.editor.hyperlink.handler; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpHeaders; +import org.springframework.http.client.reactive.ReactorClientHttpConnector; +import org.springframework.web.reactive.function.client.WebClient; +import reactor.core.publisher.Mono; +import run.halo.editor.hyperlink.HttpClientFactory; +import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO; +import java.net.URI; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +@RequiredArgsConstructor +public class HyperLinkBilibiliParser implements HyperLinkParser { + + private final HttpClientFactory clientFactory; + private final ObjectMapper objectMapper; + + public Mono parse(URI linkURI) { + return getHyperLinkDetail(linkURI) + .map(item -> { + var hyperLinkDTO = new HyperLinkBaseDTO(); + try { + JsonNode root = objectMapper.readTree(item); + JsonNode data = root.path("data"); + + hyperLinkDTO.setUrl(linkURI.toString()); + hyperLinkDTO.setTitle(data.path("title").asText()); + hyperLinkDTO.setImage(data.path("pic").asText()); + hyperLinkDTO.setDescription("UP主:" + data.path("owner").path("name").asText()); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + return hyperLinkDTO; + }); + } + + public Mono getHyperLinkDetail(URI linkURI) { + String api = "https://api.bilibili.com/x/web-interface/view?" + getQueryParam(linkURI); + return clientFactory.createHttpClientBuilder(linkURI.getHost()) + .map(httpClient -> WebClient.builder() + .clientConnector(new ReactorClientHttpConnector(httpClient)) + .build()) + .flatMap(webClient -> webClient.get() + .uri(api) + .headers(httpHeaders -> { + httpHeaders.set(HttpHeaders.CONTENT_TYPE, "application/json"); + }) + .retrieve() + .bodyToMono(String.class)); + } + + public String getQueryParam(URI linkURI) { + Pattern pattern = Pattern.compile("video/([a-zA-Z0-9]+)"); + Matcher matcher = pattern.matcher(linkURI.toString()); + if (!matcher.find()) { + throw new RuntimeException("id not found"); + } + String id = matcher.group(1); + System.out.println(id); + if (id.chars().allMatch(Character::isDigit)) { + return "aid=" + id; + } else { + return "bvid=" + id; + } + } +} diff --git a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java index 0ecabd4..eb40f23 100644 --- a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java +++ b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java @@ -1,77 +1,147 @@ package run.halo.editor.hyperlink.handler; +import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; +import java.util.concurrent.atomic.AtomicReference; +import lombok.RequiredArgsConstructor; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.parser.Parser; import org.jsoup.select.Elements; +import org.springframework.core.io.buffer.DataBuffer; +import org.springframework.core.io.buffer.DataBufferUtils; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.client.reactive.ReactorClientHttpConnector; import org.springframework.util.CollectionUtils; import org.springframework.util.StringUtils; +import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.server.ServerWebInputException; +import reactor.core.publisher.Mono; +import run.halo.app.infra.utils.PathUtils; +import run.halo.editor.hyperlink.HttpClientFactory; +import run.halo.editor.hyperlink.HyperLinkRequest; import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO; /** * @author LIlGG */ +@RequiredArgsConstructor public class HyperLinkDefaultParser implements HyperLinkParser { + private final HttpClientFactory clientFactory; + @SuppressWarnings("checkstyle:MissingSwitchDefault") @Override - public HyperLinkBaseDTO parse(String htmlContent) { - var hyperLinkBaseDTO = new HyperLinkBaseDTO(); - Document parse = Jsoup.parse(htmlContent, Parser.htmlParser()); + public Mono parse(URI linkURI) { + return getHyperLinkDetail(linkURI) + .switchIfEmpty( + Mono.error(new ServerWebInputException("this website is not supported."))) + .map(item -> { + var actualURI = URI.create(item.url()); - Elements meta = parse.getElementsByTag("meta"); - parserMetas(meta, hyperLinkBaseDTO); + var hyperLinkDTO = new HyperLinkBaseDTO(); + Document parse = Jsoup.parse(item.htmlContent(), Parser.htmlParser()); - var titles = parse.getElementsByTag("title"); - if (!CollectionUtils.isEmpty(titles)) { - var title = titles.get(0).text(); - hyperLinkBaseDTO.setTitle(title); - } + Elements meta = parse.getElementsByTag("meta"); + parserMetas(meta, hyperLinkDTO); + + var titles = parse.getElementsByTag("title"); + if (!CollectionUtils.isEmpty(titles)) { + var title = titles.get(0).text(); + hyperLinkDTO.setTitle(title); + } - Elements links = parse.getElementsByTag("link"); - parserLinks(links, hyperLinkBaseDTO); + Elements links = parse.getElementsByTag("link"); + parserLinks(links, hyperLinkDTO); - return hyperLinkBaseDTO; + if (org.apache.commons.lang3.StringUtils.isNotBlank(hyperLinkDTO.getIcon()) + && !PathUtils.isAbsoluteUri(hyperLinkDTO.getIcon())) { + hyperLinkDTO.setIcon(actualURI.resolve(hyperLinkDTO.getIcon()).toString()); + } + if (org.apache.commons.lang3.StringUtils.isNotBlank(hyperLinkDTO.getImage()) + && !PathUtils.isAbsoluteUri(hyperLinkDTO.getImage())) { + hyperLinkDTO.setImage(actualURI.resolve(hyperLinkDTO.getImage()).toString()); + } + if (org.apache.commons.lang3.StringUtils.isBlank(hyperLinkDTO.getUrl())) { + hyperLinkDTO.setUrl(actualURI.toString()); + } + return hyperLinkDTO; + }); + } + + public Mono getHyperLinkDetail(URI linkURI) { + AtomicReference resourceUrl = new AtomicReference<>(linkURI.toString()); + return clientFactory.createHttpClientBuilder(linkURI.getHost()) + .map(httpClient -> httpClient.followRedirect(true, (clientRequest) -> { + if (StringUtils.hasText(clientRequest.resourceUrl())) { + resourceUrl.set(clientRequest.resourceUrl()); + } + })) + .map(httpClient -> WebClient.builder() + .clientConnector(new ReactorClientHttpConnector(httpClient)) + .build()) + .flatMap(webClient -> webClient.get() + .uri(linkURI) + .accept(MediaType.TEXT_HTML) + .headers(httpHeaders -> { + httpHeaders.set(HttpHeaders.USER_AGENT, + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, " + + "like Gecko) Chrome/58.0.3029.110 Safari/537.3"); + httpHeaders.set(HttpHeaders.REFERER, + linkURI.getScheme() + "://" + linkURI.getHost()); + }) + .retrieve() + .bodyToFlux(DataBuffer.class) + .flatMap(dataBuffer -> { + String content = dataBuffer.toString(StandardCharsets.UTF_8); + DataBufferUtils.release(dataBuffer); + return Mono.just(content); + }) + .reduce(new StringBuilder(), StringBuilder::append) + .filter(stringBuilder -> !stringBuilder.isEmpty()) + .map(StringBuilder::toString) + .map(htmlContent -> new HyperLinkRequest.HyperLinkResponse(htmlContent, resourceUrl.get()))); } private void parserLinks(Elements links, HyperLinkBaseDTO hyperLinkBaseDTO) { links.stream().filter(element -> element.hasAttr("rel")) - .forEach(element -> { - String rel = element.attr("rel"); - List rels = Arrays.asList(rel.split(" ")); - if (rels.contains("icon") && !StringUtils.hasText(hyperLinkBaseDTO.getIcon())) { - hyperLinkBaseDTO.setIcon(element.attr("href")); - } - }); + .forEach(element -> { + String rel = element.attr("rel"); + List rels = Arrays.asList(rel.split(" ")); + if (rels.contains("icon") && !StringUtils.hasText(hyperLinkBaseDTO.getIcon())) { + hyperLinkBaseDTO.setIcon(element.attr("href")); + } + }); } private void parserMetas(Elements metas, HyperLinkBaseDTO hyperLinkBaseDTO) { metas.stream().filter(element -> element.hasAttr("property")) - .forEach(element -> { - String property = element.attr("property"); - String content = element.attr("content"); - switch (property) { - case "og:title" -> hyperLinkBaseDTO.setTitle(content); - case "og:description" -> hyperLinkBaseDTO.setDescription(content); - case "og:image" -> hyperLinkBaseDTO.setImage(content); - case "og:url" -> hyperLinkBaseDTO.setUrl(content); - default -> { + .forEach(element -> { + String property = element.attr("property"); + String content = element.attr("content"); + switch (property) { + case "og:title" -> hyperLinkBaseDTO.setTitle(content); + case "og:description" -> hyperLinkBaseDTO.setDescription(content); + case "og:image" -> hyperLinkBaseDTO.setImage(content); + case "og:url" -> hyperLinkBaseDTO.setUrl(content); + default -> { + } } - } - }); + }); if (!StringUtils.hasText(hyperLinkBaseDTO.getDescription())) { metas.stream().filter(element -> element.hasAttr("name")) - .forEach(element -> { - String name = element.attr("name"); - String content = element.attr("content"); - if ("description".equals(name)) { - hyperLinkBaseDTO.setDescription(content); - } - }); + .forEach(element -> { + String name = element.attr("name"); + String content = element.attr("content"); + if ("description".equals(name)) { + hyperLinkBaseDTO.setDescription(content); + } + }); } } } diff --git a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParser.java b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParser.java index bec72d8..1115c6d 100644 --- a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParser.java +++ b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParser.java @@ -1,11 +1,13 @@ package run.halo.editor.hyperlink.handler; +import reactor.core.publisher.Mono; import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO; +import java.net.URI; /** * @author LIlGG */ public interface HyperLinkParser { - T parse(String htmlContent); + Mono parse(URI linkURI); } diff --git a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParserConfiguration.java b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParserConfiguration.java index 96affd0..b31d451 100644 --- a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParserConfiguration.java +++ b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkParserConfiguration.java @@ -1,17 +1,34 @@ package run.halo.editor.hyperlink.handler; +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.RequiredArgsConstructor; +import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import run.halo.editor.hyperlink.HttpClientFactory; import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO; /** * @author LIlGG */ @Configuration +@RequiredArgsConstructor public class HyperLinkParserConfiguration { + private final HttpClientFactory httpClientFactory; + @Bean public HyperLinkParser defaultParser() { - return new HyperLinkDefaultParser(); + return new HyperLinkDefaultParser(httpClientFactory); + } + + @Bean + public HyperLinkParser qqMusicParser() { + return new HyperLinkQQMusicParser(httpClientFactory, new ObjectMapper()); + } + + @Bean + public HyperLinkParser bilibiliParser() { + return new HyperLinkBilibiliParser(httpClientFactory, new ObjectMapper()); } } diff --git a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkQQMusicParser.java b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkQQMusicParser.java new file mode 100644 index 0000000..1926135 --- /dev/null +++ b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkQQMusicParser.java @@ -0,0 +1,83 @@ +package run.halo.editor.hyperlink.handler; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpHeaders; +import org.springframework.http.client.reactive.ReactorClientHttpConnector; +import org.springframework.web.reactive.function.client.WebClient; +import reactor.core.publisher.Mono; +import run.halo.editor.hyperlink.HttpClientFactory; +import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO; +import java.net.URI; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +@RequiredArgsConstructor +public class HyperLinkQQMusicParser implements HyperLinkParser { + + private final HttpClientFactory clientFactory; + private final ObjectMapper objectMapper; + + public Mono parse(URI linkURI) { + return getHyperLinkDetail(linkURI) + .map(item -> { + var hyperLinkDTO = new HyperLinkBaseDTO(); + try { + JsonNode root = objectMapper.readTree(item); + JsonNode songInfo = root.path("data").get(0); + + String title = songInfo.path("title").asText(); + List singers = songInfo.path("singer") + .findValues("name") + .stream() + .map(JsonNode::asText) + .collect(Collectors.toList()); + String singer = String.join("/", singers); + String album = songInfo.path("album").path("name").asText(); + String albumId = songInfo.path("album").path("mid").asText(); + + hyperLinkDTO.setUrl(linkURI.toString()); + hyperLinkDTO.setTitle(title); + hyperLinkDTO.setIcon("https://y.gtimg.cn/music/photo_new/T002R300x300M000" + albumId + ".jpg?max_age=2592000"); + hyperLinkDTO.setDescription(singer + " - " + album); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + return hyperLinkDTO; + }); + } + + public Mono getHyperLinkDetail(URI linkURI) { + String api = "https://c.y.qq.com/v8/fcg-bin/fcg_play_single_song.fcg?" + getQueryParam(linkURI) + "&platform=yqq&format=json"; + return clientFactory.createHttpClientBuilder(linkURI.getHost()) + .map(httpClient -> WebClient.builder() + .clientConnector(new ReactorClientHttpConnector(httpClient)) + .build()) + .flatMap(webClient -> webClient.get() + .uri(api) + .headers(httpHeaders -> { + httpHeaders.set(HttpHeaders.CONTENT_TYPE, "application/json"); + }) + .retrieve() + .bodyToMono(String.class)); + } + + public String getQueryParam(URI linkURI) { + Pattern pattern = Pattern.compile("(?:songDetail/|songid=)([a-zA-Z0-9]+)"); + Matcher matcher = pattern.matcher(linkURI.toString()); + if (!matcher.find()) { + throw new RuntimeException("id not found"); + } + String id = matcher.group(1); + System.out.println(id); + if (id.chars().allMatch(Character::isDigit)) { + return "songid=" + id; + } else { + return "songmid=" + id; + } + } +} diff --git a/src/main/java/run/halo/editor/hyperlink/handler/ParserType.java b/src/main/java/run/halo/editor/hyperlink/handler/ParserType.java index 5c37573..73db297 100644 --- a/src/main/java/run/halo/editor/hyperlink/handler/ParserType.java +++ b/src/main/java/run/halo/editor/hyperlink/handler/ParserType.java @@ -1,13 +1,16 @@ package run.halo.editor.hyperlink.handler; import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO; +import java.util.regex.Pattern; /** * @author LIlGG */ public enum ParserType { - DEFAULT("default", HyperLinkDefaultParser.class); + DEFAULT("default", HyperLinkDefaultParser.class), + QQMUSIC("(i.)?y.qq.com", HyperLinkQQMusicParser.class), + BILIBILI("www.bilibili.com", HyperLinkBilibiliParser.class); private final String host; private final Class> type; @@ -28,7 +31,7 @@ public Class> getType() { public static Class> getBeanTypeByHost( String host) { for (ParserType type : values()) { - if (type.getHost().equalsIgnoreCase(host)) { + if (Pattern.matches(type.getHost(), host)) { return type.getType(); } } diff --git a/src/main/java/run/halo/editor/hyperlink/service/HyperLinkCardServiceImpl.java b/src/main/java/run/halo/editor/hyperlink/service/HyperLinkCardServiceImpl.java index 84ce09c..82d7c7a 100644 --- a/src/main/java/run/halo/editor/hyperlink/service/HyperLinkCardServiceImpl.java +++ b/src/main/java/run/halo/editor/hyperlink/service/HyperLinkCardServiceImpl.java @@ -39,26 +39,7 @@ public Mono getHyperLinkDetail(String linkUrl) { if (Objects.nonNull(cacheHyperLink)) { return Mono.just(cacheHyperLink); } - return request.getHyperLinkDetail(uri) - .switchIfEmpty( - Mono.error(new ServerWebInputException("this website is not supported.")) - ) - .map(item -> { - var actualURI = URI.create(item.url()); - var hyperLinkDTO = parserFactory.getParser(uri.getHost()).parse(item.htmlContent()); - if (StringUtils.isNotBlank(hyperLinkDTO.getIcon()) - && !PathUtils.isAbsoluteUri(hyperLinkDTO.getIcon())) { - hyperLinkDTO.setIcon(actualURI.resolve(hyperLinkDTO.getIcon()).toString()); - } - if (StringUtils.isNotBlank(hyperLinkDTO.getImage()) - && !PathUtils.isAbsoluteUri(hyperLinkDTO.getImage())) { - hyperLinkDTO.setImage(actualURI.resolve(hyperLinkDTO.getImage()).toString()); - } - if (StringUtils.isBlank(hyperLinkDTO.getUrl())) { - hyperLinkDTO.setUrl(actualURI.toString()); - } - return hyperLinkDTO; - }) + return parserFactory.getParser(uri.getHost()).parse(uri) .doOnNext(hyperLinkBaseDTO -> { hyperLinkCache.put(linkUrl, hyperLinkBaseDTO); });