1
This commit is contained in:
@@ -0,0 +1,128 @@
|
||||
package com.ruoyi.jarvis.util;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Utility for parsing "line report" (deal tip) chat messages: it extracts JD.com
 * links and candidate SKU ids from free-form text.
 *
 * <p>All methods are static and keep no mutable state; the compiled patterns are
 * shared constants.
 *
 * @author ruoyi
 * @date 2025-01-10
 */
public class LineReportParser {

    /**
     * Matches an http(s) link that contains "jd.com". Only printable, non-space
     * ASCII characters are accepted as part of the link, so CJK text or full-width
     * punctuation written directly after a link (no whitespace in between) is not
     * swallowed into it.
     */
    private static final Pattern JD_URL_PATTERN =
            Pattern.compile("https?://[\\x21-\\x7E]*?jd\\.com[\\x21-\\x7E]*");

    /** ASCII punctuation that may trail a link at the end of a sentence. */
    private static final Pattern TRAILING_PUNCTUATION = Pattern.compile("[,!?]+$");

    /**
     * Matches a standalone run of 11-13 digits in free text. Explicit ASCII
     * lookarounds are used instead of {@code \b}: before JDK 19 {@code \b} treats
     * CJK letters as word characters, so an id glued to Chinese text was missed.
     *
     * <p>NOTE(review): the original comment says the 11-digit minimum avoids phone
     * numbers, but mainland China mobile numbers are also 11 digits, so they still
     * match - confirm the intended minimum length.
     */
    private static final Pattern SKUID_PATTERN =
            Pattern.compile("(?<![A-Za-z0-9_])(\\d{11,13})(?![A-Za-z0-9_])");

    /** Matches {@code item.jd.com/{skuid}.html}. */
    private static final Pattern ITEM_PAGE_SKUID_PATTERN =
            Pattern.compile("item\\.jd\\.com/(\\d+)\\.html");

    /** Matches a {@code sku=} / {@code skuId=} style query parameter. */
    private static final Pattern QUERY_SKUID_PATTERN =
            Pattern.compile("[?&]sku[Ii]?d?=(\\d+)");

    /**
     * Matches a complete 10-13 digit run inside a {@code u.jd.com} short link.
     * The prefix is lazy and the digit run is delimited by lookarounds so the
     * whole number is captured rather than only its last ten digits.
     */
    private static final Pattern SHORT_LINK_SKUID_PATTERN =
            Pattern.compile("u\\.jd\\.com/\\S*?(?<!\\d)(\\d{10,13})(?!\\d)");

    /**
     * Extracts all distinct JD links from a message, in order of first appearance.
     *
     * @param message the line report message; may be {@code null} or blank
     * @return a mutable list of distinct links with trailing {@code , ! ?} removed;
     *         empty when the message is {@code null}, blank, or has no JD link
     */
    public static List<String> extractJdUrls(String message) {
        if (isBlank(message)) {
            return new ArrayList<>();
        }

        // LinkedHashSet de-duplicates in O(1) per link while keeping encounter order.
        Set<String> urls = new LinkedHashSet<>();
        Matcher matcher = JD_URL_PATTERN.matcher(message);
        while (matcher.find()) {
            urls.add(TRAILING_PUNCTUATION.matcher(matcher.group()).replaceAll(""));
        }
        return new ArrayList<>(urls);
    }

    /**
     * Extracts all candidate SKU ids from a message: first the ids found inside
     * JD links, then standalone 11-13 digit numbers found anywhere in the text.
     *
     * @param message the line report message; may be {@code null} or blank
     * @return a mutable list of distinct SKU ids in discovery order; empty when
     *         the message is {@code null}, blank, or contains no candidate
     */
    public static List<String> extractSkuids(String message) {
        if (isBlank(message)) {
            return new ArrayList<>();
        }
        return collectSkuids(message, extractJdUrls(message));
    }

    /**
     * Extracts the SKU id from a single JD link. The item page form is tried
     * first, then the {@code sku}/{@code skuId} query parameter, then a 10-13
     * digit run in a {@code u.jd.com} short link.
     *
     * @param url the JD link; may be {@code null} or blank
     * @return the SKU id, or {@code null} when none can be extracted
     */
    public static String extractSkuidFromUrl(String url) {
        if (isBlank(url)) {
            return null;
        }

        Pattern[] candidates = {
            ITEM_PAGE_SKUID_PATTERN, QUERY_SKUID_PATTERN, SHORT_LINK_SKUID_PATTERN
        };
        for (Pattern candidate : candidates) {
            Matcher matcher = candidate.matcher(url);
            if (matcher.find()) {
                return matcher.group(1);
            }
        }
        return null;
    }

    /**
     * Parses a message and returns everything that could be extracted from it.
     *
     * @param message the line report message; may be {@code null} or blank
     * @return a map with key {@code "urls"} (list of JD links) and key
     *         {@code "skuids"} (list of SKU ids); both lists are empty for a
     *         {@code null} or blank message
     */
    public static Map<String, Object> parseMessage(String message) {
        // Scan for links once and reuse the result for the SKU id extraction.
        List<String> urls = extractJdUrls(message);
        Map<String, Object> result = new HashMap<>();
        result.put("urls", urls);
        result.put("skuids", collectSkuids(message, urls));
        return result;
    }

    /** Collects SKU ids from the given links first, then from the raw message text. */
    private static List<String> collectSkuids(String message, List<String> urls) {
        Set<String> skuids = new LinkedHashSet<>();
        if (isBlank(message)) {
            return new ArrayList<>(skuids);
        }

        for (String url : urls) {
            String skuid = extractSkuidFromUrl(url);
            if (skuid != null) {
                skuids.add(skuid);
            }
        }

        Matcher matcher = SKUID_PATTERN.matcher(message);
        while (matcher.find()) {
            skuids.add(matcher.group(1));
        }
        return new ArrayList<>(skuids);
    }

    /** Returns true when the text is null, empty, or only whitespace. */
    private static boolean isBlank(String text) {
        return text == null || text.trim().isEmpty();
    }
}
|
||||
|
||||
Reference in New Issue
Block a user