This commit is contained in:
2025-10-09 19:45:14 +08:00
parent e6ced14040
commit 2a93522bcf
15 changed files with 2158 additions and 1 deletions

View File

@@ -0,0 +1,128 @@
package com.ruoyi.jarvis.util;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility for parsing line-report messages: extracts JD.com links and candidate
 * SKU IDs from free-form message text.
 *
 * <p>All methods are static and keep no state between calls.
 *
 * @author ruoyi
 * @date 2025-01-10
 */
public class LineReportParser {

    /**
     * An http(s) URL that contains "jd.com" somewhere after the scheme.
     *
     * <p>URL characters are limited to printable ASCII so that CJK text or
     * full-width punctuation written directly after a link (no space in
     * between) terminates the match instead of being swallowed into it.
     */
    private static final Pattern JD_URL_PATTERN =
            Pattern.compile("https?://[\\x21-\\x7E]*?jd\\.com[\\x21-\\x7E]*");

    /** ASCII sentence punctuation that may trail a URL inside running text. */
    private static final Pattern TRAILING_PUNCTUATION = Pattern.compile("[,.;!?]+$");

    /**
     * A stand-alone run of 10-13 digits.
     *
     * <p>Explicit ASCII look-arounds are used instead of {@code \b}: the way
     * {@code \b} treats non-ASCII letters differs between JDK releases, which
     * made IDs adjacent to CJK text match on some runtimes and not on others.
     */
    private static final Pattern SKUID_PATTERN =
            Pattern.compile("(?<![A-Za-z0-9_])(\\d{10,13})(?![A-Za-z0-9_])");

    /**
     * Minimum length for a number found in plain text to be reported.
     * 10-digit matches are discarded; note that 11-digit numbers (which
     * includes mobile phone numbers) are still accepted.
     */
    private static final int MIN_TEXT_SKUID_LENGTH = 11;

    /** Product page: item.jd.com/{id}.html or item.m.jd.com/product/{id}.html. */
    private static final Pattern ITEM_PAGE_SKUID =
            Pattern.compile("item(?:\\.m)?\\.jd\\.com/(?:product/)?(\\d+)\\.html");

    /** Query parameter: sku=, skuId= or skuid=. */
    private static final Pattern QUERY_PARAM_SKUID =
            Pattern.compile("[?&]sku(?:[Ii]d)?=(\\d+)");

    /**
     * A complete 10-13 digit run anywhere after u.jd.com/.
     *
     * <p>The prefix is lazy and the digits are fenced by look-arounds; a greedy
     * prefix would backtrack only as far as needed and capture just the last
     * ten digits of a longer ID.
     */
    private static final Pattern SHORT_LINK_SKUID =
            Pattern.compile("u\\.jd\\.com/\\S*?(?<!\\d)(\\d{10,13})(?!\\d)");

    /**
     * Extracts every distinct JD link from a message, in order of first appearance.
     *
     * @param message the message text; may be {@code null} or blank
     * @return the links found, without trailing sentence punctuation; empty
     *         (never {@code null}) when the message is null/blank or has no link
     */
    public static List<String> extractJdUrls(String message) {
        if (isBlank(message)) {
            return new ArrayList<>();
        }
        // LinkedHashSet de-duplicates in O(1) while keeping encounter order.
        Set<String> urls = new LinkedHashSet<>();
        Matcher matcher = JD_URL_PATTERN.matcher(message);
        while (matcher.find()) {
            urls.add(TRAILING_PUNCTUATION.matcher(matcher.group()).replaceAll(""));
        }
        return new ArrayList<>(urls);
    }

    /**
     * Extracts every distinct candidate SKU ID from a message.
     *
     * <p>IDs taken from JD links come first (in link order), followed by
     * stand-alone 11-13 digit numbers found anywhere in the text.
     *
     * @param message the message text; may be {@code null} or blank
     * @return the candidate IDs; empty (never {@code null}) when nothing is found
     */
    public static List<String> extractSkuids(String message) {
        Set<String> skuids = new LinkedHashSet<>();
        if (isBlank(message)) {
            return new ArrayList<>(skuids);
        }
        // Pass 1: IDs embedded in links.
        for (String url : extractJdUrls(message)) {
            String skuid = extractSkuidFromUrl(url);
            if (skuid != null) {
                skuids.add(skuid);
            }
        }
        // Pass 2: bare numbers in the text (this also re-visits digits inside links;
        // the set removes the duplicates).
        Matcher matcher = SKUID_PATTERN.matcher(message);
        while (matcher.find()) {
            String skuid = matcher.group(1);
            if (skuid.length() >= MIN_TEXT_SKUID_LENGTH) {
                skuids.add(skuid);
            }
        }
        return new ArrayList<>(skuids);
    }

    /**
     * Extracts the SKU ID from a single JD link.
     *
     * <p>Tried in order: product-page path, {@code sku}/{@code skuId} query
     * parameter, then a 10-13 digit run in a u.jd.com link.
     *
     * @param url the link; may be {@code null} or blank
     * @return the SKU ID, or {@code null} when none can be extracted
     */
    public static String extractSkuidFromUrl(String url) {
        if (isBlank(url)) {
            return null;
        }
        Pattern[] strategies = {ITEM_PAGE_SKUID, QUERY_PARAM_SKUID, SHORT_LINK_SKUID};
        for (Pattern strategy : strategies) {
            Matcher matcher = strategy.matcher(url);
            if (matcher.find()) {
                return matcher.group(1);
            }
        }
        return null;
    }

    /**
     * Parses a message and bundles everything that was extracted.
     *
     * @param message the message text; may be {@code null} or blank
     * @return a map with key {@code "urls"} (result of {@link #extractJdUrls})
     *         and key {@code "skuids"} (result of {@link #extractSkuids})
     */
    public static Map<String, Object> parseMessage(String message) {
        Map<String, Object> result = new HashMap<>();
        result.put("urls", extractJdUrls(message));
        result.put("skuids", extractSkuids(message));
        return result;
    }

    /** Returns true when the text is null, empty, or only whitespace. */
    private static boolean isBlank(String text) {
        return text == null || text.trim().isEmpty();
    }
}