This commit is contained in:
2025-10-30 19:29:28 +08:00
parent a986918a9e
commit 16810ea9de

View File

@@ -22,6 +22,8 @@ public class LineReportParser {
private static final Pattern PRICE_AND_UJD_PATTERN = Pattern.compile("(?i)\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b\\s+https?://[^\\s]*u\\.jd\\.com/[^\\s]+");
// Also supports the URL first, followed by the price, e.g.: https://u.jd.com/xxxxx 1428.28
// Group 1 captures the price (1-6 integer digits, optional 1-2 decimal digits).
private static final Pattern UJD_AND_PRICE_PATTERN = Pattern.compile("(?i)https?://[^\\s]*u\\.jd\\.com/[^\\s]+\\s+\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b");
// Generic in-line price matcher (accepts a ¥ or 💰 prefix, or a bare number); used when
// backtracking to neighboring lines. The currency marker is optional, so any run of
// 1-6 digits (with optional 1-2 decimals) on the line matches; group 1 is the number.
private static final Pattern PRICE_NEAR_PATTERN = Pattern.compile("[¥💰]?\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)");
/**
* 从线报消息中提取所有京东链接
@@ -60,6 +62,12 @@ public class LineReportParser {
// Work on one line at a time to reduce accidental matches that span lines.
String[] lines = message.split("\r?\n");
// Normalize every entry in place by trimming surrounding whitespace; the
// relative order of the lines is left untouched.
for (int pos = lines.length - 1; pos >= 0; pos--) {
    final String raw = lines[pos];
    if (raw == null) {
        continue;
    }
    lines[pos] = raw.trim();
}
for (String line : lines) {
if (line == null || line.trim().isEmpty()) continue;
@@ -96,6 +104,40 @@ public class LineReportParser {
}
}
// Neighbor-line fallback: for every JD URL that did not get a price from its own
// line, look back at most 2 lines within the same paragraph (a blank line ends the
// paragraph) for a price.
//
// Precedence:
//   1. the nearest preceding line carrying a ¥/💰-marked price wins outright;
//   2. otherwise the nearest preceding line with any number is used.
// A plain number on a farther line never replaces a plain number already found on
// a nearer line; only a marked price may override it.
//
// Both patterns below are loop-invariant, so they are compiled once up front
// instead of once per URL / per inspected line.
final Pattern nearbyMarkedPrice = Pattern.compile("[¥💰]\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)");
final Pattern urlTrailingJunk = Pattern.compile("[\\s,,。!?]+$");
for (int i = 0; i < lines.length; i++) {
    String line = lines[i];
    if (line == null || line.isEmpty()) {
        continue;
    }
    Matcher urlM = JD_URL_PATTERN.matcher(line);
    while (urlM.find()) {
        // Drop trailing whitespace / punctuation that got glued onto the URL.
        String url = urlTrailingJunk.matcher(urlM.group()).replaceAll("");
        if (result.containsKey(url)) {
            continue; // a price is already recorded for this URL
        }
        Double price = null;
        for (int k = 1; k <= 2 && i - k >= 0; k++) {
            String prev = lines[i - k];
            if (prev == null || prev.isEmpty()) {
                break; // blank line: treated as a paragraph boundary
            }
            // A price explicitly marked with 💰 or ¥ is the strongest signal.
            Matcher rich = nearbyMarkedPrice.matcher(prev);
            if (rich.find()) {
                price = parsePrice(rich.group(1));
                break;
            }
            // Weak signal: keep only the nearest plain number, then keep scanning
            // upward in case the next line holds a marked price that overrides it.
            // NOTE(review): PRICE_NEAR_PATTERN also matches digits embedded in other
            // text on the line (e.g. inside a URL) -- confirm that is acceptable.
            if (price == null) {
                Matcher any = PRICE_NEAR_PATTERN.matcher(prev);
                if (any.find()) {
                    price = parsePrice(any.group(1));
                }
            }
        }
        if (price != null) {
            result.put(url, price);
        }
    }
}
return result;
}