1
This commit is contained in:
@@ -22,6 +22,8 @@ public class LineReportParser {
|
||||
private static final Pattern PRICE_AND_UJD_PATTERN = Pattern.compile("(?i)\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b\\s+https?://[^\\s]*u\\.jd\\.com/[^\\s]+");
|
||||
// Also supports the URL first, followed by the price: https://u.jd.com/xxxxx 1428.28
|
||||
private static final Pattern UJD_AND_PRICE_PATTERN = Pattern.compile("(?i)https?://[^\\s]*u\\.jd\\.com/[^\\s]+\\s+\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b");
|
||||
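// Generic in-line price matcher (optional ¥/💰 prefix, or a bare number with up to 6 integer
// digits and up to 2 decimals); used when looking back at neighbouring lines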
|
||||
private static final Pattern PRICE_NEAR_PATTERN = Pattern.compile("[¥💰]?\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)");
|
||||
|
||||
/**
|
||||
* 从线报消息中提取所有京东链接
|
||||
@@ -60,6 +62,12 @@ public class LineReportParser {
|
||||
|
||||
// Process the message line by line to reduce false matches that span lines
|
||||
String[] lines = message.split("\r?\n");
|
||||
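// Pre-clean each line by trimming leading/trailing whitespace (no punctuation is stripped here);
// the original line order is preserved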
|
||||
for (int i = 0; i < lines.length; i++) {
|
||||
if (lines[i] != null) {
|
||||
lines[i] = lines[i].trim();
|
||||
}
|
||||
}
|
||||
for (String line : lines) {
|
||||
if (line == null || line.trim().isEmpty()) continue;
|
||||
|
||||
@@ -96,6 +104,40 @@ public class LineReportParser {
|
||||
}
|
||||
}
|
||||
|
||||
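// Neighbouring-line fallback: if a URL has no price recorded in result yet, look back up to
// 2 lines within the same paragraph for a price (preferring one marked with 💰/¥)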
|
||||
for (int i = 0; i < lines.length; i++) {
|
||||
String line = lines[i];
|
||||
if (line == null || line.isEmpty()) continue;
|
||||
Matcher urlM = JD_URL_PATTERN.matcher(line);
|
||||
while (urlM.find()) {
|
||||
String url = urlM.group();
|
||||
url = url.replaceAll("[\\s,,。!!??]+$", "");
|
||||
if (result.containsKey(url)) {
|
||||
continue; // 已有同一行价格匹配
|
||||
}
|
||||
// Look back over up to the two preceding lines for a price
|
||||
Double price = null;
|
||||
for (int k = 1; k <= 2 && i - k >= 0; k++) {
|
||||
String prev = lines[i - k];
|
||||
if (prev == null || prev.isEmpty()) break; // 遇空行认为分段
|
||||
// Prefer a price that is explicitly prefixed with 💰 or ¥
|
||||
Matcher rich = Pattern.compile("[¥💰]\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)").matcher(prev);
|
||||
if (rich.find()) {
|
||||
price = parsePrice(rich.group(1));
|
||||
break;
|
||||
}
|
||||
Matcher any = PRICE_NEAR_PATTERN.matcher(prev);
|
||||
if (any.find()) {
|
||||
price = parsePrice(any.group(1));
|
||||
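// No break: keep scanning one line further up for a price carrying a ¥/💰 marker, which
// would replace this one.
// NOTE(review): a bare number on that farther line also replaces it, because this
// assignment is unconditional — confirm that the nearer bare number should not win.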
|
||||
}
|
||||
}
|
||||
if (price != null) {
|
||||
result.put(url, price);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user