diff --git a/ruoyi-system/src/main/java/com/ruoyi/jarvis/util/LineReportParser.java b/ruoyi-system/src/main/java/com/ruoyi/jarvis/util/LineReportParser.java index a3780cc..2d19402 100644 --- a/ruoyi-system/src/main/java/com/ruoyi/jarvis/util/LineReportParser.java +++ b/ruoyi-system/src/main/java/com/ruoyi/jarvis/util/LineReportParser.java @@ -22,6 +22,8 @@ public class LineReportParser { private static final Pattern PRICE_AND_UJD_PATTERN = Pattern.compile("(?i)\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b\\s+https?://[^\\s]*u\\.jd\\.com/[^\\s]+"); // 也支持 URL 在前,后面跟价格: https://u.jd.com/xxxxx 1428.28 private static final Pattern UJD_AND_PRICE_PATTERN = Pattern.compile("(?i)https?://[^\\s]*u\\.jd\\.com/[^\\s]+\\s+\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b"); + // 行内价格通用匹配(支持¥/💰/纯数字),用于邻近行回溯 + private static final Pattern PRICE_NEAR_PATTERN = Pattern.compile("[¥💰]?\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)"); /** * 从线报消息中提取所有京东链接 @@ -60,6 +62,12 @@ public class LineReportParser { // 逐行处理,减少跨行误配 String[] lines = message.split("\r?\n"); + // 预清洗行尾标点,保留原顺序 + for (int i = 0; i < lines.length; i++) { + if (lines[i] != null) { + lines[i] = lines[i].trim(); + } + } for (String line : lines) { if (line == null || line.trim().isEmpty()) continue; @@ -96,6 +104,40 @@ public class LineReportParser { } } + // 邻近行补偿:若同一行未抓到价格,则在同一段落内向上回溯2行寻找价格(优先含💰/¥) + for (int i = 0; i < lines.length; i++) { + String line = lines[i]; + if (line == null || line.isEmpty()) continue; + Matcher urlM = JD_URL_PATTERN.matcher(line); + while (urlM.find()) { + String url = urlM.group(); + url = url.replaceAll("[\\s,,。!!??]+$", ""); + if (result.containsKey(url)) { + continue; // 已有同一行价格匹配 + } + // 回溯前两行找价格 + Double price = null; + for (int k = 1; k <= 2 && i - k >= 0; k++) { + String prev = lines[i - k]; + if (prev == null || prev.isEmpty()) break; // 遇空行认为分段 + // 优先匹配包含💰或¥的价格 + Matcher rich = Pattern.compile("[¥💰]\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)").matcher(prev); + if (rich.find()) { + price = parsePrice(rich.group(1)); + break; + } + Matcher any = PRICE_NEAR_PATTERN.matcher(prev); + if (any.find()) { + price = parsePrice(any.group(1)); + // 不break,继续上一行寻找更强标记的价格,若后一行找到rich将覆盖 + } + } + if (price != null) { + result.put(url, price); + } + } + } + return result; }