diff --git a/pom.xml b/pom.xml index 096b687..0f40bae 100644 --- a/pom.xml +++ b/pom.xml @@ -133,6 +133,7 @@ 17 17 + UTF-8 diff --git a/src/main/java/cn/van/business/util/JDUtil.java b/src/main/java/cn/van/business/util/JDUtil.java index bff8936..f740128 100644 --- a/src/main/java/cn/van/business/util/JDUtil.java +++ b/src/main/java/cn/van/business/util/JDUtil.java @@ -2796,10 +2796,18 @@ public class JDUtil { return null; } - // Optimized regex that matches the price format more precisely - // Matched format: 🔥discount◉price💰 or 🔥discount◉price - Pattern pattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?"); - Matcher matcher = pattern.matcher(input); + /* All matching below runs on a copy of the input in which literal backslash-u escapes have been decoded; an info line is logged when decoding changed the text. */ final String normalized = decodeUnicodeEscapes(input); + if (!normalized.equals(input)) { + logger.info("parsePrice已对输入做Unicode反转义"); + } + + // Debug output to inspect the character encoding of the input string /* NOTE(review): getBytes() without an argument encodes with the default charset, and the Arrays.toString argument is built even when debug logging is disabled. */ + logger.debug("输入字符串长度: {}", normalized.length()); + logger.debug("输入字符串字节数组: {}", java.util.Arrays.toString(normalized.getBytes())); + + // First attempt - match using Unicode escape sequences (optional leading U+239C, then U+25C9) + Pattern pattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?"); + Matcher matcher = pattern.matcher(normalized); if (matcher.find()) { logger.info("parsePrice第一次正则匹配到的价格{}", matcher.group(2)); @@ -2809,13 +2817,14 @@ public class JDUtil { return Double.parseDouble(priceStr); } } catch (NumberFormatException e) { + logger.warn("解析价格失败: {}", e.getMessage()); return null; } } - // Fallback - looser match that does not depend on the 💰 symbol - Pattern fallbackPattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)"); - Matcher fallbackMatcher = fallbackPattern.matcher(input); + // Second attempt - looser match that does not depend on the 💰 symbol (also allows the optional U+239C) + Pattern fallbackPattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)"); + Matcher fallbackMatcher = fallbackPattern.matcher(normalized); if (fallbackMatcher.find()) { logger.info("parsePrice第二次正则匹配到的价格{}", fallbackMatcher.group(2)); try { @@ -2824,13 +2833,14 @@ public class JDUtil { return Double.parseDouble(priceStr); } } catch (NumberFormatException e) { +
logger.warn("解析价格失败: {}", e.getMessage()); return null; } } // Third attempt - match the numeric price directly, without relying on special symbols /* NOTE(review): the regex below still requires a trailing U+1F4B0 (💰) after the number. */ Pattern simplePattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0"); - Matcher simpleMatcher = simplePattern.matcher(input); + Matcher simpleMatcher = simplePattern.matcher(normalized); if (simpleMatcher.find()) { logger.info("parsePrice第三次正则匹配到的价格{}", simpleMatcher.group(1)); try { @@ -2839,11 +2849,53 @@ public class JDUtil { return Double.parseDouble(priceStr); } } catch (NumberFormatException e) { + logger.warn("解析价格失败: {}", e.getMessage()); return null; } } + // Fourth attempt - simplest numeric match (depends on 💰) /* NOTE(review): this is the same regex as the third attempt, applied to the same normalized string, so it finds the same match or none. */ + Pattern numberPattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0"); + Matcher numberMatcher = numberPattern.matcher(normalized); + if (numberMatcher.find()) { + logger.info("parsePrice第四次正则匹配到的价格{}", numberMatcher.group(1)); + try { + String priceStr = numberMatcher.group(1).trim(); + if (priceStr.matches("\\d+\\.?\\d*|\\d*\\.\\d+")) { + return Double.parseDouble(priceStr); + } + } catch (NumberFormatException e) { + logger.warn("解析价格失败: {}", e.getMessage()); + return null; + } + } + + logger.warn("所有正则表达式都未匹配到价格"); return null; } + /** Replaces each literal escape made of a backslash, the letter u and four further characters that parse as base-16 with the single char it encodes; a surrogate pair is therefore rebuilt from two consecutive escapes. Returns the argument unchanged when it is null or contains no backslash-u. Sequences that fail to parse, or that are cut short by the end of the string, are copied through verbatim. NOTE(review): Integer.parseInt also accepts a leading '+' or '-', so a sign followed by three hex digits is decoded as well - confirm whether that is intended. */ private static String decodeUnicodeEscapes(String text) { + if (text == null || text.indexOf("\\u") == -1) { + return text; + } + StringBuilder sb = new StringBuilder(text.length()); + for (int i = 0; i < text.length();) { + char ch = text.charAt(i); + if (ch == '\\' && i + 1 < text.length() && text.charAt(i + 1) == 'u' && i + 6 <= text.length()) { + String hex = text.substring(i + 2, i + 6); + try { + int code = Integer.parseInt(hex, 16); + sb.append((char) code); + i += 6; + continue; + } catch (NumberFormatException ignore) { + // fall through and copy the backslash unchanged + } + } + sb.append(ch); + i++; + } + return sb.toString(); + } + }