This commit is contained in:
雷欧(林平凡)
2025-08-11 18:01:32 +08:00
parent cdb871a202
commit 1a762eb50a
2 changed files with 61 additions and 8 deletions

View File

@@ -133,6 +133,7 @@
<configuration>
<source>17</source>
<target>17</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
</plugins>

View File

@@ -2796,10 +2796,18 @@ public class JDUtil {
return null;
}
// 优化后的正则表达式,更精确地匹配价格格式
// 匹配格式:🔥折扣◉价格💰 或 🔥折扣◉价格
Pattern pattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?");
Matcher matcher = pattern.matcher(input);
final String normalized = decodeUnicodeEscapes(input);
if (!normalized.equals(input)) {
logger.info("parsePrice已对输入做Unicode反转义");
}
// 添加调试信息,检查输入字符串的字符编码
logger.debug("输入字符串长度: {}", normalized.length());
logger.debug("输入字符串字节数组: {}", java.util.Arrays.toString(normalized.getBytes()));
// 第一次尝试 - 使用Unicode转义序列匹配,支持前面可选的 \u239C,再跟 \u25C9
Pattern pattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?");
Matcher matcher = pattern.matcher(normalized);
if (matcher.find()) {
logger.info("parsePrice第一次正则匹配到的价格{}", matcher.group(2));
@@ -2809,13 +2817,14 @@ public class JDUtil {
return Double.parseDouble(priceStr);
}
} catch (NumberFormatException e) {
logger.warn("解析价格失败: {}", e.getMessage());
return null;
}
}
// fallback处理 - 更宽松的匹配,不依赖💰符号
Pattern fallbackPattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)");
Matcher fallbackMatcher = fallbackPattern.matcher(input);
// 第二次尝试 - 更宽松的匹配,不依赖💰符号(同样允许可选的 \u239C)
Pattern fallbackPattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)");
Matcher fallbackMatcher = fallbackPattern.matcher(normalized);
if (fallbackMatcher.find()) {
logger.info("parsePrice第二次正则匹配到的价格{}", fallbackMatcher.group(2));
try {
@@ -2824,13 +2833,14 @@ public class JDUtil {
return Double.parseDouble(priceStr);
}
} catch (NumberFormatException e) {
logger.warn("解析价格失败: {}", e.getMessage());
return null;
}
}
// 第三次尝试 - 直接匹配数字价格,不依赖特殊符号
Pattern simplePattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0");
Matcher simpleMatcher = simplePattern.matcher(input);
Matcher simpleMatcher = simplePattern.matcher(normalized);
if (simpleMatcher.find()) {
logger.info("parsePrice第三次正则匹配到的价格{}", simpleMatcher.group(1));
try {
@@ -2839,11 +2849,53 @@ public class JDUtil {
return Double.parseDouble(priceStr);
}
} catch (NumberFormatException e) {
logger.warn("解析价格失败: {}", e.getMessage());
return null;
}
}
// 第四次尝试 - 最简单的数字匹配(依赖 💰)
Pattern numberPattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0");
Matcher numberMatcher = numberPattern.matcher(normalized);
if (numberMatcher.find()) {
logger.info("parsePrice第四次正则匹配到的价格{}", numberMatcher.group(1));
try {
String priceStr = numberMatcher.group(1).trim();
if (priceStr.matches("\\d+\\.?\\d*|\\d*\\.\\d+")) {
return Double.parseDouble(priceStr);
}
} catch (NumberFormatException e) {
logger.warn("解析价格失败: {}", e.getMessage());
return null;
}
}
logger.warn("所有正则表达式都未匹配到价格");
return null;
}
/**
 * Replaces literal backslash-u escapes in {@code text} with the UTF-16 code unit they denote.
 *
 * <p>An escape is a backslash, a lowercase {@code u}, and exactly four ASCII hexadecimal
 * digits (upper or lower case). Each escape yields a single {@code char}, so a supplementary
 * character written as two consecutive escapes (a surrogate pair) decodes to the matching
 * pair of chars. Anything that is not a well-formed escape, including a truncated escape at
 * the end of the input, is copied through unchanged.
 *
 * <p>The four digits are validated character by character rather than handed to
 * {@link Integer#parseInt(String, int)}, because that method also accepts a leading sign and
 * non-ASCII Unicode digits, which would turn sequences that are not escapes into characters.
 *
 * @param text the text to decode; may be {@code null}
 * @return the decoded text, or {@code text} itself when it is {@code null} or contains no
 *     backslash-u sequence at all
 */
private static String decodeUnicodeEscapes(String text) {
    if (text == null || text.indexOf("\\u") == -1) {
        return text;
    }
    final int length = text.length();
    StringBuilder sb = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        char ch = text.charAt(i);
        // A full escape needs six chars: backslash, 'u', and four hex digits.
        if (ch == '\\' && i + 6 <= length && text.charAt(i + 1) == 'u') {
            int code = parseHexQuad(text, i + 2);
            if (code >= 0) {
                sb.append((char) code);
                i += 6;
                continue;
            }
        }
        // Not a well-formed escape: keep the character as-is.
        sb.append(ch);
        i++;
    }
    return sb.toString();
}

/**
 * Parses the four characters starting at {@code start} as ASCII hexadecimal digits.
 *
 * @param text the text to read from; must have at least {@code start + 4} characters
 * @param start index of the first digit
 * @return the value in the range 0..0xFFFF, or {@code -1} if any of the four characters is
 *     not one of 0-9, a-f, A-F
 */
private static int parseHexQuad(String text, int start) {
    int value = 0;
    for (int k = start; k < start + 4; k++) {
        char c = text.charAt(k);
        int digit;
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            digit = c - 'A' + 10;
        } else {
            return -1;
        }
        value = (value << 4) | digit;
    }
    return value;
}
}