1
This commit is contained in:
1
pom.xml
1
pom.xml
@@ -133,6 +133,7 @@
|
||||
<configuration>
|
||||
<source>17</source>
|
||||
<target>17</target>
|
||||
<encoding>UTF-8</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
@@ -2796,10 +2796,18 @@ public class JDUtil {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 优化后的正则表达式,更精确地匹配价格格式
|
||||
// 匹配格式:🔥折扣◉价格💰 或 🔥折扣◉价格
|
||||
Pattern pattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?");
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
final String normalized = decodeUnicodeEscapes(input);
|
||||
if (!normalized.equals(input)) {
|
||||
logger.info("parsePrice已对输入做Unicode反转义");
|
||||
}
|
||||
|
||||
// 添加调试信息,检查输入字符串的字符编码
|
||||
logger.debug("输入字符串长度: {}", normalized.length());
|
||||
logger.debug("输入字符串字节数组: {}", java.util.Arrays.toString(normalized.getBytes()));
|
||||
|
||||
// 第一次尝试 - 使用Unicode转义序列匹配(支持前面可选的 \u239C,再跟 \u25C9)
|
||||
Pattern pattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?");
|
||||
Matcher matcher = pattern.matcher(normalized);
|
||||
|
||||
if (matcher.find()) {
|
||||
logger.info("parsePrice第一次正则匹配到的价格{}", matcher.group(2));
|
||||
@@ -2809,13 +2817,14 @@ public class JDUtil {
|
||||
return Double.parseDouble(priceStr);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
logger.warn("解析价格失败: {}", e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// fallback处理 - 更宽松的匹配,不依赖💰符号
|
||||
Pattern fallbackPattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)");
|
||||
Matcher fallbackMatcher = fallbackPattern.matcher(input);
|
||||
// 第二次尝试 - 更宽松的匹配,不依赖💰符号(同样允许可选的 \u239C)
|
||||
Pattern fallbackPattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)");
|
||||
Matcher fallbackMatcher = fallbackPattern.matcher(normalized);
|
||||
if (fallbackMatcher.find()) {
|
||||
logger.info("parsePrice第二次正则匹配到的价格{}", fallbackMatcher.group(2));
|
||||
try {
|
||||
@@ -2824,13 +2833,14 @@ public class JDUtil {
|
||||
return Double.parseDouble(priceStr);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
logger.warn("解析价格失败: {}", e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// 第三次尝试 - 直接匹配数字价格,不依赖特殊符号
|
||||
Pattern simplePattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0");
|
||||
Matcher simpleMatcher = simplePattern.matcher(input);
|
||||
Matcher simpleMatcher = simplePattern.matcher(normalized);
|
||||
if (simpleMatcher.find()) {
|
||||
logger.info("parsePrice第三次正则匹配到的价格{}", simpleMatcher.group(1));
|
||||
try {
|
||||
@@ -2839,11 +2849,53 @@ public class JDUtil {
|
||||
return Double.parseDouble(priceStr);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
logger.warn("解析价格失败: {}", e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// 第四次尝试 - 最简单的数字匹配(依赖 💰)
|
||||
Pattern numberPattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0");
|
||||
Matcher numberMatcher = numberPattern.matcher(normalized);
|
||||
if (numberMatcher.find()) {
|
||||
logger.info("parsePrice第四次正则匹配到的价格{}", numberMatcher.group(1));
|
||||
try {
|
||||
String priceStr = numberMatcher.group(1).trim();
|
||||
if (priceStr.matches("\\d+\\.?\\d*|\\d*\\.\\d+")) {
|
||||
return Double.parseDouble(priceStr);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
logger.warn("解析价格失败: {}", e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
logger.warn("所有正则表达式都未匹配到价格");
|
||||
return null;
|
||||
}
|
||||
|
||||
private static String decodeUnicodeEscapes(String text) {
|
||||
if (text == null || text.indexOf("\\u") == -1) {
|
||||
return text;
|
||||
}
|
||||
StringBuilder sb = new StringBuilder(text.length());
|
||||
for (int i = 0; i < text.length();) {
|
||||
char ch = text.charAt(i);
|
||||
if (ch == '\\' && i + 1 < text.length() && text.charAt(i + 1) == 'u' && i + 6 <= text.length()) {
|
||||
String hex = text.substring(i + 2, i + 6);
|
||||
try {
|
||||
int code = Integer.parseInt(hex, 16);
|
||||
sb.append((char) code);
|
||||
i += 6;
|
||||
continue;
|
||||
} catch (NumberFormatException ignore) {
|
||||
// fall through
|
||||
}
|
||||
}
|
||||
sb.append(ch);
|
||||
i++;
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user