1
This commit is contained in:
1
pom.xml
1
pom.xml
@@ -133,6 +133,7 @@
|
|||||||
<configuration>
|
<configuration>
|
||||||
<source>17</source>
|
<source>17</source>
|
||||||
<target>17</target>
|
<target>17</target>
|
||||||
|
<encoding>UTF-8</encoding>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
|
|||||||
@@ -2796,10 +2796,18 @@ public class JDUtil {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 优化后的正则表达式,更精确地匹配价格格式
|
final String normalized = decodeUnicodeEscapes(input);
|
||||||
// 匹配格式:🔥折扣◉价格💰 或 🔥折扣◉价格
|
if (!normalized.equals(input)) {
|
||||||
Pattern pattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?");
|
logger.info("parsePrice已对输入做Unicode反转义");
|
||||||
Matcher matcher = pattern.matcher(input);
|
}
|
||||||
|
|
||||||
|
// 添加调试信息,检查输入字符串的字符编码
|
||||||
|
logger.debug("输入字符串长度: {}", normalized.length());
|
||||||
|
logger.debug("输入字符串字节数组: {}", java.util.Arrays.toString(normalized.getBytes()));
|
||||||
|
|
||||||
|
// 第一次尝试 - 使用Unicode转义序列匹配(支持前面可选的 \u239C,再跟 \u25C9)
|
||||||
|
Pattern pattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)\\s*\\uD83D\\uDCB0?");
|
||||||
|
Matcher matcher = pattern.matcher(normalized);
|
||||||
|
|
||||||
if (matcher.find()) {
|
if (matcher.find()) {
|
||||||
logger.info("parsePrice第一次正则匹配到的价格{}", matcher.group(2));
|
logger.info("parsePrice第一次正则匹配到的价格{}", matcher.group(2));
|
||||||
@@ -2809,13 +2817,14 @@ public class JDUtil {
|
|||||||
return Double.parseDouble(priceStr);
|
return Double.parseDouble(priceStr);
|
||||||
}
|
}
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
|
logger.warn("解析价格失败: {}", e.getMessage());
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// fallback处理 - 更宽松的匹配,不依赖💰符号
|
// 第二次尝试 - 更宽松的匹配,不依赖💰符号(同样允许可选的 \u239C)
|
||||||
Pattern fallbackPattern = Pattern.compile("(\\uD83D\\uDECD|\\u25C9)[^\\d]*([\\d.]+)");
|
Pattern fallbackPattern = Pattern.compile("(?:\\u239C)?(\\u25C9)[^\\d]*([\\d.]+)");
|
||||||
Matcher fallbackMatcher = fallbackPattern.matcher(input);
|
Matcher fallbackMatcher = fallbackPattern.matcher(normalized);
|
||||||
if (fallbackMatcher.find()) {
|
if (fallbackMatcher.find()) {
|
||||||
logger.info("parsePrice第二次正则匹配到的价格{}", fallbackMatcher.group(2));
|
logger.info("parsePrice第二次正则匹配到的价格{}", fallbackMatcher.group(2));
|
||||||
try {
|
try {
|
||||||
@@ -2824,13 +2833,14 @@ public class JDUtil {
|
|||||||
return Double.parseDouble(priceStr);
|
return Double.parseDouble(priceStr);
|
||||||
}
|
}
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
|
logger.warn("解析价格失败: {}", e.getMessage());
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 第三次尝试 - 直接匹配数字价格,不依赖特殊符号
|
// 第三次尝试 - 直接匹配数字价格,不依赖特殊符号
|
||||||
Pattern simplePattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0");
|
Pattern simplePattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0");
|
||||||
Matcher simpleMatcher = simplePattern.matcher(input);
|
Matcher simpleMatcher = simplePattern.matcher(normalized);
|
||||||
if (simpleMatcher.find()) {
|
if (simpleMatcher.find()) {
|
||||||
logger.info("parsePrice第三次正则匹配到的价格{}", simpleMatcher.group(1));
|
logger.info("parsePrice第三次正则匹配到的价格{}", simpleMatcher.group(1));
|
||||||
try {
|
try {
|
||||||
@@ -2839,11 +2849,53 @@ public class JDUtil {
|
|||||||
return Double.parseDouble(priceStr);
|
return Double.parseDouble(priceStr);
|
||||||
}
|
}
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
|
logger.warn("解析价格失败: {}", e.getMessage());
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 第四次尝试 - 最简单的数字匹配(依赖 💰)
|
||||||
|
Pattern numberPattern = Pattern.compile("([\\d.]+)\\s*\\uD83D\\uDCB0");
|
||||||
|
Matcher numberMatcher = numberPattern.matcher(normalized);
|
||||||
|
if (numberMatcher.find()) {
|
||||||
|
logger.info("parsePrice第四次正则匹配到的价格{}", numberMatcher.group(1));
|
||||||
|
try {
|
||||||
|
String priceStr = numberMatcher.group(1).trim();
|
||||||
|
if (priceStr.matches("\\d+\\.?\\d*|\\d*\\.\\d+")) {
|
||||||
|
return Double.parseDouble(priceStr);
|
||||||
|
}
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
logger.warn("解析价格失败: {}", e.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.warn("所有正则表达式都未匹配到价格");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String decodeUnicodeEscapes(String text) {
|
||||||
|
if (text == null || text.indexOf("\\u") == -1) {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
StringBuilder sb = new StringBuilder(text.length());
|
||||||
|
for (int i = 0; i < text.length();) {
|
||||||
|
char ch = text.charAt(i);
|
||||||
|
if (ch == '\\' && i + 1 < text.length() && text.charAt(i + 1) == 'u' && i + 6 <= text.length()) {
|
||||||
|
String hex = text.substring(i + 2, i + 6);
|
||||||
|
try {
|
||||||
|
int code = Integer.parseInt(hex, 16);
|
||||||
|
sb.append((char) code);
|
||||||
|
i += 6;
|
||||||
|
continue;
|
||||||
|
} catch (NumberFormatException ignore) {
|
||||||
|
// fall through
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.append(ch);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user