1
This commit is contained in:
@@ -81,7 +81,7 @@ public class LineReportParser {
|
|||||||
// 从匹配片段中继续提取 URL
|
// 从匹配片段中继续提取 URL
|
||||||
String fragment = m1.group();
|
String fragment = m1.group();
|
||||||
Matcher urlM = JD_URL_PATTERN.matcher(fragment);
|
Matcher urlM = JD_URL_PATTERN.matcher(fragment);
|
||||||
if (price != null && urlM.find()) {
|
if (price != null && price > 10 && urlM.find()) {
|
||||||
String url = urlM.group();
|
String url = urlM.group();
|
||||||
url = url.replaceAll("[\\s,,。!!??]+$", "");
|
url = url.replaceAll("[\\s,,。!!??]+$", "");
|
||||||
result.put(url, price);
|
result.put(url, price);
|
||||||
@@ -100,7 +100,7 @@ public class LineReportParser {
|
|||||||
}
|
}
|
||||||
String priceStr = m2.group(1);
|
String priceStr = m2.group(1);
|
||||||
Double price = parsePrice(priceStr);
|
Double price = parsePrice(priceStr);
|
||||||
if (url != null && price != null) {
|
if (url != null && price != null && price > 10) {
|
||||||
result.put(url, price);
|
result.put(url, price);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -140,35 +140,35 @@ public class LineReportParser {
|
|||||||
// 不break,继续上一行寻找更强标记的价格,若后一行找到rich将覆盖
|
// 不break,继续上一行寻找更强标记的价格,若后一行找到rich将覆盖
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (price != null) {
|
if (price != null && price > 10) {
|
||||||
result.put(url, price);
|
result.put(url, price);
|
||||||
}
|
}
|
||||||
}
|
if (!result.containsKey(url)) {
|
||||||
if (!result.containsKey(url)) {
|
// 前向查找:向下最多2行寻找价格
|
||||||
// 前向查找:向下最多2行寻找价格
|
Double fprice = null;
|
||||||
Double fprice = null;
|
for (int k = 1; k <= 2 && i + k < lines.length; k++) {
|
||||||
for (int k = 1; k <= 2 && i + k < lines.length; k++) {
|
String next = lines[i + k];
|
||||||
String next = lines[i + k];
|
if (next == null || next.isEmpty()) break; // 空行视为分段
|
||||||
if (next == null || next.isEmpty()) break; // 空行视为分段
|
// 优先匹配包含💰或¥的价格
|
||||||
// 优先匹配包含💰或¥的价格
|
Matcher richN = Pattern.compile("[¥💰]\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)").matcher(next);
|
||||||
Matcher richN = Pattern.compile("[¥💰]\\s*([0-9]{1,6}(?:\\.[0-9]{1,2})?)").matcher(next);
|
if (richN.find()) {
|
||||||
if (richN.find()) {
|
fprice = parsePrice(richN.group(1));
|
||||||
fprice = parsePrice(richN.group(1));
|
break;
|
||||||
break;
|
}
|
||||||
|
// 其次匹配“不高于 xxxx”
|
||||||
|
Matcher nhN = NOT_HIGHER_PATTERN.matcher(next);
|
||||||
|
if (nhN.find()) {
|
||||||
|
fprice = parsePrice(nhN.group(1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Matcher anyN = PRICE_NEAR_PATTERN.matcher(next);
|
||||||
|
if (anyN.find()) {
|
||||||
|
fprice = parsePrice(anyN.group(1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// 其次匹配“不高于 xxxx”
|
if (fprice != null && fprice > 10) {
|
||||||
Matcher nhN = NOT_HIGHER_PATTERN.matcher(next);
|
result.put(url, fprice);
|
||||||
if (nhN.find()) {
|
|
||||||
fprice = parsePrice(nhN.group(1));
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
Matcher anyN = PRICE_NEAR_PATTERN.matcher(next);
|
|
||||||
if (anyN.find()) {
|
|
||||||
fprice = parsePrice(anyN.group(1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (fprice != null) {
|
|
||||||
result.put(url, fprice);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user