This commit is contained in:
2025-10-30 18:54:52 +08:00
parent 471c8df097
commit a986918a9e
2 changed files with 73 additions and 0 deletions

View File

@@ -184,6 +184,8 @@ public class BatchPublishServiceImpl implements IBatchPublishService
// 提取SKUID和URL
List<String> urls = LineReportParser.extractJdUrls(message);
List<String> skuids = LineReportParser.extractSkuids(message);
// 解析每个URL对应的价格如有
Map<String, Double> urlPriceMap = LineReportParser.extractPriceForUrls(message);
log.info("提取到 {} 个URL, {} 个SKUID", urls.size(), skuids.size());
log.info("提取的URLs: {}", urls);
@@ -220,6 +222,11 @@ public class BatchPublishServiceImpl implements IBatchPublishService
continue;
}
// 如果文本中包含该URL的显式价格则覆盖价格字段
if (urlPriceMap.containsKey(url)) {
productInfo.put("price", urlPriceMap.get(url));
productInfo.put("_priceSource", "text");
}
// 不对URL阶段做去重保持与输入一致允许同款多条
products.add(productInfo);
Object skuObj = productInfo.get("skuid");

View File

@@ -18,6 +18,11 @@ public class LineReportParser {
// SKUID regex: a run of 10-13 digits delimited by word boundaries
private static final Pattern SKUID_PATTERN = Pattern.compile("\\b(\\d{10,13})\\b");
// Inline price followed by whitespace and a short link, e.g.: 1428.28 https://u.jd.com/xxxxx
// Group 1 is the price: 1-6 integer digits with an optional 1-2 digit fraction.
// NOTE(review): a word boundary also exists right after a '.', so for input such as
// "12.345 https://u.jd.com/x" group 1 would be "345" - confirm this is acceptable.
private static final Pattern PRICE_AND_UJD_PATTERN = Pattern.compile("(?i)\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b\\s+https?://[^\\s]*u\\.jd\\.com/[^\\s]+");
// The reverse order is also supported (URL first, price after), e.g.: https://u.jd.com/xxxxx 1428.28
// Group 1 is again the price, with the same digit limits as above.
private static final Pattern UJD_AND_PRICE_PATTERN = Pattern.compile("(?i)https?://[^\\s]*u\\.jd\\.com/[^\\s]+\\s+\\b(\\d{1,6}(?:\\.\\d{1,2})?)\\b");
/**
* 从线报消息中提取所有京东链接
*
@@ -43,6 +48,67 @@ public class LineReportParser {
return urls;
}
/**
 * Extracts the price written next to each short link in a message.
 *
 * <p>The message is processed line by line so that a price is never paired with a link
 * from another line. Each line is scanned twice: first for the {@code <price> <url>}
 * layout ({@code PRICE_AND_UJD_PATTERN}), then for the {@code <url> <price>} layout
 * ({@code UJD_AND_PRICE_PATTERN}). Entries are written with {@code put}, so when the
 * same URL is matched more than once the last match wins (URL-first over price-first
 * within a line, later lines over earlier ones).
 *
 * <p>Map keys are the URL as matched by {@code JD_URL_PATTERN} inside the matched
 * fragment, with trailing whitespace/punctuation stripped.
 *
 * @param message raw message text; may be {@code null} or blank
 * @return an insertion-ordered map from URL to price; empty when nothing is found,
 *         never {@code null}
 */
public static Map<String, Double> extractPriceForUrls(String message) {
    Map<String, Double> result = new LinkedHashMap<>();
    if (message == null || message.trim().isEmpty()) {
        return result;
    }
    // Work line by line to reduce accidental cross-line matches.
    for (String line : message.split("\r?\n")) {
        if (line.trim().isEmpty()) {
            continue;
        }
        // Price-first layout is collected first; URL-first matches may then override it.
        collectLinePrices(PRICE_AND_UJD_PATTERN, line, result);
        collectLinePrices(UJD_AND_PRICE_PATTERN, line, result);
    }
    return result;
}

/**
 * Records a {@code url -> price} entry in {@code sink} for every match of
 * {@code pricedUrlPattern} in {@code line}. Matches whose price cannot be parsed or
 * whose fragment contains no URL recognised by {@code JD_URL_PATTERN} are skipped.
 */
private static void collectLinePrices(Pattern pricedUrlPattern, String line, Map<String, Double> sink) {
    Matcher match = pricedUrlPattern.matcher(line);
    while (match.find()) {
        // Group 1 of both patterns is the numeric price token.
        Double price = parsePrice(match.group(1));
        if (price == null) {
            continue;
        }
        // Re-extract the URL from the matched fragment using the shared URL pattern.
        Matcher urlMatcher = JD_URL_PATTERN.matcher(match.group());
        if (!urlMatcher.find()) {
            continue;
        }
        // Drop trailing whitespace/punctuation that may have been glued to the link.
        String url = urlMatcher.group().replaceAll("[\\s,,。!?]+$", "");
        sink.put(url, price);
    }
}
/**
 * Parses a price token into a {@code Double}.
 *
 * <p>Comma characters are removed and surrounding whitespace is trimmed before the
 * text is handed to {@link Double#parseDouble(String)}.
 *
 * @param s price text; may be {@code null}
 * @return the parsed value, or {@code null} when {@code s} is {@code null} or cannot
 *         be parsed as a number
 */
private static Double parsePrice(String s) {
    if (s == null) {
        return null;
    }
    try {
        return Double.parseDouble(s.replace(",", "").trim());
    } catch (NumberFormatException ignored) {
        // Best-effort parsing: an unparseable token simply means "no price".
        return null;
    }
}
/**
* 从线报消息中提取所有可能的SKUID
*