This commit is contained in:
Leo
2025-11-09 15:59:42 +08:00
parent c0908690b4
commit 10020e6d52

View File

@@ -98,9 +98,16 @@ private String cleanForbiddenPhrases(String text) {
return text;
}
String cleaned = text;
// 新增:清理"咨询客服立减""咨询客服""客服"及变体(含空格)
// 优先处理长组合,避免被拆分后遗漏
cleaned = cleaned.replaceAll("咨询\\s*客服\\s*立减", ""); // 匹配"咨询客服立减""咨询 客服 立减"等
cleaned = cleaned.replaceAll("咨询\\s*客服", ""); // 匹配"咨询客服""咨询 客服"等
cleaned = cleaned.replaceAll("\\s*服", ""); // 匹配"客服""客 服"等
// 一、政策补贴及特殊渠道类(长组合优先)
cleaned = cleaned.replaceAll("咨询客服领\\s*国补", "");
cleaned = cleaned.replaceAll("政府\\s*补贴", ""); // 匹配"政府 补贴"等带空格的情况
cleaned = cleaned.replaceAll("政府\\s*补贴", "");
cleaned = cleaned.replaceAll("购车\\s*补贴", "");
cleaned = cleaned.replaceAll("家电\\s*下乡", "");
cleaned = cleaned.replaceAll("内部\\s*渠道", "");
@@ -128,15 +135,15 @@ private String cleanForbiddenPhrases(String text) {
cleaned = cleaned.replaceAll("原单", "");
cleaned = cleaned.replaceAll("尾单", "");
cleaned = cleaned.replaceAll("工厂\\s*货", "");
cleaned = cleaned.replaceAll("专柜\\s*验货", ""); // 无授权时违规
cleaned = cleaned.replaceAll("专柜\\s*验货", "");
// 四、线下导流及规避监管类(多变体覆盖)
cleaned = cleaned.replaceAll("\\s*信", ""); // 匹配"微信""微 信"
cleaned = cleaned.replaceAll("\\s*信", ""); // 谐音变体
cleaned = cleaned.replaceAll("\\s*信", "");
cleaned = cleaned.replaceAll("\\s*信", "");
cleaned = cleaned.replaceAll("V我", "");
cleaned = cleaned.replaceAll("\\s*卫星", "");
cleaned = cleaned.replaceAll("QQ", "");
cleaned = cleaned.replaceAll("扣扣", ""); // 谐音
cleaned = cleaned.replaceAll("扣扣", "");
cleaned = cleaned.replaceAll("手机\\s*号", "");
cleaned = cleaned.replaceAll("淘宝\\s*链接", "");
cleaned = cleaned.replaceAll("拼多\\s*多", "");
@@ -146,7 +153,7 @@ private String cleanForbiddenPhrases(String text) {
cleaned = cleaned.replaceAll("", "");
cleaned = cleaned.replaceAll("垃圾", "");
cleaned = cleaned.replaceAll("笨蛋", "");
cleaned = cleaned.replaceAll("SB", ""); // 单独出现时清理(避免误判可后续加上下文校验)
cleaned = cleaned.replaceAll("SB", "");
cleaned = cleaned.replaceAll("原味", "");
cleaned = cleaned.replaceAll("情趣", "");