|
@@ -0,0 +1,249 @@
|
|
|
+package com.yonge.cooleshow.biz.dal.wordfilter;
|
|
|
+
|
|
|
+import com.yonge.cooleshow.biz.dal.service.SysConfigService;
|
|
|
+import com.yonge.cooleshow.common.constant.SysConfigConstant;
|
|
|
+import com.yonge.toolset.base.exception.BizException;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.context.annotation.Configuration;
|
|
|
+
|
|
|
+import javax.annotation.PostConstruct;
|
|
|
+import java.io.BufferedReader;
|
|
|
+import java.io.InputStreamReader;
|
|
|
+import java.util.*;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 词库上下文环境
|
|
|
+ * <p>
|
|
|
+ * 初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型
|
|
|
+ *
|
|
|
+ * @author minghu.zhang
|
|
|
+ */
|
|
|
+@SuppressWarnings({"rawtypes", "unchecked"})
|
|
|
+@Configuration
|
|
|
+public class WordContext {
|
|
|
+ private final static Logger log = LoggerFactory.getLogger(WordContext.class);
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private SysConfigService sysConfigService;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 敏感词字典
|
|
|
+ */
|
|
|
+ private final Map wordMap = new HashMap(1024);
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 是否已初始化
|
|
|
+ */
|
|
|
+ private boolean init;
|
|
|
+ /**
|
|
|
+ * 黑名单列表
|
|
|
+ */
|
|
|
+ private String blackList;
|
|
|
+ /**
|
|
|
+ * 白名单列表
|
|
|
+ */
|
|
|
+ private String whiteList;
|
|
|
+
|
|
|
+ @PostConstruct
|
|
|
+ public void init() {
|
|
|
+ this.blackList = sysConfigService.findConfigValue(SysConfigConstant.BLACK_LIST);
|
|
|
+ this.whiteList = sysConfigService.findConfigValue(SysConfigConstant.WHITE_LIST);
|
|
|
+ initKeyWord();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取初始化的敏感词列表
|
|
|
+ *
|
|
|
+ * @return 敏感词列表
|
|
|
+ */
|
|
|
+ public Map getWordMap() {
|
|
|
+ return wordMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 初始化
|
|
|
+ */
|
|
|
+ private synchronized void initKeyWord() {
|
|
|
+ try {
|
|
|
+ if (!init) {
|
|
|
+ // 将敏感词库加入到HashMap中
|
|
|
+ addWord(strToSet(blackList), WordType.BLACK);
|
|
|
+ // 将非敏感词库也加入到HashMap中
|
|
|
+ addWord(strToSet(whiteList), WordType.WHITE);
|
|
|
+ }
|
|
|
+ init = true;
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("初始化失败:" + e);
|
|
|
+ throw new BizException("敏感词列表初始化失败");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:<br>
|
|
|
+ * 中 = { isEnd = 0 国 = {<br>
|
|
|
+ * isEnd = 1 人 = {isEnd = 0 民 = {isEnd = 1} } 男 = { isEnd = 0 人 = { isEnd = 1 }
|
|
|
+ * } } } 五 = { isEnd = 0 星 = { isEnd = 0 红 = { isEnd = 0 旗 = { isEnd = 1 } } } }
|
|
|
+ */
|
|
|
+ public void addWord(Iterable<String> wordList, WordType wordType) {
|
|
|
+ Map nowMap;
|
|
|
+ Map<String, String> newWorMap;
|
|
|
+ // 迭代keyWordSet
|
|
|
+ for (String key : wordList) {
|
|
|
+ nowMap = wordMap;
|
|
|
+ for (int i = 0; i < key.length(); i++) {
|
|
|
+ // 转换成char型
|
|
|
+ char keyChar = key.charAt(i);
|
|
|
+ // 获取
|
|
|
+ Object wordMap = nowMap.get(keyChar);
|
|
|
+ // 如果存在该key,直接赋值
|
|
|
+ if (wordMap != null) {
|
|
|
+ nowMap = (Map) wordMap;
|
|
|
+ } else {
|
|
|
+ // 不存在则构建一个map,同时将isEnd设置为0,因为他不是最后一个
|
|
|
+ newWorMap = new HashMap<>(4);
|
|
|
+ // 不是最后一个
|
|
|
+ newWorMap.put("isEnd", String.valueOf(EndType.HAS_NEXT.ordinal()));
|
|
|
+ nowMap.put(keyChar, newWorMap);
|
|
|
+ nowMap = newWorMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (i == key.length() - 1) {
|
|
|
+ // 最后一个
|
|
|
+ nowMap.put("isEnd", String.valueOf(EndType.IS_END.ordinal()));
|
|
|
+ nowMap.put("isWhiteWord", String.valueOf(wordType.ordinal()));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 修改敏感词后初始化
|
|
|
+ */
|
|
|
+ public void mapInit() {
|
|
|
+ init = false;
|
|
|
+ this.blackList = sysConfigService.findConfigValue(SysConfigConstant.BLACK_LIST);
|
|
|
+ this.whiteList = sysConfigService.findConfigValue(SysConfigConstant.WHITE_LIST);
|
|
|
+ wordMap.clear();
|
|
|
+ initKeyWord();
|
|
|
+ /*Map nowMap;
|
|
|
+ for (String key : wordList) {
|
|
|
+ List<Map> cacheList = new ArrayList<>();
|
|
|
+ nowMap = wordMap;
|
|
|
+ for (int i = 0; i < key.length(); i++) {
|
|
|
+ char keyChar = key.charAt(i);
|
|
|
+
|
|
|
+ Object map = nowMap.get(keyChar);
|
|
|
+ if (map != null) {
|
|
|
+ nowMap = (Map) map;
|
|
|
+ cacheList.add(nowMap);
|
|
|
+ } else {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (i == key.length() - 1) {
|
|
|
+ char[] keys = key.toCharArray();
|
|
|
+ boolean cleanable = false;
|
|
|
+ char lastChar = 0;
|
|
|
+ for (int j = cacheList.size() - 1; j >= 0; j--) {
|
|
|
+ Map cacheMap = cacheList.get(j);
|
|
|
+ if (j == cacheList.size() - 1) {
|
|
|
+ if (String.valueOf(WordType.BLACK.ordinal()).equals(cacheMap.get("isWhiteWord"))) {
|
|
|
+ if (wordType == WordType.WHITE) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (String.valueOf(WordType.WHITE.ordinal()).equals(cacheMap.get("isWhiteWord"))) {
|
|
|
+ if (wordType == WordType.BLACK) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ cacheMap.remove("isWhiteWord");
|
|
|
+ cacheMap.remove("isEnd");
|
|
|
+ if (cacheMap.size() == 0) {
|
|
|
+ cleanable = true;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (cleanable) {
|
|
|
+ Object isEnd = cacheMap.get("isEnd");
|
|
|
+ if (String.valueOf(EndType.IS_END.ordinal()).equals(isEnd)) {
|
|
|
+ cleanable = false;
|
|
|
+ }
|
|
|
+ cacheMap.remove(lastChar);
|
|
|
+ }
|
|
|
+ lastChar = keys[j];
|
|
|
+ }
|
|
|
+
|
|
|
+ if (cleanable) {
|
|
|
+ wordMap.remove(lastChar);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }*/
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除敏感词
|
|
|
+ * @param paramName 黑/白名单(black_list/white_list)
|
|
|
+ * @param word 敏感词
|
|
|
+ */
|
|
|
+ public void removeWord(String paramName, String word) {
|
|
|
+ if (!paramName.equals(SysConfigConstant.BLACK_LIST) && !paramName.equals(SysConfigConstant.WHITE_LIST)) {
|
|
|
+ throw new BizException("paramName不合法");
|
|
|
+ }
|
|
|
+ Set<String> list = strToSet(sysConfigService.findConfigValue(paramName));
|
|
|
+ list.removeIf(s -> s.equals(word));
|
|
|
+ sysConfigService.updateByName(paramName, list.toString().replaceAll("(?:\\[|null|\\]| +)", ""));
|
|
|
+ mapInit();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 添加敏感词
|
|
|
+ * @param paramName 黑/白名单(black_list/white_list)
|
|
|
+ * @param word 敏感词
|
|
|
+ */
|
|
|
+ public void addWord(String paramName, String word) {
|
|
|
+ if (!paramName.equals(SysConfigConstant.BLACK_LIST) && !paramName.equals(SysConfigConstant.WHITE_LIST)) {
|
|
|
+ throw new BizException("paramName不合法");
|
|
|
+ }
|
|
|
+ Set<String> list = strToSet(sysConfigService.findConfigValue(paramName));
|
|
|
+ list.add(word);
|
|
|
+ sysConfigService.updateByName(paramName, list.toString().replaceAll("(?:\\[|null|\\]| +)", ""));
|
|
|
+ mapInit();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 读取敏感词库中的内容,将内容添加到set集合中
|
|
|
+ */
|
|
|
+ private Set<String> readWordFile(String file) throws Exception {
|
|
|
+ Set<String> set;
|
|
|
+ // 字符编码
|
|
|
+ String encoding = "UTF-8";
|
|
|
+ try (InputStreamReader read = new InputStreamReader(
|
|
|
+ this.getClass().getResourceAsStream(file), encoding)) {
|
|
|
+ set = new HashSet<>();
|
|
|
+ BufferedReader bufferedReader = new BufferedReader(read);
|
|
|
+ String txt;
|
|
|
+ // 读取文件,将文件内容放入到set中
|
|
|
+ while ((txt = bufferedReader.readLine()) != null) {
|
|
|
+ set.add(txt);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 关闭文件流
|
|
|
+ return set;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 字符串转set
|
|
|
+ */
|
|
|
+ private Set<String> strToSet(String str) {
|
|
|
+ if (StringUtils.isNotBlank(str)){
|
|
|
+ String[] split = str.split(",");
|
|
|
+ return new HashSet<>(Arrays.asList(split));
|
|
|
+ }
|
|
|
+ return new HashSet<>();
|
|
|
+ }
|
|
|
+}
|