Browse Source

Merge remote-tracking branch 'origin/master'

hgw 3 years ago
parent
commit
03f6b91e40

+ 15 - 0
cooleshow-user/user-biz/src/main/java/com/yonge/cooleshow/biz/dal/wordfilter/EndType.java

@@ -0,0 +1,15 @@
+package com.yonge.cooleshow.biz.dal.wordfilter;
+
+/**
+ * End-of-word marker stored on the nodes of the sensitive-word dictionary.
+ * <p>
+ * The declaration order matters: the ordinal of each constant is what gets
+ * written into the dictionary, so constants must not be reordered.
+ *
+ * @author minghu.zhang
+ * @date 11:37 2020/11/11
+ **/
+public enum EndType {
+
+    /**
+     * More characters follow; no word ends here.
+     */
+    HAS_NEXT,
+
+    /**
+     * A complete word ends here.
+     */
+    IS_END
+}

+ 48 - 0
cooleshow-user/user-biz/src/main/java/com/yonge/cooleshow/biz/dal/wordfilter/FlagIndex.java

@@ -0,0 +1,48 @@
+package com.yonge.cooleshow.biz.dal.wordfilter;
+
+import java.util.List;
+
+/**
+ * Result of probing a text position against the sensitive-word dictionary.
+ *
+ * @author minghu.zhang
+ */
+public class FlagIndex {
+
+    /**
+     * Whether a dictionary word was matched.
+     */
+    private boolean flag;
+    /**
+     * Whether the matched word is a white-list word.
+     */
+    private boolean isWhiteWord;
+    /**
+     * Positions (indices into the scanned text) of the matched characters.
+     */
+    private List<Integer> index;
+
+    /**
+     * @return {@code true} when a dictionary word was matched
+     */
+    public boolean isFlag() {
+        return this.flag;
+    }
+
+    /**
+     * @return {@code true} when the matched word is a white-list word
+     */
+    public boolean isWhiteWord() {
+        return this.isWhiteWord;
+    }
+
+    /**
+     * @return the positions of the matched characters, or {@code null} if never set
+     */
+    public List<Integer> getIndex() {
+        return this.index;
+    }
+
+    /**
+     * @param flag whether a dictionary word was matched
+     */
+    public void setFlag(boolean flag) {
+        this.flag = flag;
+    }
+
+    /**
+     * @param whiteWord whether the matched word is a white-list word
+     */
+    public void setWhiteWord(boolean whiteWord) {
+        this.isWhiteWord = whiteWord;
+    }
+
+    /**
+     * @param index the positions of the matched characters
+     */
+    public void setIndex(List<Integer> index) {
+        this.index = index;
+    }
+}

+ 249 - 0
cooleshow-user/user-biz/src/main/java/com/yonge/cooleshow/biz/dal/wordfilter/WordContext.java

@@ -0,0 +1,249 @@
+package com.yonge.cooleshow.biz.dal.wordfilter;
+
+import com.yonge.cooleshow.biz.dal.service.SysConfigService;
+import com.yonge.cooleshow.common.constant.SysConfigConstant;
+import com.yonge.toolset.base.exception.BizException;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Configuration;
+
+import javax.annotation.PostConstruct;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.*;
+
+/**
+ * Dictionary context for the sensitive-word filter.
+ * <p>
+ * Reads the black list and the white list from system configuration and
+ * stores them in a trie of nested maps (one level per character) that
+ * {@link WordFilter} walks as a DFA.
+ * <p>
+ * NOTE(review): rebuilds are serialized with each other, but readers that
+ * obtained the map through {@link #getWordMap()} are not synchronized with a
+ * rebuild - confirm whether lookups running during {@link #mapInit()} are
+ * acceptable.
+ *
+ * @author minghu.zhang
+ */
+@SuppressWarnings({"rawtypes", "unchecked"})
+@Configuration
+public class WordContext {
+    private final static Logger log = LoggerFactory.getLogger(WordContext.class);
+
+    /**
+     * Separator between words in the stored configuration value.
+     */
+    private static final String WORD_SEPARATOR = ",";
+
+    @Autowired
+    private SysConfigService sysConfigService;
+
+    /**
+     * Root of the word trie. Character keys lead to child nodes; the String
+     * keys "isEnd" and "isWhiteWord" carry the node markers.
+     */
+    private final Map wordMap = new HashMap(1024);
+
+    /**
+     * Whether the trie has been built from the current lists.
+     */
+    private boolean init;
+    /**
+     * Raw black-list configuration value.
+     */
+    private String blackList;
+    /**
+     * Raw white-list configuration value.
+     */
+    private String whiteList;
+
+    /**
+     * Loads both lists from configuration and builds the trie once the bean
+     * has been constructed.
+     */
+    @PostConstruct
+    public void init() {
+        this.blackList = sysConfigService.findConfigValue(SysConfigConstant.BLACK_LIST);
+        this.whiteList = sysConfigService.findConfigValue(SysConfigConstant.WHITE_LIST);
+        initKeyWord();
+    }
+
+    /**
+     * Returns the live trie root (not a copy).
+     *
+     * @return the sensitive-word trie
+     */
+    public Map getWordMap() {
+        return wordMap;
+    }
+
+    /**
+     * Builds the trie from the cached lists unless it is already built.
+     *
+     * @throws BizException if building the trie fails
+     */
+    private synchronized void initKeyWord() {
+        if (init) {
+            return;
+        }
+        try {
+            // Black words first, then white words, so a white entry overrides
+            // a black entry that ends on the same node.
+            addWord(strToSet(blackList), WordType.BLACK);
+            addWord(strToSet(whiteList), WordType.WHITE);
+            init = true;
+        } catch (Exception e) {
+            // Pass the exception as the last argument so the stack trace is logged too.
+            log.error("初始化失败:{}", e.toString(), e);
+            throw new BizException("敏感词列表初始化失败");
+        }
+    }
+
+    /**
+     * Inserts words into the trie. Shape of the resulting structure:<br>
+     * 中 = { isEnd = 0 国 = {<br>
+     * isEnd = 1 人 = {isEnd = 0 民 = {isEnd = 1} } 男 = { isEnd = 0 人 = { isEnd = 1 }
+     * } } } 五 = { isEnd = 0 星 = { isEnd = 0 红 = { isEnd = 0 旗 = { isEnd = 1 } } } }
+     *
+     * @param wordList words to insert; {@code null} and empty entries are skipped
+     * @param wordType list the words belong to; its ordinal is stored as "isWhiteWord"
+     */
+    public void addWord(Iterable<String> wordList, WordType wordType) {
+        for (String key : wordList) {
+            if (key == null || key.isEmpty()) {
+                continue;
+            }
+            Map node = wordMap;
+            for (int i = 0; i < key.length(); i++) {
+                char keyChar = key.charAt(i);
+                Object child = node.get(keyChar);
+                if (child != null) {
+                    // Path already exists: just descend.
+                    node = (Map) child;
+                } else {
+                    // New node starts as "not the end of a word".
+                    Map created = new HashMap(4);
+                    created.put("isEnd", String.valueOf(EndType.HAS_NEXT.ordinal()));
+                    node.put(keyChar, created);
+                    node = created;
+                }
+            }
+            // The node of the last character marks the end of the word and its list type.
+            node.put("isEnd", String.valueOf(EndType.IS_END.ordinal()));
+            node.put("isWhiteWord", String.valueOf(wordType.ordinal()));
+        }
+    }
+
+    /**
+     * Reloads both lists from configuration and rebuilds the trie.
+     * <p>
+     * Synchronized so that concurrent rebuilds cannot interleave; otherwise
+     * one caller could clear the trie after another finished building it and
+     * then skip its own build because {@code init} is already {@code true}.
+     */
+    public synchronized void mapInit() {
+        this.blackList = sysConfigService.findConfigValue(SysConfigConstant.BLACK_LIST);
+        this.whiteList = sysConfigService.findConfigValue(SysConfigConstant.WHITE_LIST);
+        wordMap.clear();
+        init = false;
+        initKeyWord();
+    }
+
+    /**
+     * Removes a word from the stored list and rebuilds the trie.
+     *
+     * @param paramName list to modify (black list / white list configuration name)
+     * @param word      word to remove
+     * @throws BizException if {@code paramName} is not one of the two list names
+     */
+    public void removeWord(String paramName, String word) {
+        checkParamName(paramName);
+        Set<String> words = strToSet(sysConfigService.findConfigValue(paramName));
+        words.remove(StringUtils.trim(word));
+        sysConfigService.updateByName(paramName, String.join(WORD_SEPARATOR, words));
+        mapInit();
+    }
+
+    /**
+     * Adds a word to the stored list and rebuilds the trie.
+     * <p>
+     * A word that contains the separator character is read back as several words.
+     *
+     * @param paramName list to modify (black list / white list configuration name)
+     * @param word      word to add; blank values are ignored
+     * @throws BizException if {@code paramName} is not one of the two list names
+     */
+    public void addWord(String paramName, String word) {
+        checkParamName(paramName);
+        Set<String> words = strToSet(sysConfigService.findConfigValue(paramName));
+        String trimmed = StringUtils.trim(word);
+        if (StringUtils.isNotEmpty(trimmed)) {
+            words.add(trimmed);
+        }
+        sysConfigService.updateByName(paramName, String.join(WORD_SEPARATOR, words));
+        mapInit();
+    }
+
+    /**
+     * Rejects any configuration name other than the black-list or white-list name.
+     */
+    private void checkParamName(String paramName) {
+        // Constant-first comparison so a null paramName is rejected instead of raising an NPE.
+        boolean known = SysConfigConstant.BLACK_LIST.equals(paramName)
+                || SysConfigConstant.WHITE_LIST.equals(paramName);
+        if (!known) {
+            throw new BizException("paramName不合法");
+        }
+    }
+
+    /**
+     * Reads a classpath word file, one word per line, into a set.
+     *
+     * @param file classpath resource name
+     * @return the lines of the file
+     * @throws Exception if the resource is missing or cannot be read
+     */
+    private Set<String> readWordFile(String file) throws Exception {
+        Set<String> set = new HashSet<>();
+        // requireNonNull gives a meaningful message when the resource does not exist.
+        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(
+                Objects.requireNonNull(this.getClass().getResourceAsStream(file),
+                        "word file not found: " + file), "UTF-8"))) {
+            String txt;
+            while ((txt = bufferedReader.readLine()) != null) {
+                set.add(txt);
+            }
+        }
+        return set;
+    }
+
+    /**
+     * Splits a separator-delimited configuration value into a mutable set of
+     * trimmed, non-empty words.
+     *
+     * @param str stored configuration value, may be {@code null} or blank
+     * @return the words; empty when {@code str} is blank
+     */
+    private Set<String> strToSet(String str) {
+        Set<String> words = new HashSet<>();
+        if (StringUtils.isBlank(str)) {
+            return words;
+        }
+        for (String part : str.split(WORD_SEPARATOR)) {
+            String word = part.trim();
+            if (!word.isEmpty()) {
+                words.add(word);
+            }
+        }
+        return words;
+    }
+}

+ 216 - 0
cooleshow-user/user-biz/src/main/java/com/yonge/cooleshow/biz/dal/wordfilter/WordFilter.java

@@ -0,0 +1,216 @@
+package com.yonge.cooleshow.biz.dal.wordfilter;
+
+import org.springframework.context.annotation.Configuration;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Sensitive-word filter that scans text against the trie supplied by
+ * {@link WordContext}.
+ * <p>
+ * The {@code skip} parameter of the scanning methods is the number of
+ * consecutive non-matching characters tolerated between two characters of a
+ * word. White-list matches are never reported and are jumped over.
+ *
+ * @author minghu.zhang
+ */
+@SuppressWarnings("rawtypes")
+@Configuration
+public class WordFilter {
+    /**
+     * Root of the sensitive-word trie.
+     */
+    private final Map wordMap;
+
+    /**
+     * Creates a filter backed by the trie of the given context.
+     *
+     * @param context dictionary context providing the trie
+     */
+    public WordFilter(WordContext context) {
+        this.wordMap = context.getWordMap();
+    }
+
+    /**
+     * Replaces every character of each black-list word with {@code '*'}.
+     *
+     * @param text input text
+     * @return the masked text
+     */
+    public String replace(final String text) {
+        return replace(text, 0, '*');
+    }
+
+    /**
+     * Replaces every character of each black-list word with {@code symbol}.
+     *
+     * @param text   input text
+     * @param symbol replacement character
+     * @return the masked text
+     */
+    public String replace(final String text, final char symbol) {
+        return replace(text, 0, symbol);
+    }
+
+    /**
+     * Replaces every matched character of each black-list word with {@code symbol}.
+     *
+     * @param text   input text; {@code null} or empty is returned unchanged
+     * @param skip   tolerated gap between characters of a word
+     * @param symbol replacement character
+     * @return the masked text
+     */
+    public String replace(final String text, final int skip, final char symbol) {
+        if (text == null || text.isEmpty()) {
+            return text;
+        }
+        char[] charset = text.toCharArray();
+        for (int i = 0; i < charset.length; i++) {
+            FlagIndex fi = getFlagIndex(charset, i, skip);
+            if (fi.isFlag()) {
+                if (!fi.isWhiteWord()) {
+                    for (int j : fi.getIndex()) {
+                        charset[j] = symbol;
+                    }
+                } else {
+                    // Jump over the white-list word so its characters are not rescanned.
+                    i += fi.getIndex().size() - 1;
+                }
+            }
+        }
+        return new String(charset);
+    }
+
+    /**
+     * Tells whether the text contains a black-list word.
+     *
+     * @param text input text
+     * @return {@code true} if a black-list word is found
+     */
+    public boolean include(final String text) {
+        return include(text, 0);
+    }
+
+    /**
+     * Tells whether the text contains a black-list word.
+     *
+     * @param text input text; {@code null} or empty yields {@code false}
+     * @param skip tolerated gap between characters of a word
+     * @return {@code true} if a black-list word is found
+     */
+    public boolean include(final String text, final int skip) {
+        if (text == null || text.isEmpty()) {
+            return false;
+        }
+        char[] charset = text.toCharArray();
+        for (int i = 0; i < charset.length; i++) {
+            FlagIndex fi = getFlagIndex(charset, i, skip);
+            if (fi.isFlag()) {
+                if (fi.isWhiteWord()) {
+                    i += fi.getIndex().size() - 1;
+                } else {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Counts the black-list matches in the text.
+     *
+     * @param text input text
+     * @return number of matches
+     */
+    public int wordCount(final String text) {
+        return wordCount(text, 0);
+    }
+
+    /**
+     * Counts the black-list matches in the text. A match is counted for every
+     * start position that yields one, so overlapping matches are counted separately.
+     *
+     * @param text input text; {@code null} or empty yields {@code 0}
+     * @param skip tolerated gap between characters of a word
+     * @return number of matches
+     */
+    public int wordCount(final String text, final int skip) {
+        if (text == null || text.isEmpty()) {
+            return 0;
+        }
+        int count = 0;
+        char[] charset = text.toCharArray();
+        for (int i = 0; i < charset.length; i++) {
+            FlagIndex fi = getFlagIndex(charset, i, skip);
+            if (fi.isFlag()) {
+                if (fi.isWhiteWord()) {
+                    i += fi.getIndex().size() - 1;
+                } else {
+                    count++;
+                }
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Lists the black-list words found in the text.
+     *
+     * @param text input text
+     * @return matched words in order of appearance
+     */
+    public List<String> wordList(final String text) {
+        return wordList(text, 0);
+    }
+
+    /**
+     * Lists the black-list words found in the text. Each entry is built from
+     * the matched characters only; skipped characters are not included.
+     *
+     * @param text input text; {@code null} or empty yields an empty list
+     * @param skip tolerated gap between characters of a word
+     * @return matched words in order of appearance
+     */
+    public List<String> wordList(final String text, final int skip) {
+        List<String> wordList = new ArrayList<>();
+        if (text == null || text.isEmpty()) {
+            return wordList;
+        }
+        char[] charset = text.toCharArray();
+        for (int i = 0; i < charset.length; i++) {
+            FlagIndex fi = getFlagIndex(charset, i, skip);
+            if (fi.isFlag()) {
+                if (fi.isWhiteWord()) {
+                    i += fi.getIndex().size() - 1;
+                } else {
+                    StringBuilder builder = new StringBuilder();
+                    for (int j : fi.getIndex()) {
+                        builder.append(text.charAt(j));
+                    }
+                    wordList.add(builder.toString());
+                }
+            }
+        }
+        return wordList;
+    }
+
+    /**
+     * Walks the trie starting at {@code begin} and reports the longest
+     * dictionary word found there.
+     *
+     * @param charset input text
+     * @param begin   position at which the scan starts
+     * @param skip    tolerated gap between characters of a word
+     * @return the match result; when a word is found, its index list holds
+     *         exactly the positions of that word's characters
+     */
+    private FlagIndex getFlagIndex(final char[] charset, final int begin, final int skip) {
+        FlagIndex fi = new FlagIndex();
+
+        Map current = wordMap;
+        boolean flag = false;
+        int count = 0;
+        // Size of index when the most recent end-of-word node was reached.
+        int matchedSize = 0;
+        List<Integer> index = new ArrayList<>();
+        for (int i = begin; i < charset.length; i++) {
+            char word = charset[i];
+            Map mapTree = (Map) current.get(word);
+            // Stop when the gap budget is exhausted or the very first character is not in the trie.
+            if (count > skip || (i == begin && Objects.isNull(mapTree))) {
+                break;
+            }
+            if (Objects.nonNull(mapTree)) {
+                current = mapTree;
+                count = 0;
+                index.add(i);
+            } else {
+                count++;
+                if (flag && count > skip) {
+                    break;
+                }
+            }
+            if ("1".equals(current.get("isEnd"))) {
+                flag = true;
+                matchedSize = index.size();
+            }
+            if ("1".equals(current.get("isWhiteWord"))) {
+                fi.setWhiteWord(true);
+                break;
+            }
+        }
+
+        fi.setFlag(flag);
+        // The walk may have continued along a longer word that never completed;
+        // drop those trailing positions so only the matched word is reported.
+        fi.setIndex(flag ? new ArrayList<>(index.subList(0, matchedSize)) : index);
+
+        return fi;
+    }
+}

+ 15 - 0
cooleshow-user/user-biz/src/main/java/com/yonge/cooleshow/biz/dal/wordfilter/WordType.java

@@ -0,0 +1,15 @@
+package com.yonge.cooleshow.biz.dal.wordfilter;
+
+/**
+ * Kind of list a dictionary word belongs to.
+ * <p>
+ * The declaration order matters: the ordinal of each constant is what gets
+ * written into the dictionary, so constants must not be reordered.
+ *
+ * @author minghu.zhang
+ * @date 11:37 2020/11/11
+ **/
+public enum WordType {
+
+    /**
+     * Black-list word.
+     */
+    BLACK,
+
+    /**
+     * White-list word.
+     */
+    WHITE
+}

+ 14 - 0
cooleshow-user/user-biz/src/main/java/com/yonge/cooleshow/biz/dal/wordfilter/annotation/CheckWord.java

@@ -0,0 +1,14 @@
+package com.yonge.cooleshow.biz.dal.wordfilter.annotation;
+
+import java.lang.annotation.*;
+
+/**
+ * Marks a method for sensitive-word checking / filtering.
+ *
+ * @Description: sensitive-word check / filter marker
+ * @Author: cy
+ * @Date: 2022/7/6
+ */
+@Documented
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.METHOD)
+public @interface CheckWord {
+}

+ 42 - 0
cooleshow-user/user-biz/src/main/java/com/yonge/cooleshow/biz/dal/wordfilter/annotation/CheckWordAspect.java

@@ -0,0 +1,42 @@
+package com.yonge.cooleshow.biz.dal.wordfilter.annotation;
+
+import com.alibaba.fastjson.JSON;
+import com.yonge.cooleshow.biz.dal.wordfilter.WordFilter;
+import com.yonge.toolset.base.exception.BizException;
+import org.aspectj.lang.ProceedingJoinPoint;
+import org.aspectj.lang.annotation.Around;
+import org.aspectj.lang.annotation.Aspect;
+import org.aspectj.lang.annotation.Pointcut;
+import org.redisson.api.RedissonClient;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.core.annotation.Order;
+import org.springframework.stereotype.Component;
+
+/**
+ * Aspect that rejects calls to {@code @CheckWord} methods whose arguments
+ * contain sensitive words.
+ *
+ * @Description: sensitive-word filtering aspect
+ * @Author: cy
+ * @Date: 2022/7/6
+ */
+@Aspect
+@Order(1)
+@Component
+public class CheckWordAspect {
+    /**
+     * Gap tolerated between characters of a word, used for both detection
+     * and reporting so the two always agree.
+     */
+    private static final int SKIP_DISTANCE = 3;
+
+    @Autowired
+    private WordFilter wordFilter;
+    @Autowired
+    RedissonClient redissonClient;
+
+    /**
+     * Matches every method annotated with {@code @CheckWord}.
+     */
+    @Pointcut("@annotation(com.yonge.cooleshow.biz.dal.wordfilter.annotation.CheckWord)")
+    private void checkWord() {
+    }
+
+    /**
+     * Serializes the call arguments to JSON and scans the result; proceeds
+     * with the call only when no sensitive word is found.
+     *
+     * @param joinPoint the intercepted call
+     * @return the result of the intercepted method
+     * @throws BizException if the arguments contain a sensitive word
+     * @throws Throwable    whatever the intercepted method throws
+     */
+    @Around("checkWord()")
+    public Object checkWord(ProceedingJoinPoint joinPoint) throws Throwable {
+        String text = JSON.toJSONString(joinPoint.getArgs());
+        if (wordFilter.include(text, SKIP_DISTANCE)) {
+            // Report with the same skip distance that triggered the rejection;
+            // otherwise the reported list can be empty or differ from what was detected.
+            throw new BizException("文本包含敏感信息:{}", wordFilter.wordList(text, SKIP_DISTANCE));
+        }
+        return joinPoint.proceed();
+    }
+}