Scan for sensitive words using a DFA (deterministic finite automaton)
基于DFA的敏感词扫描模块
__author__ = "Black"
__email__ = "[email protected]"
__version__ = "V1.0"
- 支持自定义敏感词文件
dfa = DFA(filename)
- 支持动态添加单个敏感词
dfa.add_word(sensitive_word)
- 支持动态添加多个敏感词
dfa.add_words(sensitive_words)
from app.dfa import DFA
def main():
    """Run a small demonstration of the sensitive-word scanner.

    Builds a ``DFA`` from the word list in ``SensitiveWord.txt``, registers
    one additional word at runtime, scans a sample sentence and prints
    whatever ``DFA.dfa`` returns for it.
    """
    scanner = DFA("SensitiveWord.txt")
    scanner.add_word("毒品")

    # The continuation lines of this literal are kept at column 0 so that no
    # leading whitespace is added to the text handed to the scanner.
    sample_text = """
毒品包括海洛因,冰毒,大麻等,我国禁止AV,三级片,禁止未成年人吸食毒品,涉黄等行为
"""
    result = scanner.dfa(sample_text)
    print(result)


# Only run the demo when executed as a script, not when imported.
if __name__ == "__main__":
    main()
>>> (True, {'三级片', '毒品', '海洛因', '冰毒', '大麻'})