- data/wiki.zh.vec You can download pretrained word vectors, or you can train word vectors yourself.
python setup.py install
Use spaCy + jieba
language: "zh"
pipeline:
- name: "nlp_spacy"
  model: "data/zh_models"
- name: "tokenizer_jieba"
- name: "intent_entity_featurizer_regex"
- name: "intent_featurizer_spacy"
- name: "ner_crf"
- name: "ner_synonyms"
- name: "intent_classifier_sklearn"
language: "zh"
pipeline:
- name: "nlp_spacy"
  model: "data/zh_models"
- name: "tokenizer_jieba"
  default_dict: "./default_dict.big"
  user_dicts: "./jieba_userdict"
- name: "intent_entity_featurizer_regex"
- name: "intent_featurizer_spacy"
- name: "ner_crf"
- name: "ner_synonyms"
- name: "intent_classifier_sklearn"
python -m rasa_nlu.load_vector data/wiki.zh.vec zh zh_models
python -m rasa_nlu.train -c sample_configs/config_spacy_jieba.yml --data data/examples/rasa/demo-rasa_zh.json --path models
python -m rasa_nlu.server -c sample_configs/config_spacy_jieba.yml --path models
$ curl -XPOST localhost:5000/parse -d '{"q":"我发烧了该吃什么药?", "model": "model_20180821-115735"}' | python -mjson.tool
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 652 0 552 100 100 157 28 0:00:03 0:00:03 --:--:-- 157
{
"entities": [
{
"end": 3,
"entity": "disease",
"extractor": "ner_crf",
"start": 1,
"value": "发烧"
}
],
"intent": {
"confidence": 0.5397186422631861,
"name": "medical"
},
"intent_ranking": [
{
"confidence": 0.5397186422631861,
"name": "medical"
},
{
"confidence": 0.16206323981749196,
"name": "restaurant_search"
},
{
"confidence": 0.1212448457737397,
"name": "affirm"
},
{
"confidence": 0.10333600028547868,
"name": "goodbye"
},
{
"confidence": 0.07363727186010374,
"name": "greet"
}
],
"text": "我发烧了该吃什么药?"
}