[~/views/word2vec (master *)]
$ git log | head -1
commit 80be14a89b260df5cfca19a65cbfe52ba15db7ba
$ git diff
diff --git a/src/word2vec.c b/src/word2vec.c
index 2f892ea..7bd6392 100644
--- a/src/word2vec.c
+++ b/src/word2vec.c
@@ -309,7 +309,11 @@ void LearnVocabFromTrainFile() {
} else vocab[i].cn++;
if (vocab_size > vocab_hash_size * 0.7) ReduceVocab();
}
+
+ printf("before: </s> index = %d\n", SearchVocab("</s>"));
SortVocab();
+ printf("after: </s> index = %d\n", SearchVocab("</s>"));
+
if (debug_mode > 0) {
printf("Vocab size: %lld\n", vocab_size);
printf("Words in train file: %lld\n", train_words);
$ make -C src
...
$ echo foo bar baz >in.txt
$ bin/word2vec -train in.txt
Starting training using file in.txt
before: </s> index = 0
after: </s> index = -1 <------- oops! SearchVocab("</s>") returned 0 before SortVocab() but -1 after it
Vocab size: 1
Words in train file: 0