# -*- coding: utf-8 -*-
# author: huihui
# date: 2020/1/31 7:58 PM
"""Train word vectors from a pre-segmented corpus and save them to disk.

Steps performed at import/run time:

1. Read the whitespace-tokenised corpus in ``corp_seg.txt``.
2. Train a skip-gram Word2Vec model (100 dimensions, window 5).
3. Save the full model to ``corp_word2vec.model`` and the bare vectors to
   ``corp_word2vec.txt`` (word2vec text format).
4. Reload both files to demonstrate the two loading APIs.
"""
import os  # noqa: F401  (kept: was imported by the original script)
import sys  # noqa: F401  (kept: was imported by the original script)

import gensim

# The original script called ``os.reload(sys)`` followed by
# ``sys.setdefaultencoding('utf-8')``.  ``os`` has no ``reload`` attribute, so
# that line raised AttributeError before any training happened, and
# ``sys.setdefaultencoding`` does not exist on Python 3.  The hack has been
# removed.
# NOTE(review): gensim appears to decode corpus/vector files as UTF-8 itself,
# so no default-encoding override should be needed -- confirm on Python 2 if
# that interpreter still has to be supported.

# The corpus must be word-segmented beforehand (tokens separated by spaces).
input_file = "corp_seg.txt"
# NOTE(review): Text8Corpus treats the file as one continuous token stream
# and cuts it into fixed-length chunks; if the corpus is one sentence per
# line, gensim's LineSentence may be the better reader -- confirm intent.
sentences = gensim.models.word2vec.Text8Corpus(input_file)

# gensim 4.0 renamed the ``size`` keyword to ``vector_size``; choose the
# spelling that matches the installed version so the script runs on both.
_gensim_major = int(gensim.__version__.split(".")[0])
_dim_keyword = "vector_size" if _gensim_major >= 4 else "size"
_train_kwargs = {
    "sg": 1,             # skip-gram (0 would be CBOW)
    _dim_keyword: 100,   # dimensionality of the word vectors
    "window": 5,         # context window size
    "min_count": 1,      # keep every word, even those seen only once
    "negative": 3,       # number of negative samples
    "sample": 0.001,     # down-sampling threshold for frequent words
    "hs": 1,             # hierarchical softmax enabled as well
    "workers": 40,       # number of training threads
}

# Train the word vectors.
model = gensim.models.word2vec.Word2Vec(sentences, **_train_kwargs)

# Save the full model and the plain-text word2vec vector file.
model.save("corp_word2vec.model")
model.wv.save_word2vec_format("corp_word2vec.txt")

# Load the saved files back.  Both results are bound to ``model`` as in the
# original script, so after these two lines ``model`` holds the object
# returned by ``KeyedVectors.load_word2vec_format``, not the Word2Vec model.
model = gensim.models.word2vec.Word2Vec.load("corp_word2vec.model")
model = gensim.models.KeyedVectors.load_word2vec_format("corp_word2vec.txt")
# Source: https://www.cnblogs.com/xuehuiping/p/12246510.html