I am currently using uni-grams in my word2vec model as follows.
def review_to_sentences( review, tokenizer, remove_stopwords=False ):
# Returns a list of sentences
from gensim.models import Phrases
from gensim.models.phrases import Phraser
# Toy corpus: each string is treated as one sentence.
documents = [
    "the mayor of new york was there",
    "machine learning can be useful sometimes",
    "new york mayor was present",
]

# Split every sentence on single spaces so the phrase model receives
# a list of token lists rather than raw strings.
sentence_stream = [doc.split(" ") for doc in documents]
print(sentence_stream)

# Build the bigram model from the tokenized sentences, then wrap it in a
# Phraser that is used below to transform each sentence.
# NOTE(review): delimiter is passed as bytes here, which matches the
# gensim 3.x API; gensim 4.x appears to expect a str delimiter -- confirm
# against the installed gensim version before changing it.
bigram = Phrases(sentence_stream, min_count=1, threshold=2, delimiter=b' ')
bigram_phraser = Phraser(bigram)
print(bigram_phraser)

# Apply the phraser to each tokenized sentence and print the resulting
# token list.
for sent in sentence_stream:
    tokens_ = bigram_phraser[sent]
    print(tokens_)