I have downloaded pretrained glove vector file from the internet. It is a .txt file. I am unable to load and access it. It is easy to load and access a word vector binary file u
This code takes some time to store the GloVe embeddings in a shelf the first time it runs, but loading them from the shelf afterwards is considerably faster than the other approaches.
import os
import numpy as np
from contextlib import closing
import shelve
def store_glove_to_shelf(glove_file_path, shelf, dim=None):
    """Parse a GloVe text file and store every word vector in ``shelf``.

    Each non-blank line is expected to hold a token followed by its vector
    components, all separated by single spaces. The vector is taken from the
    right-hand end of the line, so tokens that themselves contain spaces
    (e.g. ``". . ."``) are stored intact instead of breaking the float
    conversion.

    Args:
        glove_file_path: Path to the GloVe ``.txt`` file; it is read as UTF-8.
        shelf: Writable mapping (an open ``shelve`` shelf, or any dict-like
            object) that receives ``word -> float32 numpy array`` entries.
        dim: Number of vector components per line. When ``None`` it is
            inferred from the first non-blank line as ``field count - 1``,
            which assumes that first line's token contains no spaces.

    Raises:
        ValueError: If ``dim`` is smaller than 1, a line has too few fields
            for a token plus ``dim`` components, or a component is not a
            valid number.
    """
    if dim is not None and dim < 1:
        raise ValueError("dim must be a positive integer, got {!r}".format(dim))

    print('Loading Glove')
    # Explicit encoding: the platform default (e.g. cp1252 on Windows) cannot
    # be relied on to decode the non-ASCII tokens in the file.
    with open(glove_file_path, encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            stripped = line.rstrip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            # Split on the literal space only: a bare split() would also cut
            # tokens apart on Unicode whitespace such as a no-break space.
            values = stripped.split(' ')
            if dim is None:
                dim = len(values) - 1
            if dim < 1 or len(values) <= dim:
                raise ValueError(
                    "Line {} of {} does not contain a token followed by "
                    "{} vector components".format(
                        line_number, glove_file_path, dim))

            # Everything before the last `dim` fields is the token itself.
            word = ' '.join(values[:-dim])
            vec = np.asarray(values[-dim:], dtype='float32')
            shelf[word] = vec
# Input GloVe text file and the base name of the shelf built from it.
glove_file_path = "glove/glove.840B.300d.txt"
shelf_file_name = "glove_embeddings"

# One-off (slow) step: copy every embedding into a shelf so that later runs
# can open the shelf instead of re-parsing the text file.
with closing(shelve.open(shelf_file_name + ".shelf", flag="c")) as shelf:
    store_glove_to_shelf(glove_file_path, shelf)
print(
    "Stored glove embeddings from {} to {}".format(
        glove_file_path, shelf_file_name + ".shelf"
    )
)

# Fast path: reopen the shelf to reuse the stored embeddings.
with closing(shelve.open(shelf_file_name + ".shelf")) as embeddings_index:
    # USE embeddings_index here; it behaves like a dictionary, but only while
    # this `with` block is active (the shelf is closed on exit).
    print(
        "Loaded glove embeddings from {}".format(shelf_file_name + ".shelf")
    )
    print(
        "Found glove embeddings with {} words".format(len(embeddings_index))
    )