diff --git a/skipthoughts.py b/skipthoughts.py index 1a6011d..2f00c07 100644 --- a/skipthoughts.py +++ b/skipthoughts.py @@ -6,7 +6,7 @@ import theano import theano.tensor as tensor -import cPickle as pkl +import _pickle as pkl import numpy import copy import nltk @@ -16,16 +16,16 @@ from nltk.tokenize import word_tokenize profile = False - +dir_path = os.getcwd() #-----------------------------------------------------------------------------# # Specify model and table locations here #-----------------------------------------------------------------------------# -path_to_models = '/u/rkiros/public_html/models/' -path_to_tables = '/u/rkiros/public_html/models/' +path_to_models = dir_path+'/' +path_to_tables = dir_path+'/' #-----------------------------------------------------------------------------# -path_to_umodel = path_to_models + 'uni_skip.npz' -path_to_bmodel = path_to_models + 'bi_skip.npz' +path_to_umodel = dir_path + '/uni_skip.npz' +path_to_bmodel = dir_path + '/bi_skip.npz' def load_model(): @@ -33,7 +33,8 @@ def load_model(): Load the model with saved tables """ # Load model options - print 'Loading model parameters...' +# print(path_to_models,path_to_umodel) +# print('Loading model parameters...') with open('%s.pkl'%path_to_umodel, 'rb') as f: uoptions = pkl.load(f) with open('%s.pkl'%path_to_bmodel, 'rb') as f: @@ -48,18 +49,18 @@ def load_model(): btparams = init_tparams(bparams) # Extractor functions - print 'Compiling encoders...' +# print('Compiling encoders...') embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions) f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v') embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions) f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2') # Tables - print 'Loading tables...' +# print('Loading tables...') utable, btable = load_tables() # Store everything we need in a dictionary - print 'Packing up...' +# print('Packing up...') model = {} model['uoptions'] = uoptions model['boptions'] = boptions @@ -76,8 +77,9 @@ def load_tables(): Load the tables """ words = [] - utable = numpy.load(path_to_tables + 'utable.npy') - btable = numpy.load(path_to_tables + 'btable.npy') +# print(path_to_tables) + utable = numpy.load(path_to_tables + 'utable.npy',encoding='latin1') + btable = numpy.load(path_to_tables + 'btable.npy',encoding='latin1') f = open(path_to_tables + 'dictionary.txt', 'rb') for line in f: words.append(line.decode('utf-8').strip()) @@ -125,8 +127,8 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False) # Get features. This encodes by length, in order to avoid wasting computation for k in ds.keys(): if verbose: - print k - numbatches = len(ds[k]) / batch_size + 1 + print(k) + numbatches = len(ds[k]) // batch_size + 1 for minibatch in range(numbatches): caps = ds[k][minibatch::numbatches] @@ -194,10 +196,10 @@ def nn(model, text, vectors, query, k=5): scores = numpy.dot(qf, vectors.T).flatten() sorted_args = numpy.argsort(scores)[::-1] sentences = [text[a] for a in sorted_args[:k]] - print 'QUERY: ' + query - print 'NEAREST: ' + print('QUERY: ' + query) + print('NEAREST: ') for i, s in enumerate(sentences): - print s, sorted_args[i] + print(s, sorted_args[i]) def word_features(table): @@ -221,10 +223,10 @@ def nn_words(table, wordvecs, query, k=10): scores = numpy.dot(qf, wordvecs.T).flatten() sorted_args = numpy.argsort(scores)[::-1] words = [keys[a] for a in sorted_args[:k]] - print 'QUERY: ' + query - print 'NEAREST: ' + print('QUERY: ' + query) + print('NEAREST: ') for i, w in enumerate(words): - print w + print(w) def _p(pp, name): @@ -239,7 +241,7 @@ def init_tparams(params): initialize Theano shared variables according to the initial parameters """ tparams = OrderedDict() - for kk, pp in params.iteritems(): + for kk, pp in params.items(): tparams[kk] = theano.shared(params[kk], name=kk) return tparams @@ -249,7 +251,9 @@ def load_params(path, params): load parameters """ pp = numpy.load(path) - for kk, vv in params.iteritems(): + print(params.items) + print(pp) + for kk, vv in params.items(): if kk not in pp: warnings.warn('%s is not in the archive'%kk) continue