import numpy as np import chainer from chainer import cuda, Function,Variable,optimizers,serializers,utils from chainer import Link,Chain,ChainList import chainer.functions as F import chainer.links as L xp = cuda.cupy vocab = {} def load_data(filename): global vocab words = open(filename).read().replace('\n','<eos>').strip().split() dataset = xp.ndarray((len(words),), dtype=xp.int32) for i, word in enumerate(words): if word not in vocab: vocab[word] = len(vocab) dataset[i] = vocab[word] return dataset train_data = load_data('ptb.train.txt') eos_id = vocab['<eos>'] class MyLSTM(chainer.Chain): def __init__(self,layer,v,k,dout): super(MyLSTM,self).__init__(embed = L.EmbedID(v,k), H = L.NStepLSTM(layer,k,k,dout), W = L.Linear(k,v),) def __call__(self,hx,cx,xs,t): accum_loss = None xembs = [ self.embed(x) for x in xs] xss = tuple(xembs) hy,cy,ys = self.H(hx,cx,xss) y = [self.W(item) for item in ys] for i in range(len(y)): tx = Variable(xp.array(t[i],dtype = xp.int32)) loss = F.softmax_cross_entropy(y[i],tx) accum_loss = loss if accum_loss is None else accum_loss + loss return accum_loss demb = 100 model = MyLSTM(2,len(vocab),demb,0.5) cuda.get_device(0).use() model.to_gpu() optimizer = optimizers.Adam() optimizer.setup(model) bc = 0 xs = [] t = [] for epoch in range(3): s = [] for pos in range(len(train_data)): id = train_data[pos] if(id != eos_id): s += [id] else: bc += 1 next_s = s[1:] next_s += [ eos_id ] xs += [ xp.asarray(s, dtype = xp.int32)] t += [ xp.asanyarray(next_s,dtype = xp.int32)] s = [] if (bc == 10): model.cleargrads() hx = Variable(xp.zeros((2,len(xs),demb),dtype = xp.float32)) cx = Variable(xp.zeros((2,len(xs),demb),dtype = xp.float32)) loss = model(hx,cx,xs,t) loss.backward() optimizer.update() xs = [] t = [] bc = 0 if (pos % 100 == 0): print (pos, "/", len(train_data)," finished") outfile = "nsbatch-" + str(epoch) + ".model" serializers.save_npz(outfile,model)
上記のプログラムで1epochごとにその時点のmodelをファイルに書き出しています
下記のプログラムで言語モデルの評価をしようとしているのですが、うまくいきません。
https://github.com/tomsercu/lstm のdata/ptb.train.txt,ptb.test.txtを用いています
import numpy as np import chainer from chainer import cuda, Function,Variable,optimizers,serializers,utils from chainer import Link,Chain,ChainList import chainer.functions as F import chainer.links as L import math import sys argvs = sys.argv xp = cuda.cupy vocab = {} def load_data(filename): global vocab words = open(filename).read().replace('\n','<eos>').strip().split() dataset = xp.ndarray((len(words),), dtype=xp.int32) for i, word in enumerate(words): if word not in vocab: vocab[word] = len(vocab) dataset[i] = vocab[word] return dataset class MyLSTM(chainer.Chain): def __init__(self,layer,v,k,dout): super(MyLSTM,self).__init__(embed = L.EmbedID(v,k), H = L.NStepLSTM(layer,k,k,dout), W = L.Linear(k,v),) def __call__(self,hx,cx,xs,t): accum_loss = None xembs = [ self.embed(x) for x in xs] xss = tuple(xembs) hy,cy,ys = self.H(hx,cx,xss) y = [self.W(item) for item in ys] for i in range(len(y)): tx = Variable(xp.array(t[i],dtype = xp.int32)) loss = F.softmax_cross_entropy(y[i],tx) accum_loss = loss if accum_loss is None else accum_loss + loss return accum_loss train_data = load_data('ptb.train.txt') demb = 100 def cal_ps(model,s): h = Variable(xp.zeros((1,demb),dtype=xp.float32)) sum = 0.0 for i in range (1,len(s)): w1,w2=s[i-1],s[i] # x_k = model.embed(Variable(xp.array([w1],dtype=xp.int32))) # h = F.tanh(x_k + model.H(h)) yv = F.softmax(model.W(h)) pi = yv.data[0][w2] sum -= math.log(pi,2) return sum eos_id = vocab['<eos>'] max_id = len(vocab) test_data = load_data('ptb.test.txt') test_data = test_data[0:1000] model = MyLSTM(2,len(vocab),demb,0.5) cuda.get_device(0).use() model.to_gpu() #optimizer = optimizers.Adam() #optimizer.setup(model) serializers.load_npz('nsbatch-1.model',model) sum = 0.0 wnum = 0 bc = 0 xs = [] t = [] s = [] unk_word = 0 for pos in range(len(test_data)): id = test_data[pos] if(id > max_id): unk_word = 1 else: bc += 1 next_s = s[1:] next_s += [ eos_id ] xs += [ xp.asarray(s, dtype = xp.int32)] t += [ xp.asanyarray(next_s,dtype = xp.int32)] s = [] if (bc == 10): if(unk_word != 1): ps = cal_ps(model,s) sum += ps wnum += len(s) - 1 else: unk_word = 0 bc = 0 xs = [] t = [] print (math.pow(2,sum/wnum))
あなたの回答
tips
プレビュー