pythonでのファイル読み込み時に、変数の更新が行われない

###前提・実現したいこと
pythonで機械学習の勉強をしています
その時に学習を重ねようとfor文でコードを回しています

###発生している問題・エラーメッセージ
1回目はファイルが読み込まれ、変数の値の更新が行われると問題ないのですが、2回目の繰り返しからコードが飛ばされてしまいます

###該当のソースコード
NAISTのnlpチュートリアルを参考に勉強させてもらっています。NLPチュートリアルの7番目です
pythonを初めて数ヶ月なのでコードがぐちゃぐちゃで読みにくかったり、ここは変えた方がいいと思われるところがあると思うので
そう言ったことも言ってもらえると助かります
また、このチュートリアルには学習方法は乗っているのですがテスト方法が載っていないので自分なりの考え方でテストを行ってしまっているので間違っていたら正しい方法を教えてもらいたいです。
（追記）後半の方にあるc,num, netといった変数が更新されません
一回実行してみるとよく分かります
まずnetには各層の重みベクトル、cにはテストケースにおける二元符号{1, -1}のうちの1が出てくる回数が,numにはテストケースを実行した際の結果で0よりも大きくなる場合をカウントしています。1回目はcとnumの二つの値が各回数分変更がかかり数字が更新されます。netもupdate_weightsメソッドによって変更が加えられ値の更新が行われます。ですが、2回目からどの変数の値も変わらず、何度やっても結果は1回目のものから変わらなくなってしまいます。

python
1# -*- coding utf-8 -*-
2from collections import defaultdict
3import math
4import numpy as np
5
6def word2id(input_file):
7    ids = defaultdict(lambda: len(ids))
8    for line in input_file:
9        y, x = line.rstrip().split('\t')
10        words = x.split()
11        for word in words:
12            ids["UNI:" + word]
13    return ids
14
15def word2id_test(ids, line):
16    words = line.rstrip().split()
17    for word in words:
18        ids["UNI:" + word]
19
20def create_feature(x, ids):
21    phi = [0] * len(ids)
22    words = x.rstrip().split()
23    for word in words:
24        phi[ids["UNI:" + word]] += 1
25    return phi
26
27def forward_nn(net, phi0):
28    phi_list = [phi0, "*", "*"]
29    for i, layer in enumerate(net):
30        w = layer["w"]
31        b = layer["b"]
32        phi_list[i + 1] = np.tanh(np.dot(w, phi_list[i]) + b)
33    return phi_list
34
35def backward_nn(net, phi, y):
36    J = len(net)
37    delta = [0, 0, np.array([y - phi[J][0]])]
38    delta_prime = [0] * (J + 1)
39    for i in range(J - 1, 0, -1):
40        delta_prime[i + 1] = delta[i + 1] * (1 - pow(phi[i + 1], 2))
41        w = net[i]["w"]
42        b = net[i]["b"]
43        delta[i] = np.dot(delta_prime[i + 1], w)
44    return delta_prime
45
46def update_weights(net, phi, delta_prime, _lambda):
47    for i in range(len(net) - 1):
48        w = net[i]["w"]
49        b = net[i]["b"]
50        w += _lambda * np.outer(delta_prime[i + 1], phi[i])
51        b += _lambda * delta_prime[i + 1]
52
53
54if __name__ == "__main__":
55    input_file = open('data/titles-en-train.labeled') 
56    test_file = open("data/titles-en-test.word")
57    ans_file = open('data/titles-en-test.labeled')
58    
59    ids = word2id(input_file)
60    
61    for line in test_file:
62        word2id_test(ids, line)
63    input_file.seek(0)
64    test_file.seek(0)
65
66    feat_lab = []
67
68    for line in input_file:
69        y, x = line.rstrip().split('\t')
70        phi = create_feature(x, ids)
71        feat_lab.append((phi, float(y)))
72 
73    net = [{"w": np.random.rand(2, len(ids)) - 0.5, "b": np.random.rand(2) - 0.5}, {"w": np.random.rand(1, 2) - 0.5, "b": np.random.rand(1) - 0.5}]
74    
75    print(net)
76    
77    ans_file = open("data/titles-en-test.labeled")
78    
79    iterations = 5
80
81    #学習する
82    for i in range(iterations):
83        c = 0
84        num = 0
85        test_file.seek(0)
86        ans_file.seek(0)
87        
88        for phi, y in feat_lab:
89            phi_list = forward_nn(net, phi)
90            delta_prime = backward_nn(net, phi_list, y)
91            update_weights(net, phi, delta_prime, 0.1)
92        
93        for line, line2 in zip(test_file, ans_file):
94            phi = create_feature(line, ids)
95            phi_list = forward_nn(net, phi)
96            y, x = line2.rstrip().split('\t')
97            if float(y) == 1:
98                num += 1
99            if float(phi_list[len(phi_list) - 1][0]) > 0 and float(y) == 1:
100                c += 1
101        print(c / num)

###試したこと
ファイルをseekメソッドを使って先頭に戻すことによってファイルの読み込みはうまくいくようになった？のですが
変数の更新が行われないです
（追記）テストを行わずに学習だけ行うといったこと、つまり

# -*- coding utf-8 -*-
from collections import defaultdict
import math
import numpy as np

def word2id(input_file):
    ids = defaultdict(lambda: len(ids))
    for line in input_file:
        y, x = line.rstrip().split('\t')
        words = x.split()
        for word in words:
            ids["UNI:" + word]
    return ids

def word2id_test(ids, line):
    words = line.rstrip().split()
    for word in words:
        ids["UNI:" + word]

def create_feature(x, ids):
    phi = [0] * len(ids)
    words = x.rstrip().split()
    for word in words:
        phi[ids["UNI:" + word]] += 1
    return phi

def forward_nn(net, phi0):
    phi_list = [phi0, "*", "*"]
    for i, layer in enumerate(net):
        w = layer["w"]
        b = layer["b"]
        phi_list[i + 1] = np.tanh(np.dot(w, phi_list[i]) + b)
    return phi_list

def backward_nn(net, phi, y):
    J = len(net)
    delta = [0, 0, np.array([y - phi[J][0]])]
    delta_prime = [0] * (J + 1)
    for i in range(J - 1, 0, -1):
        delta_prime[i + 1] = delta[i + 1] * (1 - pow(phi[i + 1], 2))
        w = net[i]["w"]
        b = net[i]["b"]
        delta[i] = np.dot(delta_prime[i + 1], w)
    return delta_prime

def update_weights(net, phi, delta_prime, _lambda):
    for i in range(len(net) - 1):
        w = net[i]["w"]
        b = net[i]["b"]
        w += _lambda * np.outer(delta_prime[i + 1], phi[i])
        b += _lambda * delta_prime[i + 1]


if __name__ == "__main__":
    input_file = open('data/titles-en-train.labeled') 
    test_file = open("data/titles-en-test.word")
    ans_file = open('data/titles-en-test.labeled')
    
    ids = word2id(input_file)
    
    for line in test_file:
        word2id_test(ids, line)
    input_file.seek(0)
    test_file.seek(0)

    feat_lab = []

    for line in input_file:
        y, x = line.rstrip().split('\t')
        phi = create_feature(x, ids)
        feat_lab.append((phi, float(y)))
 
    net = [{"w": np.random.rand(2, len(ids)) - 0.5, "b": np.random.rand(2) - 0.5}, {"w": np.random.rand(1, 2) - 0.5, "b": np.random.rand(1) - 0.5}]
    
    print(net)
    
    ans_file = open("data/titles-en-test.labeled")
    
    iterations = 5

    #学習する
    for i in range(iterations):
        c = 0
        num = 0
        test_file.seek(0)
        ans_file.seek(0)
        
        for phi, y in feat_lab:
            phi_list = forward_nn(net, phi)
            delta_prime = backward_nn(net, phi_list, y)
            update_weights(net, phi, delta_prime, 0.1)

のような場合はしっかりとnetの値は更新されます
そのため、このコード以外の場所に問題があると考えました

###補足情報(言語/FW/ツール等のバージョンなど)
python3 ver3.5.2を使用しています
numpyは ver1.12.1を使用しています

tell_k

2017/05/24 08:45

もうすこし具体的に上記コードのどの変数がどのように更新されていないのか書くと回答がつきやすいと思います。

AltT

2017/05/24 22:52

問題の箇所となる値について追記しました。分かりにくくてすいません。

行動規範の内容に同意します

回答2件

自己解決

ファイルの読み込みをしたもののcloseメソッドを使用しなかったためにこういった問題が起こったらしいです。

hoge = open("fuga", "r")

だけあって閉じてないので

hoge = open("fuga", "r")
try:
  ......
finally:
  hoge.close()

または

with open("hoge", "r") as fuga:
  .........

みたいに書けば問題なく動きました

投稿2017/05/26 04:26

AltT

総合スコア7

forループの中に下記コードが含まれているのに、numの値が変わらない（ずっと０のまま）というのであれば、yにどのような値が代入されているのかを調べる事で現象が理解できるのではないでしょうか？

            y, x = line2.rstrip().split('\t')
            if float(y) == 1:
                num += 1

print文をif文の手前に入れて、ｙの値を表示させてみるのが手っ取り早いと思います。 cも変わらないのですから、xの値も併せて表示させると良いです。

投稿2017/05/25 01:26

coco_bauer

総合スコア6915

AltT

2017/05/25 04:25

これは文章から人物について書かれているか書かれていないかという判定をするプログラムでline2にはテストケースとなる文章の答えと同じ文章が一行に書かれています。そのため、yの値には二元符号{-1, 1}のどちらかが格納されています。xには判定する文が格納されています。これらの値は変わることがないのでcとnumの値が変更されないといった問題と関係がないと考えました。おそらく今回問題となっている点はphi_listの最後に格納されている値が更新されないといったものだと考えました。さらに、この値が変更されないということはupdate_weightsメソッドが使用されず、netに格納されているニューラルネットの各層の重みが変更されていないことになるので、なぜにupdate_weightsで変更がかからないのかわからない状況です。