file(filename)がエラーになってしまう。

Question

現在、オライリーの集合知プログラミングで階層クラスリングを学ぶ際に、データファイルを読み込むための関数を作る際にfile(filename)がエラーになってしまいます。

python
1from math import sqrt
2
3
4def readfile(filename):
5    lines = [line for line in file(filename)]
6    # 最初の行は列のタイトル
7    colnames = lines[0].strip().split('\t')[1:]
8    rownames = []
9    data = []
10    for line in lines[1:]:
11        p = line.strip().split('\t')
12        # それぞれの行の最初の列は行の名前
13        rownames.append(p[0])
14        # 行の残りの部分がそのぎょうのデータ
15        data.append([float(x) for x in p[1:]])
16    return rownames, colnames, data
17
18
19# ピアソンによる相関関係のスコアを調べる
20
21
22def pearson(v1, v2):
23    # 単純な合計
24    sum1 = sum(v1)
25    sum2 = sum(v2)
26
27    # 平方の合計
28    sum1Sq = sum([pow(v, 2) for v in v1])
29    sum2Sq = sum([pow(v, 2) for v in v2])
30
31    # 積の合計
32    pSum = sum([v1[i] * v2[i] for i in range(len(v1))])
33
34    # ピアソンによるスコアを算出
35    num = pSum - (sum1 * sum2 / len(v1))
36    den = sqrt((sum1Sq - pow(sum1, 2) / len(v1))
37               * (sum2Sq - pow(sum2, 2) / len(v1)))
38    if den == 0:
39        return 0
40
41    return 1.0 - num / den
42
43# 階層的なツリーを表現するにおいて全てのプロパティを持つ クラスタ型
44
45
46class bicluster:
47    def __init__(self, vec, left=None, right=None, distance=0.0, id=None):
48        self.left = left
49        self.right = right
50        self.vec = vec
51        self.id = id
52        self.distance = distance
53
54
55def hcluster(rows, distance=pearson):
56    distances = {}
57    currentclustid = -1
58    # クラスタは最初は行たち
59    clust = [bicluster(rows[i], id=i) for i in range(len(rows))]
60    while len(clust) > 1:
61        lowestpair = (0, 1)
62        closest = distance(clust[0].vec, clust[1].vec)
63        # 全ての組をループし、もっとも距離の近い組を探す
64        for i in range(len(clust)):
65            for j in range(i + 1, len(clust)):
66                # 距離をキャッシュしてあればそれを使う
67                if (clust[i].id, clust[j].id) not in distances:
68                    distances[(clust[i].id, clust[j].id)] = distance(
69                        clust[i].vec, clust[j].vec)
70                d = distances[(clust[i].id, clust[j].id)]
71
72                if d < closest:
73                    closest = d
74                    lowestpair = (i, j)
75        # 二つのクラスタの平均を計算する
76        mergevec = [
77            (clust[lowestpair[0]].vec[i] + clust[lowestpair[1]].vec[i]) / 2.0
78            for i in range(len(clust[0].vec))]
79        newcluster = bicluster(mergevec, left=clust[lowestpair[0]],
80                               right=clust[lowestpair[1]],
81                               distance=closest, id=currentclustid)
82        # 元のセットではないクラスタのIDは負にする
83        currentclustid -= 1
84        del clust[lowestpair[1]]
85        del clust[lowestpair[0]]
86        clust.append(newcluster)
87    return clust[0]
88

のコードの中の

python
1def readfile(filename):
2    lines = [line for line in file(filename)]
3    # 最初の行は列のタイトル
4    colnames = lines[0].strip().split('\t')[1:]
5    rownames = []
6    data = []
7    for line in lines[1:]:
8        p = line.strip().split('\t')
9        # それぞれの行の最初の列は行の名前
10        rownames.append(p[0])
11        # 行の残りの部分がそのぎょうのデータ
12        data.append([float(x) for x in p[1:]])
13    return rownames, colnames, data

ここのfile(filename)がエラーになってしまいます。

terminal
1 File "<stdin>", line 1, in <module>
2  File "/Users/kajimurarita/Desktop/python_project/clusters.py", line 6, in readfile
3    lines = [line for line in file(filename)]
4NameError: name 'file' is not defined

申し訳ないのですが助言がいただければと思います。よろしくお願いします。

Accepted Answer

```python
   lines = open(filename).readlines()
```

関連した質問