teratail header banner
teratail header banner
質問するログイン新規登録

回答編集履歴

2

とりあえず

2019/02/10 15:21

投稿

hayataka2049
hayataka2049

スコア30939

answer CHANGED
@@ -26,6 +26,6 @@
  cd = "ぱす〜〜/Wikipedia_dump190121/extracted/"
  targets = [path for path in files(cd) if "wiki_" in path]
  q = "恋するフォーチュンクッキー"
- result = [x for x in p.map(process, ((t, q) for x in targets)) if x]
+ result = [x for x in p.map(process, ((t, q) for t in targets)) if x]
  print(result)
  ```

1

下だった

2019/02/10 15:21

投稿

hayataka2049
hayataka2049

スコア30939

answer CHANGED
@@ -9,13 +9,13 @@
  
  from bs4 import BeautifulSoup
  
- def files(args):
+ def files(path):
-     path, q = args
      for pathname, dirnames, filenames in os.walk(path):
          for filename in filenames:
              yield os.path.join(pathname, filename)
  
- def process(path):
+ def process(args):
+     path, q = args
      with open(path) as f:
          soup = BeautifulSoup(f, "html.parser")
          article = soup.find_all("doc", title=q)