回答編集履歴
2
とりあえず
answer
CHANGED
@@ -26,6 +26,6 @@
|
|
26
26
|
cd = "ぱす〜〜/Wikipedia_dump190121/extracted/"
|
27
27
|
targets = [path for path in files(cd) if "wiki_" in path]
|
28
28
|
q = "恋するフォーチュンクッキー"
|
29
|
-
result = [x for x in p.map(process, ((t, q) for
|
29
|
+
result = [x for x in p.map(process, ((t, q) for t in targets)) if x]
|
30
30
|
print(result)
|
31
31
|
```
|
1
下だった
answer
CHANGED
@@ -9,13 +9,13 @@
|
|
9
9
|
|
10
10
|
from bs4 import BeautifulSoup
|
11
11
|
|
12
|
-
def files(args):
|
12
|
+
def files(path):
|
13
|
-
path, q = args
|
14
13
|
for pathname, dirnames, filenames in os.walk(path):
|
15
14
|
for filename in filenames:
|
16
15
|
yield os.path.join(pathname, filename)
|
17
16
|
|
18
|
-
def process(
|
17
|
+
def process(args):
|
18
|
+
path, q = args
|
19
19
|
with open(path) as f:
|
20
20
|
soup = BeautifulSoup(f, "html.parser")
|
21
21
|
article = soup.find_all("doc", title=q)
|