編集履歴

質問編集履歴

ソースコードの誤りを是正

2018/11/30 02:31

投稿

gran-1123

スコア18

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -106,21 +106,21 @@
                     results_t.append(re.findall(absobe_pattern_title, text))#results_tに格納
-                    results_t2.append(re.findall(absobe_pattern_title2, text))
+                    results_t.append(re.findall(absobe_pattern_title2, text))
-                    results_t3.append(re.findall(absobe_pattern_title3, text))
+                    results_t.append(re.findall(absobe_pattern_title3, text))
-                    results_t4.append(re.findall(absobe_pattern_title4, text))
+                    results_t.append(re.findall(absobe_pattern_title4, text))
                     results_ha.append(re.findall(absobe_pattern_happen, text))
-                    results_ha2.append(re.findall(absobe_pattern_happen2, text))
+                    results_ha.append(re.findall(absobe_pattern_happen2, text))
-                    results_ha3.append(re.findall(absobe_pattern_happen3, text))
+                    results_ha.append(re.findall(absobe_pattern_happen3, text))
-                    results_ha4.append(re.findall(absobe_pattern_happen4, text))
+                    results_ha.append(re.findall(absobe_pattern_happen4, text))
     f.close()

回答者への追加質問に伴い、ソースコードを変更し、画像を追加

2018/11/30 02:31

投稿

gran-1123

スコア18

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -44,6 +44,8 @@
 from bs4 import BeautifulSoup
+import pandas as pd
 results_hi = []#年号を格納しておく関数
@@ -52,24 +54,10 @@
 results_t = []#作品タイトルを格納しておく関数パターン１
-results_t2 = []#作品タイトルを格納しておく関数パターン２
-results_t3 = []#作品タイトルを格納しておく関数パターン３
-results_t4 = []#作品タイトルを格納しておく関数パターン４
 results_ha = []#出来事を格納しておく関数パターン１
-results_ha2 = []#出来事を格納しておく関数パターン２
-results_ha3 = []#出来事を格納しておく関数パターン３
-results_ha4 = []#出来事を格納しておく関数パターン４
 absobe_pattern_history = '^U.C.[0-9]{4}'#宇宙世紀の年号
@@ -114,21 +102,25 @@
                     results_hi.append(re.findall(absobe_pattern_history, text))#条件に適合した文を抽出、results_hiに格納
                     results_t.append(re.findall(absobe_pattern_title, text))#results_tに格納
-                    results_t2.append(re.findall(absobe_pattern_title2, text))#results_t2に格納
+                    results_t2.append(re.findall(absobe_pattern_title2, text))
-                    results_t3.append(re.findall(absobe_pattern_title3, text))#results_t3に格納
+                    results_t3.append(re.findall(absobe_pattern_title3, text))
-                    results_t4.append(re.findall(absobe_pattern_title4, text))#results_t4に格納
+                    results_t4.append(re.findall(absobe_pattern_title4, text))
-                    results_ha.append(re.findall(absobe_pattern_happen, text))#results_haに格納
+                    results_ha.append(re.findall(absobe_pattern_happen, text))
-                    results_ha2.append(re.findall(absobe_pattern_happen2, text))#results_ha2に格納
+                    results_ha2.append(re.findall(absobe_pattern_happen2, text))
-                    results_ha3.append(re.findall(absobe_pattern_happen3, text))#results_ha3に格納
+                    results_ha3.append(re.findall(absobe_pattern_happen3, text))
-                    results_ha4.append(re.findall(absobe_pattern_happen4, text))#results_ha4に格納
+                    results_ha4.append(re.findall(absobe_pattern_happen4, text))
     f.close()
@@ -140,45 +132,17 @@
     UC_title = [t[0] for t in results_t if  t != []]#抽出データから空っぽのデータを取り除いた上で、入れ子構造を解消
-    UC_title2 = [t[0] for t in results_t2 if  t != []]#上に同じ
-    UC_title3 = [t[0] for t in results_t3 if  t != []]#上に同じ
-    UC_title4 = [t[0] for t in results_t4 if  t != []]#上に同じ
     UC_happen = [ha[0] for ha in results_ha if  ha != []]#抽出データから空っぽのデータを取り除いた上で、入れ子構造を解消
-    UC_happen2 = [ha[0] for ha in results_ha2 if  ha != []]#上に同じ
-    UC_happen3 = [ha[0] for ha in results_ha3 if  ha != []]#上に同じ
-    UC_happen4 = [ha[0] for ha in results_ha4 if  ha != []]#上に同じ
-    tit = UC_title + UC_title2#作品名パターンを一つのリストに統合
-    tit2 = tit + UC_title3
-    tit3 = tit2 + UC_title4
-    hap = UC_happen + UC_happen2
-    hap2 = hap + UC_happen3
-    hap3 = hap2 + UC_happen4
     his_list = sorted(set(UC_history), key=UC_history.index)#重複を排除
-    tit_list = sorted(set(tit3), key=tit3.index)#上に同じ
+    tit_list = sorted(set(UC_title), key=UC_title.index)#上に同じ
-    hap_list = sorted(set(hap3), key=hap3.index)#上に同じ
+    hap_list = sorted(set(UC_happen), key=UC_happen.index)#上に同じ
@@ -194,6 +158,28 @@
+# 最長のリストにあわせNone要素を詰め込む
+his_l,tit_l,hap_l = [],[],[]
+from itertools import zip_longest
+for his,tit,hap in zip_longest(his_list,tit_list,hap_list):
+    his_l.append(his)
+    tit_l.append(tit)
+    hap_l.append(hap)
+df = pd.DataFrame({'年代':his_l,'出来事':hap_l,'作品名':tit_l},columns=['年代','出来事','作品名'])
+df.to_html('ret.html',index=False)
 ```
 ###出力結果
@@ -221,3 +207,9 @@
 /home/urai/crawling/gandamu/gandamu uchuuseiki/1.htmlの内容は
 Wikipedia記事「宇宙世紀」のHTMLデータとなっております。
+回答によって年表形式で出力できるようになると思ったのですが、回答者のようには出力できません。
+何が問題でしょうか？![イメージ説明](d6c898bfaf60d62dca812368cf149489.png)