HTMLからのテキスト抽出に関する問題

前提・実現したいこと

現在HTMLデータから特定の条件にマッチしたテキストのみを正規表現を用いて抽出し、年表形式で出力するプログラムを作成しています。

しかし、自分が設定した正規表現の抽出パターンだけでは有用なテキストを見落としている可能性があると考え、新たな抽出パターンを模索していました。その際、年号を抽出する正規表現パターン「absobe_pattern_history」にマッチした箇所の前後の文も併せて抽出すれば、年表に有用な情報を得られるのではないかと考えつきました。

ですが、それを実現させるソースコードの書き方が分からないため、そのような挙動を実現させる手法の回答をお願いしたいです。

###ソースコード

#coding:utf-8
import re
from bs4 import BeautifulSoup
import pandas as pd

# Accumulators filled by the extraction loop. These are plain lists (not
# functions); each element appended to them is one re.findall() result.
results_hi = []  # era-year matches
results_t = []  # work-title matches
results_ha = []  # event matches

# Universal Century year at the start of the text, e.g. "U.C.0079".
# The dots are escaped so that only a literal "." is accepted; an unescaped
# "." would match any character.
absobe_pattern_history = r'^U\.C\.[0-9]{4}'

# Work-title patterns, all anchored at the start of the text.
# Pattern 1: the bare prefix "機動戦士ガンダム".
absobe_pattern_title = r'^機動戦士ガンダム'
# Pattern 2: "機動戦士" + one or more allowed characters + "ガンダム".
absobe_pattern_title2 = r'^機動戦士[\t\n\r\f\v一-龥ぁ-んァ-ンa-zA-ZΖ0-9_ー・.]{1,}ガンダム'
# Pattern 3: "機動戦士ガンダム" + one or more allowed characters (subtitle).
absobe_pattern_title3 = r'^機動戦士ガンダム[ \t\n\r\f\v一-龥ぁ-んァ-ンa-zA-ZΖ0-9_ー・.]{1,}'
# Pattern 4: infix and subtitle together. The trailing class covers the full
# a-z range so that subtitles ending in "y" or "z" are not cut short.
absobe_pattern_title4 = r'^機動戦士[\t\n\r\f\v一-龥ぁ-んァ-ンa-zA-ZΖ0-9_ー・.]{1,}ガンダム[　\t\n\r\f\v一-龥ァ-ンa-zA-ZΖ0-9_ー・]{1,}'

# Event patterns: one or more allowed characters followed by a keyword.
# They are not anchored, so one text can yield several matches.
absobe_pattern_happen = r'[\t\n\r\f\v一-龥ァ-ンa-zA-Z0-9_ー・]{1,}戦争'  # event pattern 1
absobe_pattern_happen2 = r'[\t\n\r\f\v一-龥ァ-ンa-zA-Z0-9_ー・]{1,}戦役'  # event pattern 2
absobe_pattern_happen3 = r'[\t\n\r\f\v一-龥ァ-ンa-zA-Z0-9_ー・]{1,}紛争'  # event pattern 3
absobe_pattern_happen4 = r'[\t\n\r\f\v一-龥ァ-ンa-zA-Z0-9_ー・]{1,}抗争'  # event pattern 4
absobe_pattern_happen5 = r'[\t\n\r\f\v一-龥ァ-ンa-zA-Z0-9_ー・]{1,}事件'  # event pattern 5

# Pass open() the encoding that matches the character set of the saved page.
filepath = '/home/urai/crawling/gandamu/gandamu uchuuseiki/1.html'

# Group the patterns per category so each text node is scanned in one loop.
# The order inside each tuple (pattern 1..N) fixes the order in which the
# findall results are appended.
title_patterns = (
    absobe_pattern_title,
    absobe_pattern_title2,
    absobe_pattern_title3,
    absobe_pattern_title4,
)
happen_patterns = (
    absobe_pattern_happen,
    absobe_pattern_happen2,
    absobe_pattern_happen3,
    absobe_pattern_happen4,
    absobe_pattern_happen5,
)

# The with-block closes the file on exit, so no explicit f.close() is needed.
with open(filepath, encoding='utf-8') as f:
    html_lines = f.readlines()

# NOTE(review): every line is parsed as its own HTML fragment, which is what
# makes the "^" anchors apply per line. A tag that spans several lines is
# presumably not parsed as intended -- confirm against the input file.
for line in html_lines:
    soup = BeautifulSoup(line, 'html.parser')
    # string=True selects every text node (current name of the deprecated
    # text= argument).
    for text in soup.find_all(string=True):
        if not text.strip():
            continue  # skip whitespace-only text nodes
        # Era years -> results_hi
        results_hi.append(re.findall(absobe_pattern_history, text))
        # Work titles -> results_t
        for pattern in title_patterns:
            results_t.append(re.findall(pattern, text))
        # Events -> results_ha
        for pattern in happen_patterns:
            results_ha.append(re.findall(pattern, text))

# Flatten the nested findall results. Every match is kept, not only the first
# one of each result, so several events found in the same text all survive.
# Empty results contribute nothing.
UC_history = [match for hits in results_hi for match in hits]
UC_title = [match for hits in results_t for match in hits]
UC_happen = [match for hits in results_ha for match in hits]

# Remove duplicates while keeping first-occurrence order (dict keys preserve
# insertion order).
tit_list = list(dict.fromkeys(UC_title))
hap_list = list(dict.fromkeys(UC_happen))

# Years are de-duplicated and then sorted as strings.
his_list = sorted(dict.fromkeys(UC_history))
    
# Pad the three lists with None up to the length of the longest one, then
# write them out side by side as an HTML table.
from itertools import zip_longest

# One tuple per output row: (year, title, event); missing cells become None.
rows = list(zip_longest(his_list, tit_list, hap_list))
his_l = [row[0] for row in rows]  # year column
tit_l = [row[1] for row in rows]  # work-title column
hap_l = [row[2] for row in rows]  # event column

column_order = ['年代', '出来事', '作品名']
table = {'年代': his_l, '出来事': hap_l, '作品名': tit_l}
df = pd.DataFrame(table, columns=column_order)
df.to_html('ret.html', index=False)

補足情報（FW/ツールのバージョンなど）

'/home/urai/crawling/gandamu/gandamu uchuuseiki/1.html'はWikipediaの「宇宙世紀」ページのHTMLデータになります。
元のソースコードは可能な限り消さないようにお願いします。
また、抽出をお願いしたデータをどの変数（リスト）に格納したのかについても、明記をお願いします。