前提
windows10
python
google colab
コード
python
1コメントに記載 2
発生している問題・エラーメッセージ
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-42-aeceac67f609>", line 249, in <module> meisi2_list = [literal_eval(i) for i in meisi_list] File "<ipython-input-42-aeceac67f609>", line 249, in <listcomp> meisi2_list = [literal_eval(i) for i in meisi_list] File "/usr/lib/python3.8/ast.py", line 59, in literal_eval node_or_string = parse(node_or_string, mode='eval') File "/usr/lib/python3.8/ast.py", line 47, in parse return compile(source, filename, mode, flags, File "<unknown>", line unknown ^ SyntaxError: unexpected EOF while parsing
pycharmでの実行時はエラーは出ませんでした
試したこと
こちら(https://blog.pyq.jp/entry/python_kaiketsu_181120) や(https://teratail.com/questions/101679) を基に文法がおかしいところは警告文でも直しました.
補足情報(FW/ツールのバージョンなど)
ここにより詳細な情報を記載してください。
コードを提示しよう
import collections
import MeCab
import csv
import sqlite3
import re
import time
import linecache
from ast import literal_eval
#from fabric.colors import red, blue
t1 = time.time()
print('”表記ゆれを提示する”')
color_dic = {'yellow':'\033[43m', 'red':'\033[31m', 'blue':'\033[34m', 'end':'\033[0m'}
#キーワードを赤く表示する。
def print_tag(text, keyword,color="red"):
keyword.sort(key=len, reverse=True)
pat = f'({"|".join(keyword)})' # (母親|母)
ret = re.sub(pat, lambda m: m.group(1), text)
print(ret,)
print(f"時間:{t1}")
# 音声ガイド台本の読み込み textファイル
# ****には保存したtxtファイルのパス名を入れてください
f = open('/content/drive/My Drive/hyouki.txt', 'r', encoding='UTF-8')
text = f.read()
f.close()
#読み込んだtextファイルで形態素解析を行う
tagger =MeCab.Tagger()
tagger.parse('')
node = tagger.parseToNode(text)
print(f"時間2:{t1}")
#取り出す品詞を決めている.今回は名詞
word_list=[]
while node:
word_type = node.feature.split(',')[0]
#名詞の他にも動詞や形容詞なども追加できる
if word_type in ["名詞"]:
word_list.append(node.surface)
node=node.next
word_chain=' '.join(word_list)
data_list=[]
for i in range(len(word_list)):
data_list.append([word_list[i]])
with open("/content/drive/My Drive/mecab.csv", "w") as f:
w = csv.writer(f, delimiter=",")
for new_data_list in data_list :
w.writerow(new_data_list)
conn = sqlite3.connect("/content/drive/My Drive/wnjpn.db")
csvfile = '/content/drive/My Drive/mecab.csv'
outfile = '/content/drive/My Drive/similar_words.txt'
def csv_input(path_name):
rows = []
with open(path_name, encoding='"UTF-8"') as f:
reader = csv.reader(f)
for row in reader:
rows.append(row)
return rows
def searchsimilarwords(word):
word = ','.join(word)
cur = conn.execute("select wordid from word where lemma='%s'" % word)
word_id = 99999999 #temp
for row in cur:
word_id = row[0]
# Wordnetに存在する語であるかの判定
if word_id==99999999:
return
cur = conn.execute("select synset from sense where wordid='%s'" % word_id)
synsets = []
for row in cur:
synsets.append(row[0])
simdict = []
for synset in synsets:
cur1 = conn.execute("select name from synset where synset='%s'" % synset)
cur2 = conn.execute("select def from synset_def where (synset='%s' and lang='jpn')" % synset)
cur3 = conn.execute("select wordid from sense where (synset='%s' and wordid!=%s)" % (synset, word_id))
for row3 in cur3:
target_word_id = row3[0]
cur3_1 = conn.execute("select lemma from word where wordid=%s" % target_word_id)
for row3_1 in cur3_1:
# 類似語をリストに格納
simdict.append(row3_1[0])
return simdict
def create_similar_wordlst(full_word):
parent = []
child = []
with open(csvfile, encoding='"UTF-8"') as f:
reader = csv.reader(f)
for row in reader:
child = []
synonym = searchsimilarwords(row)
if not synonym is None:
row = ','.join(row)
child.append(row)
for f_row in full_word:
f_row = ','.join(f_row)
for syn in synonym:
if f_row == syn:
child.append(syn)
if len(child) > 1:
parent.append(set(child))
# print(parent)
return parent
def save_synonyms(lst):
norlst = []
for row in lst:
row = list(row)
row = ','.join(row)
norlst.append(row)
norlst = set(norlst)
with open(outfile, mode='w') as f ,open('/content/drive/My Drive/類義語3.txt', 'w', encoding='UTF-8') as f1:
for row in norlst:
f.write(row+'\n')
kw_list = row.split(",")
##print(kw_list)
print(kw_list , file=f1)
print(kw_list)
##print(row)
print("以下が類義語と検出されました")
txt_list = text.split('\n')
with open('/content/drive/My Drive/名詞.txt', 'w', encoding='UTF-8') as f3:
for n in range(len(txt_list)):
# 読み込んだtextファイルで形態素解析を行う
tagger = MeCab.Tagger()
tagger.parse('')
node = tagger.parseToNode(txt_list[n])
# 取り出す品詞を決めている.今回は名詞
word_list = []
while node:
word_type = node.feature.split(',')[0]
# 名詞の他にも動詞や形容詞なども追加できる
if word_type in ["名詞"]:
word_list.append(node.surface)
keitaiso_list = []
for element in word_list:
if element not in keitaiso_list:
keitaiso_list.append(element)
node = node.next
word_chain = ' '.join(word_list)
##print(word_chain,file=f1)
##print(word_list)
print(keitaiso_list,file=f3)
def main():
full_word = csv_input(csvfile)
save_synonyms(create_similar_wordlst(full_word))
if __name__ == "__main__":
main()
with open('/content/drive/My Drive/類義語3.txt', 'r', encoding='UTF-8') as f1, open('/content/drive/My Drive/名詞.txt', 'r', encoding='UTF-8') as f2 ,open('/content/drive/My Drive/削除済み.txt', 'w', encoding='UTF-8') as f3:
total_lines = sum(1 for line in f1)
total_lines2 = sum(1 for line in f2)
ruigigo_list = []
ruigigo2_list = []
ruigigo3_list = []
for m in range(total_lines2):
meisi_list = []
data2 = linecache.getline('/content/drive/My Drive/名詞.txt', m + 1).strip()
meisi_list.append(data2)
meisi2_list = [literal_eval(i) for i in meisi_list]
meisi3_list = []
for e in meisi2_list:
if isinstance(e, list):
meisi3_list += e
else:
meisi3_list.append(e)
##print(meisi3_list)
for k in range(total_lines):
ruigigo_list = []
data = linecache.getline('/content/drive/My Drive/類義語3.txt', k + 1).strip()
ruigigo_list.append(data)
ruigigo2_list = [literal_eval(i) for i in ruigigo_list]
ruigigo3_list = []
for e in ruigigo2_list:
if isinstance(e, list):
ruigigo3_list += e
else:
ruigigo3_list.append(e)
n = set(word for word in meisi3_list if word in ruigigo3_list)
if len(n) >= 2:
meisi3_list = [word for word in meisi3_list if word not in n]
print(meisi3_list , file=f3)
f1.close()
f2.close()
f3.close()
with open('/content/drive/My Drive/hyouki.txt', 'r', encoding='UTF-8') as f1, open('/content/drive/My Drive/二つの文章.txt', 'w', encoding='UTF-8') as f2:
total_lines = sum(1 for line in f1)
##print(total_lines)
ruigigo1_list = []
for i in range(total_lines):
ruigigo4_list = []
data = linecache.getline('/content/drive/My Drive/hyouki.txt', i + 1).strip()
"""data2 = linecache.getline('hyouki.txt', i + 2).strip()
print(data, data2, file=f2)"""
for m in range(total_lines):
if m + 1 > i + 1:
data2 = linecache.getline('/content/drive/My Drive/hyouki.txt', m + 1).strip()
print(data,'///',data2 , file=f2)
f1.close()
f2.close()
with open('/content/drive/My Drive/削除済み.txt', 'r', encoding='UTF-8') as f1,open('/content/drive/My Drive/二つの形態素.txt', 'w', encoding='UTF-8') as f2:
total_lines = sum(1 for line in f1)
##for i in range(total_lines):
for i in range(total_lines):
data = linecache.getline('/content/drive/My Drive/削除済み.txt', i + 1).strip()
##print(data)
for m in range(total_lines):
txt_list = []
if m + 1 > i + 1:
data2 = linecache.getline('/content/drive/My Drive/削除済み.txt', m + 1).strip()
txt_list.append(data)
txt_list.append(data2)
##print(txt_list)
txt2_list = [literal_eval(i) for i in txt_list]
txt3_list = []
for e in txt2_list:
if isinstance(e, list):
txt3_list += e
else:
txt3_list.append(e)
txt4_list = []
for element in txt3_list:
if element not in txt4_list:
txt4_list.append(element)
print(txt4_list , file = f2)
f1.close()
f2.close()
with open('/content/drive/My Drive/二つの形態素.txt', 'r', encoding='UTF-8') as f1,open('/content/drive/My Drive/類義語3.txt', 'r', encoding='UTF-8') as f2,open('/content/drive/My Drive/hyouki.txt','r',encoding='UTF-8') as f3,open('/content/drive/My Drive/二つの文章.txt', 'r', encoding='UTF-8') as f4:
total_lines = sum(1 for line in f1)
total_lines2 = sum(1 for line in f2)
a = sum(1 for line in f3)
for i in range(total_lines2):
ruigigo_list = []
data = linecache.getline('/content/drive/My Drive/類義語3.txt', i + 1).strip()
ruigigo_list.append(data)
ruigigo2_list = [literal_eval(i) for i in ruigigo_list]
ruigigo3_list = []
for e in ruigigo2_list:
if isinstance(e, list):
ruigigo3_list += e
else:
ruigigo3_list.append(e)
print('\n')
print(ruigigo3_list)
for m in range(total_lines):
txt5_list = []
data2 = linecache.getline('/content/drive/My Drive/二つの形態素.txt', m + 1).strip()
txt5_list.append(data)
txt5_list.append(data2)
txt6_list = [literal_eval(i) for i in txt5_list]
txt7_list = []
for e in txt6_list:
if isinstance(e, list):
txt7_list += e
else:
txt7_list.append(e)
##print(txt7_list)
txt8_list = [k for k, v in collections.Counter(txt7_list).items() if v >= 2]
if len(txt8_list) >= 2:
for t in range(1,a):#読み取る台本の行数
if (t - 1)*(2 * a - t)+2 <= 2*(m + 1) <= t*((2 * a - 1) - t):
質問文は編集できるので、そっちの方に追記しよう。
コードは、「コードの挿入」でいれよーねっ
文字数が多くなりすぎてしまい挿入できませんでした.
申し訳ありません
