python
1import fileinput 2import re 3import json 4import time 5 6 7def remove_markup(str): 8 str1 = re.sub(r"{{.*?}}", "", str) 9 str1 = re.sub(r"'{2,5}", r"", str1) 10 str1 = re.sub(r"[[(.*?)]]", r"\1", str1) 11 str1 = re.sub(r"*", "", str1) 12 str1 = re.sub(r"<a href=.*?>", "", str1) 13 str1 = re.sub(r"<ref.*?>", "", str1) 14 str1 = re.sub(r"<ref[^>]*>.+</ref>", "", str1) 15 str1 = re.sub(r"====", "", str1) 16 str1 = re.sub(r"===", "", str1) 17 str1 = re.sub(r"==", "", str1) 18 str1 = re.sub(r"[http.*?]", "", str1) 19 20 return str1 21 22 23if __name__ == '__main__': 24 start = time.time() 25 docs = {} 26 for line in fileinput.input('-'): 27 obj = json.loads(line) 28 docs[obj['title']] = obj['text'] 29 for lines in docs[obj['title']].split('\t'): 30 removed_str = remove_markup(lines) 31 line_number = 0 #行数 32 allkanji = 0 33 line_number += 1 34 rkan = re.compile("[一-龥]") 35 foundkanji = rkan.findall(removed_str) 36 oneline_kanji = len(foundkanji) / len(removed_str) #漢字の割合 37 allkanji += oneline_kanji 38 per = round((allkanji / line_number) * 100, 1) 39 docs['per'] = per 40 data = sorted(docs.items(), key=lambda docs: docs['per']) 41 print(data, '%') 42 finish_time = time.time() - start 43 print(finish_time, 'sec')
辞書型のdocsにtitleとプログラムで求めたperという値をいれperの値によってソートしたいのですがエラーが出て表示もソートもされません
ソートの仕方を教えてください
エラーはこのようなエラーです
Traceback (most recent call last):
File "kadai.py", line 49, in <module>
data = sorted(docs.items(), key=lambda docs: docs['per'])
File "kadai.py", line 49, in <lambda>
data = sorted(docs.items(), key=lambda docs: docs['per'])
TypeError: tuple indices must be integers or slices, not str