### Background / what I want to achieve
I downloaded the "Text Classification on GLUE" example from Hugging Face and am running it in a local UNIX terminal. I want to output only the eval_loss values that appear in the results so that I can plot a learning curve.
### Problem / error messages
### Relevant source code
```python
import datasets
import numpy as np
import transformers

GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
task = "cola"
model_checkpoint = "distilbert-base-uncased"
#batch_size = 64
batch_size = 4

from datasets import load_dataset, load_metric

actual_task = "mnli" if task == "mnli-mm" else task
dataset = load_dataset("glue", actual_task)
metric = load_metric('glue', actual_task)

import random
import pandas as pd
from IPython.display import display, HTML

def show_random_elements(dataset, num_examples=10):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)

    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            df[column] = df[column].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mnli-mm": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}
sentence1_key, sentence2_key = task_to_keys[task]

def preprocess_function(examples):
    if sentence2_key is None:
        return tokenizer(examples[sentence1_key], truncation=True)
    return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True)

encoded_dataset = dataset.map(preprocess_function, batched=True)

from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

num_labels = 3 if task.startswith("mnli") else 1 if task == "stsb" else 2
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
model_name = model_checkpoint.split("/")[-1]

args = TrainingArguments(
    f"{model_name}-finetuned-{task}",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=2,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
#   push_to_hub=True,
)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    if task != "stsb":
        predictions = np.argmax(predictions, axis=1)
    else:
        predictions = predictions[:, 0]
#   print('=======================>', eval_pred)
#   print('=======================>')
    return metric.compute(predictions=predictions, references=labels)

validation_key = "validation_mismatched" if task == "mnli-mm" else "validation_matched" if task == "mnli" else "validation"
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"].select(range(8)),
    eval_dataset=encoded_dataset[validation_key].select(range(8)),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()


# Read the eval_loss values back out of d2.txt
import ast
with open('d2.txt') as f:
    for line in f:
        di = ast.literal_eval(line)  # note: literal_eval with an underscore, not literal-eval
        print(di['eval_loss'])
```
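For reference (not part of the downloaded example): the `Trainer` keeps every logged metric in `trainer.state.log_history`, so after `trainer.train()` the eval_loss values could be collected from there directly instead of being scraped from the terminal output. A minimal sketch; the file names `eval_loss.txt` and `learning_curve.png` are my own choices:

```python
# Assumption: with evaluation_strategy="epoch", each evaluation appends a dict
# containing "eval_loss" to trainer.state.log_history.
eval_losses = [log["eval_loss"] for log in trainer.state.log_history if "eval_loss" in log]

with open("eval_loss.txt", "w") as out:
    for loss in eval_losses:
        out.write(f"{loss}\n")

# Hypothetical plotting step for the learning curve
import matplotlib.pyplot as plt
plt.plot(range(1, len(eval_losses) + 1), eval_losses, marker="o")
plt.xlabel("epoch")
plt.ylabel("eval_loss")
plt.savefig("learning_curve.png")
```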
### What I tried
This is probably a beginner-level question, but I would appreciate any help.
### Supplementary information (framework/tool versions, etc.)
Also, when I run `sed -e '$d' d.txt > d2.txt` (a command that deletes the last line of the file), every line that begins with eval is extracted into d2.txt as shown below.
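For reference, the same last-line removal could also be done in Python; a minimal sketch using the file names from the question, assuming d.txt fits comfortably in memory:

```python
# Rough Python equivalent of `sed -e '$d' d.txt > d2.txt`: drop the final line.
with open('d.txt') as src:
    lines = src.readlines()

with open('d2.txt', 'w') as dst:
    dst.writelines(lines[:-1])
```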
Finally, I saved the following as a.py and ran it:

```python
import ast
with open('d2.txt') as f:
    for line in f:
        di = ast.literal_eval(line)
        print(di['eval_loss'])
```
Only one of the two eval_loss values is printed, and the result is not reflected in d2.txt.
What I ultimately want is to save every eval_loss produced by the run to a .txt file.
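A minimal sketch of that final step, assuming each line of d2.txt is a Python dict literal that contains an eval_loss key (the output name eval_loss.txt is only an example):

```python
import ast

# Collect every eval_loss from d2.txt and write one value per line to a .txt file.
with open('d2.txt') as f, open('eval_loss.txt', 'w') as out:
    for line in f:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        record = ast.literal_eval(line)  # note: literal_eval, not literal-eval
        if 'eval_loss' in record:
            out.write(f"{record['eval_loss']}\n")
```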