### Background / what I want to achieve
I downloaded the "Text Classification on GLUE" example from Hugging Face and am running it in a local UNIX terminal. I want to output only the eval_loss values that appear in the results so that I can plot a learning curve.
### Problem / error messages
### Relevant source code
```python
import datasets
import numpy as np
import transformers

GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
task = "cola"
model_checkpoint = "distilbert-base-uncased"
#batch_size = 64
batch_size = 4

from datasets import load_dataset, load_metric

actual_task = "mnli" if task == "mnli-mm" else task
dataset = load_dataset("glue", actual_task)
metric = load_metric('glue', actual_task)

import random
import pandas as pd
from IPython.display import display, HTML

def show_random_elements(dataset, num_examples=10):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)

    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            df[column] = df[column].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mnli-mm": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}
sentence1_key, sentence2_key = task_to_keys[task]

def preprocess_function(examples):
    if sentence2_key is None:
        return tokenizer(examples[sentence1_key], truncation=True)
    return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True)

encoded_dataset = dataset.map(preprocess_function, batched=True)

from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

num_labels = 3 if task.startswith("mnli") else 1 if task == "stsb" else 2
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
model_name = model_checkpoint.split("/")[-1]

args = TrainingArguments(
    f"{model_name}-finetuned-{task}",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=2,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
#   push_to_hub=True,
)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    if task != "stsb":
        predictions = np.argmax(predictions, axis=1)
    else:
        predictions = predictions[:, 0]
#   print('=======================>', eval_pred)
#   print('=======================>')
    return metric.compute(predictions=predictions, references=labels)

validation_key = "validation_mismatched" if task == "mnli-mm" else "validation_matched" if task == "mnli" else "validation"
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"].select(range(8)),
    eval_dataset=encoded_dataset[validation_key].select(range(8)),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()


# Read the eval_loss values back out of d2.txt
import ast
with open('d2.txt') as f:
    for line in f:
        di = ast.literal_eval(line)  # note: literal_eval with an underscore, not literal-eval
        print(di['eval_loss'])
```
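For reference (not part of the downloaded example): the `Trainer` keeps every logged metric in `trainer.state.log_history`, so after `trainer.train()` the eval_loss values could be collected from there directly instead of being scraped from the terminal output. A minimal sketch; the file names `eval_loss.txt` and `learning_curve.png` are my own choices:

```python
# Assumption: with evaluation_strategy="epoch", each evaluation appends a dict
# containing "eval_loss" to trainer.state.log_history.
eval_losses = [log["eval_loss"] for log in trainer.state.log_history if "eval_loss" in log]

with open("eval_loss.txt", "w") as out:
    for loss in eval_losses:
        out.write(f"{loss}\n")

# Hypothetical plotting step for the learning curve
import matplotlib.pyplot as plt
plt.plot(range(1, len(eval_losses) + 1), eval_losses, marker="o")
plt.xlabel("epoch")
plt.ylabel("eval_loss")
plt.savefig("learning_curve.png")
```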
### What I tried
This is probably a beginner-level question, but I would appreciate any help.
### Supplementary information (framework/tool versions, etc.)
Also, when I run `sed -e '$d' d.txt > d2.txt` (a command that deletes the last line of the file), every line that begins with eval is extracted into d2.txt as shown below.
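For reference, the same last-line removal could also be done in Python; a minimal sketch using the file names from the question, assuming d.txt fits comfortably in memory:

```python
# Rough Python equivalent of `sed -e '$d' d.txt > d2.txt`: drop the final line.
with open('d.txt') as src:
    lines = src.readlines()

with open('d2.txt', 'w') as dst:
    dst.writelines(lines[:-1])
```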
Finally, I saved the following as a.py and ran it:

```python
import ast
with open('d2.txt') as f:
    for line in f:
        di = ast.literal_eval(line)
        print(di['eval_loss'])
```
Only one of the two eval_loss values is printed, and the result is not reflected in d2.txt.
What I ultimately want is to save every eval_loss produced by the run to a .txt file.
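A minimal sketch of that final step, assuming each line of d2.txt is a Python dict literal that contains an eval_loss key (the output name eval_loss.txt is only an example):

```python
import ast

# Collect every eval_loss from d2.txt and write one value per line to a .txt file.
with open('d2.txt') as f, open('eval_loss.txt', 'w') as out:
    for line in f:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        record = ast.literal_eval(line)  # note: literal_eval, not literal-eval
        if 'eval_loss' in record:
            out.write(f"{record['eval_loss']}\n")
```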