過学習の対処法を理解したい

前提・実現したいこと

下記のコードを実行して結果をまとめたところ、検証損失（Validation Loss）の値に上昇傾向が見られました。
このことから過学習が起きていると判断したため、どのようにすれば Validation Loss が下降に向かうか教えていただきたいです。

該当のソースコード

python
# Notebook setup. The shell commands below are kept commented out.
#! pip install datasets transformers
from huggingface_hub import notebook_login

# Authenticate against the Hugging Face Hub (the training arguments further
# down set push_to_hub=True).
notebook_login()
# !apt install git-lfs

import transformers

print(transformers.__version__)

# GLUE task names known to this notebook.
GLUE_TASKS = [
    "cola",
    "mnli",
    "mnli-mm",
    "mrpc",
    "qnli",
    "qqp",
    "rte",
    "sst2",
    "stsb",
    "wnli",
]

# Experiment configuration: which task to fine-tune on, which checkpoint to
# start from, and the per-device batch size.
task = "cola"
model_checkpoint = "distilbert-base-uncased"
batch_size = 16

from datasets import load_dataset, load_metric

# "mnli-mm" is loaded under the "mnli" name; every other task uses its own name.
actual_task = task if task != "mnli-mm" else "mnli"
dataset = load_dataset("glue", actual_task)
metric = load_metric('glue', actual_task)

# Bare expressions: presumably evaluated for their notebook cell output.
dataset

dataset["train"][0]
26
27import datasets
28import random
29import pandas as pd
30from IPython.display import display, HTML
31
def show_random_elements(dataset, num_examples=10):
    """Display distinct, randomly chosen rows of ``dataset`` as an HTML table.

    Columns whose feature type is ``datasets.ClassLabel`` are rendered with
    their label names instead of integer ids.

    Args:
        dataset: Object supporting ``len()``, indexing with a list of row
            indices, and a ``features`` mapping of column name to feature type.
        num_examples: Number of distinct rows to show; must not exceed
            ``len(dataset)``.

    Raises:
        AssertionError: If ``num_examples`` is larger than the dataset.
    """
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    # random.sample draws without replacement, so no manual retry loop over
    # already-picked indices is needed.
    picks = random.sample(range(len(dataset)), num_examples)

    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            # Bind the label names at definition time so the lambda does not
            # depend on the loop variable.
            df[column] = df[column].transform(lambda i, names=typ.names: names[i])
    display(HTML(df.to_html()))
46
# Preview random training rows (the function's default is 10 rows).
show_random_elements(dataset["train"])

# Bare expression: presumably evaluated for its notebook cell output.
metric

import numpy as np

# Sanity-check the metric on 64 random 0/1 predictions and 64 random 0/1 labels.
fake_preds = np.random.randint(low=0, high=2, size=64)
fake_labels = np.random.randint(low=0, high=2, size=64)
metric.compute(predictions=fake_preds, references=fake_labels)

from transformers import AutoTokenizer

# Tokenizer matching the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    model_checkpoint,
    use_fast=True,
)

# Try the tokenizer on a pair of sentences.
tokenizer("Hello, this one sentence!", "And this sentence goes with it.")
62
# For each task: the dataset column(s) holding the text. The second entry is
# None for tasks that have only one sentence column.
task_to_keys = {
    # single-sentence tasks
    "cola": ("sentence", None),
    # sentence-pair tasks
    "mnli": ("premise", "hypothesis"),
    "mnli-mm": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    # single-sentence task
    "sst2": ("sentence", None),
    # sentence-pair tasks
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}

sentence1_key, sentence2_key = task_to_keys[task]

# Print the text of the first training example for the selected task.
if sentence2_key is not None:
    print(f"Sentence 1: {dataset['train'][0][sentence1_key]}")
    print(f"Sentence 2: {dataset['train'][0][sentence2_key]}")
else:
    print(f"Sentence: {dataset['train'][0][sentence1_key]}")
82
def preprocess_function(examples):
    """Tokenize the text column(s) of ``examples`` with truncation enabled.

    Reads the module-level ``sentence1_key`` / ``sentence2_key`` to decide
    which columns to pass to the module-level ``tokenizer``: one column when
    ``sentence2_key`` is None, otherwise both columns as a pair.

    Args:
        examples: Mapping indexable by the sentence column names.

    Returns:
        Whatever ``tokenizer`` returns for the selected column(s).
    """
    columns = (examples[sentence1_key],)
    if sentence2_key is not None:
        columns += (examples[sentence2_key],)
    return tokenizer(*columns, truncation=True)
87
# Quick check of the preprocessing on the first five training rows.
preprocess_function(dataset['train'][:5])


# Tokenize the dataset in batches.
encoded_dataset = dataset.map(preprocess_function, batched=True)

from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Number of output labels: 3 for the MNLI variants, 1 for STS-B, 2 otherwise.
if task.startswith("mnli"):
    num_labels = 3
elif task == "stsb":
    num_labels = 1
else:
    num_labels = 2
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
)

# Metric used to pick the best checkpoint; "accuracy" unless the task has a
# dedicated entry.
metric_name = {
    "stsb": "pearson",
    "cola": "matthews_correlation",
}.get(task, "accuracy")
# Last path component of the checkpoint id.
model_name = model_checkpoint.rsplit("/", 1)[-1]

# NOTE(review): the accompanying question reports validation loss rising over
# these 5 epochs. The best checkpoint is selected by `metric_name`, not by
# validation loss -- confirm that this is the intended selection criterion.
args = TrainingArguments(
    output_dir=f"{model_name}-finetuned-{task}",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    push_to_hub=True,
)
114
def compute_metrics(eval_pred):
    """Turn raw model outputs into predictions and score them with ``metric``.

    For every task except "stsb" the prediction is the argmax over axis 1 of
    the raw outputs; for "stsb" it is column 0 of the raw outputs.

    Args:
        eval_pred: Pair ``(predictions, labels)``.

    Returns:
        The result of ``metric.compute`` for the derived predictions and the
        given labels.
    """
    raw_outputs, references = eval_pred
    if task == "stsb":
        preds = raw_outputs[:, 0]
    else:
        preds = np.argmax(raw_outputs, axis=1)
    return metric.compute(predictions=preds, references=references)
122
# Split used for evaluation: the two MNLI variants have dedicated validation
# splits, every other task uses "validation".
validation_key = {
    "mnli-mm": "validation_mismatched",
    "mnli": "validation_matched",
}.get(task, "validation")

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset[validation_key],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Run fine-tuning.
trainer.train()
134