Question edit history

4
Added a note that I am also asking about this on another site.

````diff
@@ -158,7 +158,9 @@
 
 I have compressed this into model.tar.gz and placed it on S3.
 
-
+As this is an urgent matter, I am also asking about it on another site.
+I will share any progress here as well.
+https://ja.stackoverflow.com/questions/91142/sagemaker%e7%92%b0%e5%a2%83%e3%81%ab%e3%81%a6%e3%83%87%e3%83%97%e3%83%ad%e3%82%a4%e3%81%ab%e5%a4%b1%e6%95%97%e3%81%99%e3%82%8b
 
 If anyone knows how to resolve this, I would appreciate your advice.
 
````
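For context on the compression step mentioned above, here is a minimal sketch of building and uploading such an archive. The bucket name and object key are placeholders (assumptions), not values from the question.

```python
# Minimal sketch: package the model directory into model.tar.gz and upload it
# to S3. "my-example-bucket" and the key "models/model.tar.gz" are placeholders.
import tarfile

import boto3

with tarfile.open("model.tar.gz", "w:gz") as tar:
    # arcname="." keeps config.yaml, the .pth files, etc. at the archive root,
    # rather than nesting them under a "model/" directory inside the tarball.
    tar.add("model/", arcname=".")

boto3.resource("s3").Bucket("my-example-bucket").upload_file(
    "model.tar.gz", "models/model.tar.gz"
)
```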
3
Added the entry_point code.

````diff
@@ -26,6 +26,131 @@
 predictor = pytorch_model.deploy(instance_type='ml.t2.2xlarge', initial_instance_count=1)
 ```
 
+※ The inference code passed as entry_point:
+```inference.py
+import os
+import time
+
+import boto3
+import numpy as np
+import pyopenjtalk
+import torch
+from espnet2.bin.tts_inference import Text2Speech
+from espnet2.tasks.tts import TTSTask
+from espnet2.text.token_id_converter import TokenIDConverter
+
+import text_processing as texp
+
+prosodic = True
+
+model_dir = "model/"
+vocoder_dir = "vocoder/"
+CONTENT_TYPE = "text/plain"
+
+train_config = "model/config.yaml"
+model_file = "model/50epoch.pth"
+
+# Specify the vocoder.
+vocoder_tag = "parallel_wavegan/jsut_hifigan.v1"
+vocoder_config = "vocoder/config.yaml"
+vocoder_file = "vocoder/50epoch.pth"
+
+
+def model_fn(model_dir):
+    # Load the trained ESPnet2 TTS model from the extracted model archive.
+    print(model_dir + "config.yaml")
+    print(model_dir + "100epoch.pth")
+    model = Text2Speech.from_pretrained(
+        train_config=model_dir + "config.yaml",
+        model_file=model_dir + "100epoch.pth",
+        vocoder_tag=vocoder_tag,
+        device="cpu",
+        speed_control_alpha=1.0,
+        noise_scale=0.333,
+        noise_scale_dur=0.333,
+    )
+    return model
+
+
+def input_fn(request_body, content_type=CONTENT_TYPE):
+    # The request body is ignored for now; a fixed test string is used instead.
+    input_data = "あいうえお"
+    return input_data
+
+
+def predict_fn(input_data, model):
+    x = "デモテキスト"
+
+    token_id_converter = TokenIDConverter(
+        token_list=model.train_args.token_list,
+        unk_symbol="<unk>",
+    )
+
+    # Convert the input text to token IDs when prosodic symbols are used.
+    text = x
+    if prosodic:
+        tokens = texp.a2p(x)
+        text_ints = token_id_converter.tokens2ids(tokens)
+        text = np.array(text_ints)
+    else:
+        print("\nResult of pyopenjtalk_accent_with_pause analysis:")
+        print(texp.text2yomi(x), "\n")
+
+    # Synthesis.
+    with torch.no_grad():
+        start = time.time()
+        data = model(text)
+        wav = data["wav"]
+    rtf = (time.time() - start) / (len(wav) / model.fs)
+    print(f"RTF = {rtf:5f}")
+
+    if not os.path.isdir("generated_wav"):
+        os.makedirs("generated_wav")
+
+    np_wav = wav.view(-1).cpu().numpy()
+
+    # Write the generated waveform as a 16-bit WAV file.
+    fs = 48000
+    print("Writing output at sampling rate", fs)
+    from scipy.io.wavfile import write
+    samplerate = fs
+    amplitude = np.iinfo(np.int16).max
+    data = amplitude * np_wav / np.max(np.abs(np_wav))
+    write("espnet/egs2/jsut/tts1/generated_wav/" + x + ".wav",
+          samplerate, data.astype(np.int16))
+
+    # Upload the generated file to the S3 bucket.
+    s3 = boto3.resource('s3')
+    bucket = s3.Bucket('alterly-source')
+    bucket.upload_file("espnet/egs2/jsut/tts1/generated_wav/" + x + ".wav",
+                       "source/" + x + ".wav")
+
+
+# Local smoke test of the three handler functions.
+input_object = input_fn("あいうえお", "text/plain")
+model = model_fn(model_dir)
+prediction = predict_fn(input_object, model)
+```
+
 
 ### Supplementary information
 The directory structure before compression is shown below.
````
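One thing worth checking against the structure above: when an entry_point script is supplied, the SageMaker Python SDK generally repacks model.tar.gz so that the model artifacts sit at the archive root and the inference code lives under code/. A rough sketch using the file names from the question; the layout itself is an assumption to verify against the SageMaker documentation for the framework version in use:

```
model.tar.gz
├── config.yaml       # ESPnet training config
├── 100epoch.pth      # trained weights loaded in model_fn
└── code/
    └── inference.py  # entry_point script
```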
2
Deleted a leftover part of the default question template.

````diff
@@ -27,7 +27,7 @@
 ```
 
 
-### Supplementary information
+### Supplementary information
 The directory structure before compression is shown below.
 
 
````
1
Fixed some typos.

````diff
@@ -9,9 +9,9 @@
 Please specify --force/-f option to overwrite the model archive output file.
 See -h/--help for more details./.sagemaker/mms/models/model
 ERROR - %s already exists.
+
+※ The deployment code follows.
 ```
-###Deployment code
-```enter the language here
 from sagemaker import get_execution_role
 from sagemaker.pytorch.model import PyTorchModel
 
````
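At this point in the history the question shows only the imports of the deploy section; for orientation, a deploy call with the SageMaker Python SDK generally looks like the sketch below. The model_data URI, framework_version, and py_version are assumptions, not values confirmed by the question; only the instance type appears in the question itself.

```python
# Minimal sketch of the deploy step, assuming the archive layout sketched
# earlier. model_data, framework_version, and py_version are placeholders.
from sagemaker import get_execution_role
from sagemaker.pytorch.model import PyTorchModel

pytorch_model = PyTorchModel(
    model_data="s3://my-example-bucket/models/model.tar.gz",  # placeholder URI
    role=get_execution_role(),
    entry_point="inference.py",
    framework_version="1.8.1",  # assumed; should match the training setup
    py_version="py3",
)
predictor = pytorch_model.deploy(
    instance_type="ml.t2.2xlarge",  # from the question
    initial_instance_count=1,
)
```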