ゼロから作る Deep Learning 2/Seq2Seq
Posted on
7章: RNNによる文章生成
ゼロから作るDeep Learning (2)の読書メモです。6章ではゲートと呼ばれる仕組みを導入することで長期的な依存関係を学習できる LSTM の実装について学びました。7章では前章で実装した言語モデルを利用して RNN による文章生成を行い、時系列データを別の時系列データに変換できる Seq2Seq と呼ばれる手法をみていきます。
参考実装
%sh
# Start from a clean checkout of the book's reference implementation.
# The two commands must be separate; on a single line `rm -rf` would also
# receive "git", "clone" and the URL as paths to delete and nothing is cloned.
rm -rf /tmp/deep-learning-from-scratch-2
git clone https://github.com/oreilly-japan/deep-learning-from-scratch-2 /tmp/deep-learning-from-scratch-2
Cloning into '/tmp/deep-learning-from-scratch-2'...
必要なモジュールを入れる
%sh pip3 install numpy matplotlib
Requirement already satisfied: numpy in /usr/lib64/python3.6/dist-packages Requirement already satisfied: matplotlib in /usr/lib64/python3.6/dist-packages Requirement already satisfied: pytz in /usr/lib/python3.6/dist-packages (from matplotlib) Requirement already satisfied: six>=1.10 in /usr/lib/python3.6/dist-packages (from matplotlib) Requirement already satisfied: cycler>=0.10 in /usr/lib/python3.6/dist-packages (from matplotlib) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/lib64/python3.6/dist-packages (from matplotlib) Requirement already satisfied: python-dateutil>=2.1 in /usr/lib/python3.6/dist-packages (from matplotlib) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/lib/python3.6/dist-packages (from matplotlib) Requirement already satisfied: setuptools in /usr/lib/python3.6/dist-packages (from kiwisolver>=1.0.1->matplotlib) You are using pip version 9.0.3, however version 19.0.1 is available. You should consider upgrading via the 'pip install --upgrade pip' command.
7.1: 言語モデルを使った文章生成
- 前章で実装した言語モデルを使って文章を生成する
%python3 import sys sys.path.append('/tmp/deep-learning-from-scratch-2')
LSTMレイヤ
%python3 from common.functions import sigmoid class LSTM: def __init__(self, Wx, Wh, b): self.params = [Wx, Wh, b] self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)] self.cache = None def forward(self, x, h_prev, c_prev): Wx, Wh, b = self.params N, H = h_prev.shape A = np.dot(x, Wx) + np.dot(h_prev, Wh) + b f = A[:, :H] g = A[:, H:2*H] i = A[:, 2*H:3*H] o = A[:, 3*H:] f = sigmoid(f) g = np.tanh(g) i = sigmoid(i) o = sigmoid(o) c_next = f * c_prev + g * i h_next = o * np.tanh(c_next) self.cache = (x, h_prev, c_prev, i, f, g, o, c_next) return h_next, c_next def backward(self, dh_next, dc_next): Wx, Wh, b = self.params x, h_prev, c_prev, i, f, g, o, c_next = self.cache tanh_c_next = np.tanh(c_next) ds = dc_next + (dh_next * o) * (1 - tanh_c_next ** 2) dc_prev = ds * f di = ds * g df = ds * c_prev do = dh_next * tanh_c_next dg = ds * i di *= i * (1 - i) df *= f * (1 - f) do *= o * (1 - o) dg *= (1 - g ** 2) dA = np.hstack((df, dg, di, do)) dWh = np.dot(h_prev.T, dA) dWx = np.dot(x.T, dA) db = dA.sum(axis=0) self.grads[0][...] = dWx self.grads[1][...] = dWh self.grads[2][...] = db dx = np.dot(dA, Wx.T) dh_prev = np.dot(dA, Wh.T) return dx, dh_prev, dc_prev
%python3 class TimeLSTM: def __init__(self, Wx, Wh, b, stateful=False): self.params = [Wx, Wh, b] self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)] self.layers = None self.h, self.c = None, None self.dh = None self.stateful = stateful def forward(self, xs): Wx, Wh, b = self.params N, T, D = xs.shape H = Wh.shape[0] self.layers = [] hs = np.empty((N, T, H), dtype='f') if not self.stateful or self.h is None: self.h = np.zeros((N, H), dtype='f') if not self.stateful or self.c is None: self.c = np.zeros((N, H), dtype='f') for t in range(T): layer = LSTM(*self.params) self.h, self.c = layer.forward(xs[:, t, :], self.h, self.c) hs[:, t, :] = self.h self.layers.append(layer) return hs def backward(self, dhs): Wx, Wh, b = self.params N, T, H = dhs.shape D = Wx.shape[0] dxs = np.empty((N, T, D), dtype='f') dh, dc = 0, 0 grads = [0, 0, 0] for t in reversed(range(T)): layer = self.layers[t] dx, dh, dc = layer.backward(dhs[:, t, :] + dh, dc) dxs[:, t, :] = dx for i, grad in enumerate(layer.grads): grads[i] += grad for i, grad in enumerate(grads): self.grads[i][...] = grad self.dh = dh return dxs def set_state(self, h, c=None): self.h, self.c = h, c def reset_state(self): self.h, self.c = None, None
Rnnlm
%python3 import pickle from common.time_layers import TimeSoftmaxWithLoss, TimeEmbedding, TimeAffine class Rnnlm: def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100): V, D, H = vocab_size, wordvec_size, hidden_size rn = np.random.randn embed_W = (rn(V, D) / 100).astype('f') lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f') lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f') lstm_b = np.zeros(4 * H).astype('f') affine_W = (rn(H, V) / np.sqrt(H)).astype('f') affine_b = np.zeros(V).astype('f') self.layers = [ TimeEmbedding(embed_W), TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True), TimeAffine(affine_W, affine_b) ] self.loss_layer = TimeSoftmaxWithLoss() self.lstm_layer = self.layers[1] self.params, self.grads = [], [] for layer in self.layers: self.params += layer.params self.grads += layer.grads def predict(self, xs): for layer in self.layers: xs = layer.forward(xs) return xs def forward(self, xs, ts): score = self.predict(xs) loss = self.loss_layer.forward(score, ts) return loss def backward(self, dout=1): dout = self.loss_layer.backward(dout) for layer in reversed(self.layers): dout = layer.backward(dout) return dout def reset_state(self): self.lstm_layer.reset_state() def save_params(self, file_name='Rnnlm.pkl'): with open(file_name, 'wb') as f: pickle.dump(self.params, f) def load_params(self, file_name='Rnnlm.pkl'): with open(file_name, 'rb') as f: self.params = pickle.load(f)
BetterRnnlm
Rnnlmとの違い:
- LSTM レイヤの多層化
- Dropout を使用
- Embedding レイヤと Affine レイヤで重みを共有 (weight tying)
%python3 from common.time_layers import TimeEmbedding, TimeDropout, TimeAffine, TimeSoftmaxWithLoss from common.np import * from common.base_model import BaseModel class BetterRnnlm(BaseModel): def __init__(self, vocab_size=10000, wordvec_size=650, hidden_size=650, dropout_ratio=0.5): V, D, H = vocab_size, wordvec_size, hidden_size rn = np.random.randn embed_W = (rn(V, D) / 100).astype('f') lstm_Wx1 = (rn(D, 4*H) / np.sqrt(D)).astype('f') lstm_Wh1 = (rn(H, 4*H) / np.sqrt(H)).astype('f') lstm_b1 = np.zeros(4*H).astype('f') lstm_Wx2 = (rn(D, 4*H) / np.sqrt(D)).astype('f') lstm_Wh2 = (rn(H, 4*H) / np.sqrt(H)).astype('f') lstm_b2 = np.zeros(4*H).astype('f') affine_b = np.zeros(V).astype('f') self.layers = [ TimeEmbedding(embed_W), TimeDropout(dropout_ratio), TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True), TimeDropout(dropout_ratio), TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True), TimeDropout(dropout_ratio), TimeAffine(embed_W.T, affine_b) ] self.loss_layer = TimeSoftmaxWithLoss() self.lstm_layers = [self.layers[2], self.layers[4]] self.drop_layers = [self.layers[1], self.layers[3], self.layers[5]] self.params, self.grads = [], [] for layer in self.layers: self.params += layer.params self.grads += layer.grads def predict(self, xs, train_flg=False): for layer in self.drop_layers: layer.train_flg = train_flg for layer in self.layers: xs = layer.forward(xs) return xs def forward(self, xs, ts, train_flg=True): score = self.predict(xs, train_flg) loss = self.loss_layer.forward(score, ts) return loss def backward(self, dout=1): dout = self.loss_layer.backward(dout) for layer in reversed(self.layers): dout = layer.backward(dout) return dout def reset_state(self): for layer in self.lstm_layers: layer.reset_state()
文章生成
- np.random.choice は、引数 p に指定した確率分布に従って要素をランダムにサンプリングする関数
%python3 import numpy as np from common.functions import softmax class RnnlmGen(Rnnlm): def generate(self, start_id, skip_ids=None, sample_size=100): word_ids = [start_id] x = start_id while len(word_ids) < sample_size: x = np.array(x).reshape(1, 1) score = self.predict(x) p = softmax(score.flatten()) sampled = np.random.choice(len(p), size=1, p=p) if (skip_ids is None) or (sampled not in skip_ids): x = sampled word_ids.append(int(x)) return word_ids
%python3 from dataset import ptb corpus, word_to_id, id_to_word = ptb.load_data('train') vocab_size = len(word_to_id) corpus_size = len(corpus) model = RnnlmGen() start_word = 'you' start_id = word_to_id[start_word] skip_words = ['N', '<unk>', '$'] skip_ids = [word_to_id[w] for w in skip_words] word_ids = model.generate(start_id, skip_ids) txt = ' '.join([id_to_word[i] for i in word_ids]) txt = txt.replace(' <eos>', '.\n') print(txt)
you march instrument quickly dual overly t. resident shirts benign attendants takeover-stock accountability guard deregulation recoup mean corners impressed operation negotiated incorrectly reservations ssangyong mandate discretion alliances touched authority cathcart know-how treaty disagreement falls dai-ichi polyethylene multiple diplomat goldsmith airplanes murdoch durkin cool naturally truce setbacks small heating rico crazy scarce confronted circumstances leslie force said prudential arms cholesterol happening surfaced parties security tendered week declaring earthquake intimate visible backing rank seismic hair divisive know-how prints yard whooping worry hills accepting mich. banning free-market 500-stock charity earlier integrity place leads mediator benjamin indicated alleviate kick concert desire conditions budgetary chose
- 上の結果は学習していない(重みがランダムな初期値のままの)モデルの出力なので、単語がでたらめに並んでいるだけ
%python3 model.load_params('/tmp/deep-learning-from-scratch-2/ch06/Rnnlm.pkl')
%python3 start_word = 'you' start_id = word_to_id[start_word] skip_words = ['N', '<unk>', '$'] skip_ids = [word_to_id[w] for w in skip_words] word_ids = model.generate(start_id, skip_ids) txt = ' '.join([id_to_word[i] for i in word_ids]) txt = txt.replace(' <eos>', '.\n') print(txt)
you serve illinois wiped reportedly mid-1980s substantially f emhart expression macmillan than automatic derivative appetite rhone-poulenc spots following wisconsin 1960s computer-driven ec application massage 's harmful worse announced deliberately mission wars institutional ehrlich chosen exception hut arbitragers stepping compete chris supplying carbide initiatives went feelings overhead customers today accelerated unfilled system iras drinks low licenses judge doctor rural widen look cms hotels products towns hearst grown dominion placing tell privately workstation responsibility formation unpublished cloud makers stick allies jay conspiring co. nih gen-probe confronted bankers unfriendly become magnified calgary enserch apt abortion beauty reruns deal constitution itself rebounded fare jeff
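- トレーニング済みの重みを入れてもそんなに変わらず(上の出力を得たときの Rnnlm.load_params は self.params を読み込んだリストに差し替えるだけで、各レイヤが保持している重み配列そのものは更新しないため、学習済みの重みが実際には使われていないと考えられる)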
%sh curl https://www.oreilly.co.jp/pub/9784873118369/BetterRnnlm.pkl > /tmp/BetterRnnlm.pkl
% Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 0 37.7M 0 236k 0 0 159k 0 0:04:02 0:00:01 0:04:01 159k 6 37.7M 6 2460k 0 0 985k 0 0:00:39 0:00:02 0:00:37 984k 13 37.7M 13 5164k 0 0 1474k 0 0:00:26 0:00:03 0:00:23 1474k 18 37.7M 18 7180k 0 0 1593k 0 0:00:24 0:00:04 0:00:20 1593k 24 37.7M 24 9372k 0 0 1700k 0 0:00:22 0:00:05 0:00:17 1860k 30 37.7M 30 11.3M 0 0 1789k 0 0:00:21 0:00:06 0:00:15 2271k 36 37.7M 36 13.6M 0 0 1860k 0 0:00:20 0:00:07 0:00:13 2296k 42 37.7M 42 15.9M 0 0 1913k 0 0:00:20 0:00:08 0:00:12 2219k 48 37.7M 48 18.1M 0 0 1958k 0 0:00:19 0:00:09 0:00:10 2288k 54 37.7M 54 20.4M 0 0 1993k 0 0:00:19 0:00:10 0:00:09 2317k 60 37.7M 60 22.7M 0 0 2026k 0 0:00:19 0:00:11 0:00:08 2336k 66 37.7M 66 25.1M 0 0 2062k 0 0:00:18 0:00:12 0:00:06 2366k 73 37.7M 73 27.7M 0 0 2110k 0 0:00:18 0:00:13 0:00:05 2450k 81 37.7M 81 30.6M 0 0 2165k 0 0:00:17 0:00:14 0:00:03 2559k 88 37.7M 88 33.3M 0 0 2207k 0 0:00:17 0:00:15 0:00:02 2659k 95 37.7M 95 35.9M 0 0 2233k 0 0:00:17 0:00:16 0:00:01 2710k 100 37.7M 100 37.7M 0 0 2240k 0 0:00:17 0:00:17 --:--:-- 2710k
%python3 import numpy as np from common.functions import softmax class BetterRnnlmGen(BetterRnnlm): def generate(self, start_id, skip_ids=None, sample_size=100): word_ids = [start_id] x = start_id while len(word_ids) < sample_size: x = np.array(x).reshape(1, 1) score = self.predict(x) p = softmax(score.flatten()) sampled = np.random.choice(len(p), size=1, p=p) if (skip_ids is None) or (sampled not in skip_ids): x = sampled word_ids.append(int(x)) return word_ids
%python3 model = BetterRnnlmGen() model.load_params('/tmp/BetterRnnlm.pkl') start_word = 'you' start_id = word_to_id[start_word] skip_words = ['N', '<unk>', '$'] skip_ids = [word_to_id[w] for w in skip_words] word_ids = model.generate(start_id, skip_ids) txt = ' '.join([id_to_word[i] for i in word_ids]) txt = txt.replace(' <eos>', '.\n') print(txt)
you want to raise their eggs. the gradual results exceeding the market have lent an unprecedented number of plants held during the past five years at the top rate of economists and energy casualty sales april. the bank also said output of unfilled orders have hit a surge in expenses of business as an increase of sales by the navy 's and prepared assets. eight business projects went across the area ahead the new package cited texas continental corp. 's chairman frederick a. robinson and nl. these days drexel 's clients are a brand attitude to
- BetterRnnlmの方はだいぶ英語っぽい感じになっている
7.2: seq2seq
足し算の式を文字の列とみなし、それを計算結果の文字列へ変換する問題を seq2seq で解く
使用するデータセット
%sh cat /tmp/deep-learning-from-scratch-2/dataset/addition.txt | head
16+75 _91 52+607 _659 75+22 _97 63+22 _85 795+3 _798 706+796_1502 8+4 _12 84+317 _401 9+3 _12 6+2 _8
%python3 from dataset import sequence (x_train, t_train), (x_test, t_test) = \ sequence.load_data('addition.txt', seed=1984) char_to_id, id_to_char = sequence.get_vocab() print(x_train.shape, t_train.shape) print(x_test.shape, t_test.shape) print(x_train[0]) print(t_train[0]) print(''.join([id_to_char[c] for c in x_train[0]])) print(''.join([id_to_char[c] for c in t_train[0]]))
(45000, 7) (45000, 5) (5000, 7) (5000, 5) [ 3 0 2 0 0 11 5] [ 6 0 11 7 5] 71+118 _189
Encoderの実装
%python3 class Encoder: def __init__(self, vocab_size, wordvec_size, hidden_size): V, D, H = vocab_size, wordvec_size, hidden_size rn = np.random.randn embed_W = (rn(V, D) / 100).astype('f') lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f') lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f') lstm_b = np.zeros(4 * H).astype('f') self.embed = TimeEmbedding(embed_W) self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False) self.params = self.embed.params + self.lstm.params self.grads = self.embed.grads + self.lstm.grads self.hs = None def forward(self, xs): xs = self.embed.forward(xs) hs = self.lstm.forward(xs) self.hs = hs return hs[:, -1, :] def backward(self, dh): dhs = np.zeros_like(self.hs) dhs[:, -1, :] = dh dout = self.lstm.backward(dhs) dout = self.embed.backward(dout) return dout
%python3 class Decoder: def __init__(self, vocab_size, wordvec_size, hidden_size): V, D, H = vocab_size, wordvec_size, hidden_size rn = np.random.randn embed_W = (rn(V, D) / 100).astype('f') lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f') lstm_Wh = (rn(H, 4 * H) / np.sqrt(D)).astype('f') lstm_b = np.zeros(4 * H).astype('f') affine_W = (rn(H, V) / np.sqrt(H)).astype('f') affine_b = np.zeros(V).astype('f') self.embed = TimeEmbedding(embed_W) self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True) self.affine = TimeAffine(affine_W, affine_b) self.params, self.grads = [], [] for layer in (self.embed, self.lstm, self.affine): self.params += layer.params self.grads += layer.grads def forward(self, xs, h): self.lstm.set_state(h) out = self.embed.forward(xs) out = self.lstm.forward(out) score = self.affine.forward(out) return score def backward(self, dscore): dout = self.affine.backward(dscore) dout = self.lstm.backward(dout) dout = self.embed.backward(dout) dh = self.lstm.dh return dh def generate(self, h, start_id, sample_size): sampled = [] sample_id = start_id self.lstm.set_state(h) for _ in range(sample_size): x = np.array(sample_id).reshape((1, 1)) out = self.embed.forward(x) out = self.lstm.forward(out) score = self.affine.forward(out) sample_id = np.argmax(score.flatten()) sampled.append(int(sample_id)) return sampled
%python3 from common.base_model import BaseModel class Seq2Seq(BaseModel): def __init__(self, vocab_size, wordvec_size, hidden_size): V, D, H = vocab_size, wordvec_size, hidden_size self.encoder = Encoder(V, D, H) self.decoder = Decoder(V, D, H) self.softmax = TimeSoftmaxWithLoss() self.params = self.encoder.params + self.decoder.params self.grads = self.encoder.grads + self.decoder.grads def forward(self, xs, ts): decoder_xs, decoder_ts = ts[:, :-1], ts[:, 1:] h = self.encoder.forward(xs) score = self.decoder.forward(decoder_xs, h) loss = self.softmax.forward(score, decoder_ts) return loss def backward(self, dout=1): dout = self.softmax.backward(dout) dh = self.decoder.backward(dout) dout = self.encoder.backward(dh) return dout def generate(self, xs, start_id, sample_size): h = self.encoder.forward(xs) sampled = self.decoder.generate(h, start_id, sample_size) return sampled
Seq2Seqモデルのトレーニング
%python3 import numpy as np import matplotlib.pyplot as plt from dataset import sequence from common.optimizer import Adam from common.trainer import Trainer from common.util import eval_seq2seq (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt') char_to_id, id_to_char = sequence.get_vocab() vocab_size = len(char_to_id) wordvec_size = 16 hidden_size = 128 batch_size = 128 max_epoch = 25 max_grad = 5.0 model = Seq2Seq(vocab_size, wordvec_size, hidden_size) optimizer = Adam() trainer = Trainer(model, optimizer) acc_list = [] for epoch in range(max_epoch): trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size, max_grad=max_grad) correct_num = 0 for i in range(len(x_test)): question, correct = x_test[[i]], t_test[[i]] verbose = i < 10 correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose) acc = float(correct_num) / len(x_test) acc_list.append(acc) print('val acc %.3f%%' % (acc * 100))
| epoch 1 | iter 1 / 351 | time 0[s] | loss 2.56 | epoch 1 | iter 21 / 351 | time 1[s] | loss 2.44 | epoch 1 | iter 41 / 351 | time 2[s] | loss 2.07 | epoch 1 | iter 61 / 351 | time 3[s] | loss 1.93 | epoch 1 | iter 81 / 351 | time 4[s] | loss 1.88 | epoch 1 | iter 101 / 351 | time 5[s] | loss 1.82 | epoch 1 | iter 121 / 351 | time 6[s] | loss 1.80 | epoch 1 | iter 141 / 351 | time 8[s] | loss 1.78 | epoch 1 | iter 161 / 351 | time 9[s] | loss 1.77 | epoch 1 | iter 181 / 351 | time 10[s] | loss 1.76 | epoch 1 | iter 201 / 351 | time 11[s] | loss 1.76 | epoch 1 | iter 221 / 351 | time 12[s] | loss 1.75 | epoch 1 | iter 241 / 351 | time 13[s] | loss 1.74 | epoch 1 | iter 261 / 351 | time 14[s] | loss 1.74 | epoch 1 | iter 281 / 351 | time 16[s] | loss 1.73 | epoch 1 | iter 301 / 351 | time 17[s] | loss 1.72 | epoch 1 | iter 321 / 351 | time 18[s] | loss 1.72 | epoch 1 | iter 341 / 351 | time 19[s] | loss 1.72 Q 77+85 T 162 [91m☒[0m 100 --- Q 975+164 T 1139 [91m☒[0m 1000 --- Q 582+84 T 666 [91m☒[0m 100 --- Q 8+155 T 163 [91m☒[0m 100 --- Q 367+55 T 422 [91m☒[0m 100 --- Q 600+257 T 857 [91m☒[0m 1000 --- Q 761+292 T 1053 [91m☒[0m 1000 --- Q 830+597 T 1427 [91m☒[0m 1000 --- Q 26+838 T 864 [91m☒[0m 100 --- Q 143+93 T 236 [91m☒[0m 200 --- val acc 0.220% | epoch 2 | iter 1 / 351 | time 0[s] | loss 1.72 | epoch 2 | iter 21 / 351 | time 1[s] | loss 1.71 | epoch 2 | iter 41 / 351 | time 2[s] | loss 1.72 | epoch 2 | iter 61 / 351 | time 3[s] | loss 1.70 | epoch 2 | iter 81 / 351 | time 4[s] | loss 1.69 | epoch 2 | iter 101 / 351 | time 5[s] | loss 1.70 | epoch 2 | iter 121 / 351 | time 7[s] | loss 1.69 | epoch 2 | iter 141 / 351 | time 8[s] | loss 1.69 | epoch 2 | iter 161 / 351 | time 9[s] | loss 1.68 | epoch 2 | iter 181 / 351 | time 10[s] | loss 1.68 | epoch 2 | iter 201 / 351 | time 11[s] | loss 1.68 | epoch 2 | iter 221 / 351 | time 12[s] | loss 1.68 | epoch 2 | iter 241 / 351 | time 13[s] | loss 1.66 | epoch 2 | iter 261 / 351 | time 15[s] | loss 1.65 | epoch 2 | iter 281 
/ 351 | time 16[s] | loss 1.64 | epoch 2 | iter 301 / 351 | time 17[s] | loss 1.63 | epoch 2 | iter 321 / 351 | time 18[s] | loss 1.61 | epoch 2 | iter 341 / 351 | time 19[s] | loss 1.59 Q 77+85 T 162 [91m☒[0m 100 --- Q 975+164 T 1139 [91m☒[0m 1000 --- Q 582+84 T 666 [91m☒[0m 700 --- Q 8+155 T 163 [91m☒[0m 100 --- Q 367+55 T 422 [91m☒[0m 400 --- Q 600+257 T 857 [91m☒[0m 800 --- Q 761+292 T 1053 [91m☒[0m 1000 --- Q 830+597 T 1427 [91m☒[0m 1207 --- Q 26+838 T 864 [91m☒[0m 700 --- Q 143+93 T 236 [91m☒[0m 400 --- val acc 0.220% | epoch 3 | iter 1 / 351 | time 0[s] | loss 1.57 | epoch 3 | iter 21 / 351 | time 1[s] | loss 1.57 | epoch 3 | iter 41 / 351 | time 2[s] | loss 1.55 | epoch 3 | iter 61 / 351 | time 3[s] | loss 1.54 | epoch 3 | iter 81 / 351 | time 4[s] | loss 1.52 | epoch 3 | iter 101 / 351 | time 5[s] | loss 1.51 | epoch 3 | iter 121 / 351 | time 7[s] | loss 1.49 | epoch 3 | iter 141 / 351 | time 8[s] | loss 1.48 | epoch 3 | iter 161 / 351 | time 9[s] | loss 1.46 | epoch 3 | iter 181 / 351 | time 10[s] | loss 1.44 | epoch 3 | iter 201 / 351 | time 11[s] | loss 1.43 | epoch 3 | iter 221 / 351 | time 12[s] | loss 1.41 | epoch 3 | iter 241 / 351 | time 14[s] | loss 1.39 | epoch 3 | iter 261 / 351 | time 15[s] | loss 1.39 | epoch 3 | iter 281 / 351 | time 16[s] | loss 1.37 | epoch 3 | iter 301 / 351 | time 17[s] | loss 1.37 | epoch 3 | iter 321 / 351 | time 18[s] | loss 1.35 | epoch 3 | iter 341 / 351 | time 20[s] | loss 1.34 Q 77+85 T 162 [91m☒[0m 136 --- Q 975+164 T 1139 [91m☒[0m 1169 --- Q 582+84 T 666 [91m☒[0m 668 --- Q 8+155 T 163 [91m☒[0m 128 --- Q 367+55 T 422 [91m☒[0m 446 --- Q 600+257 T 857 [91m☒[0m 839 --- Q 761+292 T 1053 [91m☒[0m 1009 --- Q 830+597 T 1427 [91m☒[0m 1468 --- Q 26+838 T 864 [91m☒[0m 808 --- Q 143+93 T 236 [91m☒[0m 228 --- val acc 0.940% | epoch 4 | iter 1 / 351 | time 0[s] | loss 1.34 | epoch 4 | iter 21 / 351 | time 1[s] | loss 1.33 | epoch 4 | iter 41 / 351 | time 2[s] | loss 1.32 | epoch 4 | iter 61 / 351 | time 3[s] | loss 1.30 | 
epoch 4 | iter 81 / 351 | time 4[s] | loss 1.30 | epoch 4 | iter 101 / 351 | time 5[s] | loss 1.29 | epoch 4 | iter 121 / 351 | time 7[s] | loss 1.28 | epoch 4 | iter 141 / 351 | time 8[s] | loss 1.27 | epoch 4 | iter 161 / 351 | time 9[s] | loss 1.26 | epoch 4 | iter 181 / 351 | time 10[s] | loss 1.25 | epoch 4 | iter 201 / 351 | time 11[s] | loss 1.25 | epoch 4 | iter 221 / 351 | time 13[s] | loss 1.25 | epoch 4 | iter 241 / 351 | time 14[s] | loss 1.23 | epoch 4 | iter 261 / 351 | time 15[s] | loss 1.22 | epoch 4 | iter 281 / 351 | time 16[s] | loss 1.22 | epoch 4 | iter 301 / 351 | time 17[s] | loss 1.21 | epoch 4 | iter 321 / 351 | time 18[s] | loss 1.20 | epoch 4 | iter 341 / 351 | time 20[s] | loss 1.20 Q 77+85 T 162 [91m☒[0m 156 --- Q 975+164 T 1139 [91m☒[0m 1222 --- Q 582+84 T 666 [92m☑[0m 666 --- Q 8+155 T 163 [91m☒[0m 199 --- Q 367+55 T 422 [91m☒[0m 402 --- Q 600+257 T 857 [91m☒[0m 902 --- Q 761+292 T 1053 [91m☒[0m 1006 --- Q 830+597 T 1427 [91m☒[0m 1525 --- Q 26+838 T 864 [91m☒[0m 826 --- Q 143+93 T 236 [91m☒[0m 205 --- val acc 2.400% | epoch 5 | iter 1 / 351 | time 0[s] | loss 1.19 | epoch 5 | iter 21 / 351 | time 1[s] | loss 1.18 | epoch 5 | iter 41 / 351 | time 2[s] | loss 1.17 | epoch 5 | iter 61 / 351 | time 3[s] | loss 1.16 | epoch 5 | iter 81 / 351 | time 4[s] | loss 1.17 | epoch 5 | iter 101 / 351 | time 5[s] | loss 1.15 | epoch 5 | iter 121 / 351 | time 7[s] | loss 1.15 | epoch 5 | iter 141 / 351 | time 8[s] | loss 1.15 | epoch 5 | iter 161 / 351 | time 9[s] | loss 1.13 | epoch 5 | iter 181 / 351 | time 10[s] | loss 1.13 | epoch 5 | iter 201 / 351 | time 11[s] | loss 1.13 | epoch 5 | iter 221 / 351 | time 12[s] | loss 1.12 | epoch 5 | iter 241 / 351 | time 14[s] | loss 1.12 | epoch 5 | iter 261 / 351 | time 15[s] | loss 1.11 | epoch 5 | iter 281 / 351 | time 16[s] | loss 1.11 | epoch 5 | iter 301 / 351 | time 17[s] | loss 1.10 | epoch 5 | iter 321 / 351 | time 18[s] | loss 1.09 | epoch 5 | iter 341 / 351 | time 20[s] | loss 1.09 Q 77+85 T 162 
[91m☒[0m 155 --- Q 975+164 T 1139 [91m☒[0m 1165 --- Q 582+84 T 666 [91m☒[0m 645 --- Q 8+155 T 163 [91m☒[0m 160 --- Q 367+55 T 422 [91m☒[0m 421 --- Q 600+257 T 857 [91m☒[0m 882 --- Q 761+292 T 1053 [91m☒[0m 1015 --- Q 830+597 T 1427 [91m☒[0m 1444 --- Q 26+838 T 864 [91m☒[0m 846 --- Q 143+93 T 236 [91m☒[0m 221 --- val acc 4.360% | epoch 6 | iter 1 / 351 | time 0[s] | loss 1.06 | epoch 6 | iter 21 / 351 | time 1[s] | loss 1.07 | epoch 6 | iter 41 / 351 | time 2[s] | loss 1.07 | epoch 6 | iter 61 / 351 | time 3[s] | loss 1.08 | epoch 6 | iter 81 / 351 | time 4[s] | loss 1.08 | epoch 6 | iter 101 / 351 | time 5[s] | loss 1.08 | epoch 6 | iter 121 / 351 | time 7[s] | loss 1.06 | epoch 6 | iter 141 / 351 | time 8[s] | loss 1.05 | epoch 6 | iter 161 / 351 | time 9[s] | loss 1.05 | epoch 6 | iter 181 / 351 | time 10[s] | loss 1.06 | epoch 6 | iter 201 / 351 | time 11[s] | loss 1.05 | epoch 6 | iter 221 / 351 | time 12[s] | loss 1.05 | epoch 6 | iter 241 / 351 | time 14[s] | loss 1.04 | epoch 6 | iter 261 / 351 | time 15[s] | loss 1.04 | epoch 6 | iter 281 / 351 | time 16[s] | loss 1.04 | epoch 6 | iter 301 / 351 | time 17[s] | loss 1.03 | epoch 6 | iter 321 / 351 | time 18[s] | loss 1.04 | epoch 6 | iter 341 / 351 | time 20[s] | loss 1.01 Q 77+85 T 162 [91m☒[0m 161 --- Q 975+164 T 1139 [91m☒[0m 1119 --- Q 582+84 T 666 [92m☑[0m 666 --- Q 8+155 T 163 [91m☒[0m 166 --- Q 367+55 T 422 [91m☒[0m 410 --- Q 600+257 T 857 [92m☑[0m 857 --- Q 761+292 T 1053 [91m☒[0m 1009 --- Q 830+597 T 1427 [91m☒[0m 1412 --- Q 26+838 T 864 [91m☒[0m 867 --- Q 143+93 T 236 [91m☒[0m 246 --- val acc 4.400% | epoch 7 | iter 1 / 351 | time 0[s] | loss 1.02 | epoch 7 | iter 21 / 351 | time 1[s] | loss 1.01 | epoch 7 | iter 41 / 351 | time 2[s] | loss 1.00 | epoch 7 | iter 61 / 351 | time 3[s] | loss 1.00 | epoch 7 | iter 81 / 351 | time 4[s] | loss 1.00 | epoch 7 | iter 101 / 351 | time 5[s] | loss 1.00 | epoch 7 | iter 121 / 351 | time 7[s] | loss 1.00 | epoch 7 | iter 141 / 351 | time 8[s] | loss 0.98 | 
epoch 7 | iter 161 / 351 | time 9[s] | loss 1.00 | epoch 7 | iter 181 / 351 | time 10[s] | loss 0.99 | epoch 7 | iter 201 / 351 | time 11[s] | loss 0.98 | epoch 7 | iter 221 / 351 | time 13[s] | loss 1.00 | epoch 7 | iter 241 / 351 | time 14[s] | loss 1.02 | epoch 7 | iter 261 / 351 | time 15[s] | loss 1.00 | epoch 7 | iter 281 / 351 | time 16[s] | loss 0.97 | epoch 7 | iter 301 / 351 | time 17[s] | loss 0.97 | epoch 7 | iter 321 / 351 | time 18[s] | loss 0.96 | epoch 7 | iter 341 / 351 | time 20[s] | loss 0.96 Q 77+85 T 162 [91m☒[0m 161 --- Q 975+164 T 1139 [91m☒[0m 1175 --- Q 582+84 T 666 [91m☒[0m 667 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 430 --- Q 600+257 T 857 [91m☒[0m 886 --- Q 761+292 T 1053 [91m☒[0m 1076 --- Q 830+597 T 1427 [91m☒[0m 1444 --- Q 26+838 T 864 [91m☒[0m 865 --- Q 143+93 T 236 [91m☒[0m 238 --- val acc 5.100% | epoch 8 | iter 1 / 351 | time 0[s] | loss 1.01 | epoch 8 | iter 21 / 351 | time 1[s] | loss 0.95 | epoch 8 | iter 41 / 351 | time 2[s] | loss 0.96 | epoch 8 | iter 61 / 351 | time 3[s] | loss 0.95 | epoch 8 | iter 81 / 351 | time 4[s] | loss 0.95 | epoch 8 | iter 101 / 351 | time 5[s] | loss 0.96 | epoch 8 | iter 121 / 351 | time 7[s] | loss 0.95 | epoch 8 | iter 141 / 351 | time 8[s] | loss 0.95 | epoch 8 | iter 161 / 351 | time 9[s] | loss 0.95 | epoch 8 | iter 181 / 351 | time 10[s] | loss 0.94 | epoch 8 | iter 201 / 351 | time 11[s] | loss 0.93 | epoch 8 | iter 221 / 351 | time 13[s] | loss 0.93 | epoch 8 | iter 241 / 351 | time 14[s] | loss 0.93 | epoch 8 | iter 261 / 351 | time 15[s] | loss 0.95 | epoch 8 | iter 281 / 351 | time 16[s] | loss 0.94 | epoch 8 | iter 301 / 351 | time 18[s] | loss 0.92 | epoch 8 | iter 321 / 351 | time 19[s] | loss 0.92 | epoch 8 | iter 341 / 351 | time 20[s] | loss 0.92 Q 77+85 T 162 [91m☒[0m 160 --- Q 975+164 T 1139 [91m☒[0m 1130 --- Q 582+84 T 666 [91m☒[0m 668 --- Q 8+155 T 163 [91m☒[0m 158 --- Q 367+55 T 422 [91m☒[0m 420 --- Q 600+257 T 857 [91m☒[0m 858 --- Q 761+292 T 1053 
[91m☒[0m 1009 --- Q 830+597 T 1427 [91m☒[0m 1431 --- Q 26+838 T 864 [91m☒[0m 865 --- Q 143+93 T 236 [91m☒[0m 232 --- val acc 5.440% | epoch 9 | iter 1 / 351 | time 0[s] | loss 0.92 | epoch 9 | iter 21 / 351 | time 1[s] | loss 0.91 | epoch 9 | iter 41 / 351 | time 2[s] | loss 0.90 | epoch 9 | iter 61 / 351 | time 3[s] | loss 0.90 | epoch 9 | iter 81 / 351 | time 4[s] | loss 0.89 | epoch 9 | iter 101 / 351 | time 5[s] | loss 0.91 | epoch 9 | iter 121 / 351 | time 7[s] | loss 0.90 | epoch 9 | iter 141 / 351 | time 8[s] | loss 0.89 | epoch 9 | iter 161 / 351 | time 9[s] | loss 0.94 | epoch 9 | iter 181 / 351 | time 10[s] | loss 0.90 | epoch 9 | iter 201 / 351 | time 12[s] | loss 0.90 | epoch 9 | iter 221 / 351 | time 13[s] | loss 0.91 | epoch 9 | iter 241 / 351 | time 14[s] | loss 0.89 | epoch 9 | iter 261 / 351 | time 15[s] | loss 0.90 | epoch 9 | iter 281 / 351 | time 16[s] | loss 0.90 | epoch 9 | iter 301 / 351 | time 17[s] | loss 0.88 | epoch 9 | iter 321 / 351 | time 19[s] | loss 0.87 | epoch 9 | iter 341 / 351 | time 20[s] | loss 0.87 Q 77+85 T 162 [91m☒[0m 161 --- Q 975+164 T 1139 [92m☑[0m 1139 --- Q 582+84 T 666 [91m☒[0m 667 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 427 --- Q 600+257 T 857 [91m☒[0m 859 --- Q 761+292 T 1053 [91m☒[0m 1069 --- Q 830+597 T 1427 [91m☒[0m 1421 --- Q 26+838 T 864 [91m☒[0m 865 --- Q 143+93 T 236 [91m☒[0m 248 --- val acc 7.680% | epoch 10 | iter 1 / 351 | time 0[s] | loss 0.84 | epoch 10 | iter 21 / 351 | time 1[s] | loss 0.86 | epoch 10 | iter 41 / 351 | time 2[s] | loss 0.87 | epoch 10 | iter 61 / 351 | time 3[s] | loss 0.87 | epoch 10 | iter 81 / 351 | time 4[s] | loss 0.86 | epoch 10 | iter 101 / 351 | time 6[s] | loss 0.86 | epoch 10 | iter 121 / 351 | time 7[s] | loss 0.86 | epoch 10 | iter 141 / 351 | time 8[s] | loss 0.87 | epoch 10 | iter 161 / 351 | time 9[s] | loss 0.85 | epoch 10 | iter 181 / 351 | time 10[s] | loss 0.88 | epoch 10 | iter 201 / 351 | time 11[s] | loss 0.85 | epoch 10 | iter 221 / 351 | time 
13[s] | loss 0.86 | epoch 10 | iter 241 / 351 | time 14[s] | loss 0.86 | epoch 10 | iter 261 / 351 | time 15[s] | loss 0.85 | epoch 10 | iter 281 / 351 | time 16[s] | loss 0.85 | epoch 10 | iter 301 / 351 | time 17[s] | loss 0.84 | epoch 10 | iter 321 / 351 | time 18[s] | loss 0.84 | epoch 10 | iter 341 / 351 | time 20[s] | loss 0.84 Q 77+85 T 162 [91m☒[0m 160 --- Q 975+164 T 1139 [91m☒[0m 1130 --- Q 582+84 T 666 [91m☒[0m 663 --- Q 8+155 T 163 [91m☒[0m 165 --- Q 367+55 T 422 [91m☒[0m 420 --- Q 600+257 T 857 [91m☒[0m 859 --- Q 761+292 T 1053 [91m☒[0m 1039 --- Q 830+597 T 1427 [91m☒[0m 1409 --- Q 26+838 T 864 [91m☒[0m 865 --- Q 143+93 T 236 [91m☒[0m 238 --- val acc 8.840% | epoch 11 | iter 1 / 351 | time 0[s] | loss 0.80 | epoch 11 | iter 21 / 351 | time 1[s] | loss 0.85 | epoch 11 | iter 41 / 351 | time 2[s] | loss 0.83 | epoch 11 | iter 61 / 351 | time 3[s] | loss 0.83 | epoch 11 | iter 81 / 351 | time 4[s] | loss 0.83 | epoch 11 | iter 101 / 351 | time 6[s] | loss 0.82 | epoch 11 | iter 121 / 351 | time 7[s] | loss 0.82 | epoch 11 | iter 141 / 351 | time 8[s] | loss 0.81 | epoch 11 | iter 161 / 351 | time 9[s] | loss 0.81 | epoch 11 | iter 181 / 351 | time 11[s] | loss 0.81 | epoch 11 | iter 201 / 351 | time 12[s] | loss 0.81 | epoch 11 | iter 221 / 351 | time 13[s] | loss 0.82 | epoch 11 | iter 241 / 351 | time 15[s] | loss 0.81 | epoch 11 | iter 261 / 351 | time 16[s] | loss 0.81 | epoch 11 | iter 281 / 351 | time 17[s] | loss 0.82 | epoch 11 | iter 301 / 351 | time 18[s] | loss 0.83 | epoch 11 | iter 321 / 351 | time 19[s] | loss 0.80 | epoch 11 | iter 341 / 351 | time 21[s] | loss 0.81 Q 77+85 T 162 [91m☒[0m 161 --- Q 975+164 T 1139 [91m☒[0m 1183 --- Q 582+84 T 666 [91m☒[0m 658 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [92m☑[0m 422 --- Q 600+257 T 857 [91m☒[0m 851 --- Q 761+292 T 1053 [91m☒[0m 1073 --- Q 830+597 T 1427 [91m☒[0m 1425 --- Q 26+838 T 864 [91m☒[0m 861 --- Q 143+93 T 236 [91m☒[0m 238 --- val acc 8.020% | epoch 12 | iter 1 / 351 | time 0[s] 
| loss 0.80 | epoch 12 | iter 21 / 351 | time 1[s] | loss 0.79 | epoch 12 | iter 41 / 351 | time 2[s] | loss 0.80 | epoch 12 | iter 61 / 351 | time 3[s] | loss 0.80 | epoch 12 | iter 81 / 351 | time 4[s] | loss 0.79 | epoch 12 | iter 101 / 351 | time 6[s] | loss 0.79 | epoch 12 | iter 121 / 351 | time 7[s] | loss 0.78 | epoch 12 | iter 141 / 351 | time 8[s] | loss 0.79 | epoch 12 | iter 161 / 351 | time 9[s] | loss 0.79 | epoch 12 | iter 181 / 351 | time 10[s] | loss 0.82 | epoch 12 | iter 201 / 351 | time 11[s] | loss 0.79 | epoch 12 | iter 221 / 351 | time 13[s] | loss 0.77 | epoch 12 | iter 241 / 351 | time 14[s] | loss 0.78 | epoch 12 | iter 261 / 351 | time 15[s] | loss 0.78 | epoch 12 | iter 281 / 351 | time 16[s] | loss 0.78 | epoch 12 | iter 301 / 351 | time 17[s] | loss 0.77 | epoch 12 | iter 321 / 351 | time 19[s] | loss 0.78 | epoch 12 | iter 341 / 351 | time 20[s] | loss 0.77 Q 77+85 T 162 [91m☒[0m 161 --- Q 975+164 T 1139 [91m☒[0m 1129 --- Q 582+84 T 666 [91m☒[0m 669 --- Q 8+155 T 163 [91m☒[0m 166 --- Q 367+55 T 422 [91m☒[0m 423 --- Q 600+257 T 857 [91m☒[0m 859 --- Q 761+292 T 1053 [91m☒[0m 1039 --- Q 830+597 T 1427 [91m☒[0m 1421 --- Q 26+838 T 864 [91m☒[0m 867 --- Q 143+93 T 236 [91m☒[0m 238 --- val acc 12.200% | epoch 13 | iter 1 / 351 | time 0[s] | loss 0.75 | epoch 13 | iter 21 / 351 | time 1[s] | loss 0.77 | epoch 13 | iter 41 / 351 | time 2[s] | loss 0.75 | epoch 13 | iter 61 / 351 | time 3[s] | loss 0.76 | epoch 13 | iter 81 / 351 | time 4[s] | loss 0.76 | epoch 13 | iter 101 / 351 | time 5[s] | loss 0.76 | epoch 13 | iter 121 / 351 | time 7[s] | loss 0.79 | epoch 13 | iter 141 / 351 | time 8[s] | loss 0.76 | epoch 13 | iter 161 / 351 | time 9[s] | loss 0.75 | epoch 13 | iter 181 / 351 | time 10[s] | loss 0.80 | epoch 13 | iter 201 / 351 | time 11[s] | loss 0.76 | epoch 13 | iter 221 / 351 | time 13[s] | loss 0.76 | epoch 13 | iter 241 / 351 | time 14[s] | loss 0.75 | epoch 13 | iter 261 / 351 | time 15[s] | loss 0.77 | epoch 13 | iter 281 / 351 
| time 16[s] | loss 0.75 | epoch 13 | iter 301 / 351 | time 17[s] | loss 0.74 | epoch 13 | iter 321 / 351 | time 18[s] | loss 0.74 | epoch 13 | iter 341 / 351 | time 20[s] | loss 0.73 Q 77+85 T 162 [91m☒[0m 160 --- Q 975+164 T 1139 [92m☑[0m 1139 --- Q 582+84 T 666 [91m☒[0m 664 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 420 --- Q 600+257 T 857 [91m☒[0m 851 --- Q 761+292 T 1053 [91m☒[0m 1063 --- Q 830+597 T 1427 [91m☒[0m 1421 --- Q 26+838 T 864 [91m☒[0m 861 --- Q 143+93 T 236 [91m☒[0m 239 --- val acc 12.460% | epoch 14 | iter 1 / 351 | time 0[s] | loss 0.74 | epoch 14 | iter 21 / 351 | time 1[s] | loss 0.73 | epoch 14 | iter 41 / 351 | time 2[s] | loss 0.73 | epoch 14 | iter 61 / 351 | time 3[s] | loss 0.72 | epoch 14 | iter 81 / 351 | time 4[s] | loss 0.73 | epoch 14 | iter 101 / 351 | time 5[s] | loss 0.74 | epoch 14 | iter 121 / 351 | time 7[s] | loss 0.74 | epoch 14 | iter 141 / 351 | time 8[s] | loss 0.72 | epoch 14 | iter 161 / 351 | time 9[s] | loss 0.72 | epoch 14 | iter 181 / 351 | time 10[s] | loss 0.71 | epoch 14 | iter 201 / 351 | time 11[s] | loss 0.71 | epoch 14 | iter 221 / 351 | time 13[s] | loss 0.73 | epoch 14 | iter 241 / 351 | time 14[s] | loss 0.73 | epoch 14 | iter 261 / 351 | time 15[s] | loss 0.72 | epoch 14 | iter 281 / 351 | time 16[s] | loss 0.71 | epoch 14 | iter 301 / 351 | time 17[s] | loss 0.71 | epoch 14 | iter 321 / 351 | time 18[s] | loss 0.71 | epoch 14 | iter 341 / 351 | time 20[s] | loss 0.70 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1179 --- Q 582+84 T 666 [91m☒[0m 658 --- Q 8+155 T 163 [91m☒[0m 166 --- Q 367+55 T 422 [91m☒[0m 420 --- Q 600+257 T 857 [91m☒[0m 859 --- Q 761+292 T 1053 [91m☒[0m 1065 --- Q 830+597 T 1427 [91m☒[0m 1418 --- Q 26+838 T 864 [91m☒[0m 865 --- Q 143+93 T 236 [91m☒[0m 233 --- val acc 10.060% | epoch 15 | iter 1 / 351 | time 0[s] | loss 0.73 | epoch 15 | iter 21 / 351 | time 1[s] | loss 0.70 | epoch 15 | iter 41 / 351 | time 2[s] | loss 0.71 | epoch 15 | iter 61 / 351 | time 
3[s] | loss 0.69 | epoch 15 | iter 81 / 351 | time 4[s] | loss 0.70 | epoch 15 | iter 101 / 351 | time 5[s] | loss 0.69 | epoch 15 | iter 121 / 351 | time 7[s] | loss 0.69 | epoch 15 | iter 141 / 351 | time 8[s] | loss 0.70 | epoch 15 | iter 161 / 351 | time 9[s] | loss 0.71 | epoch 15 | iter 181 / 351 | time 10[s] | loss 0.73 | epoch 15 | iter 201 / 351 | time 11[s] | loss 0.73 | epoch 15 | iter 221 / 351 | time 13[s] | loss 0.72 | epoch 15 | iter 241 / 351 | time 14[s] | loss 0.71 | epoch 15 | iter 261 / 351 | time 15[s] | loss 0.69 | epoch 15 | iter 281 / 351 | time 16[s] | loss 0.71 | epoch 15 | iter 301 / 351 | time 17[s] | loss 0.68 | epoch 15 | iter 321 / 351 | time 18[s] | loss 0.69 | epoch 15 | iter 341 / 351 | time 20[s] | loss 0.69 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1130 --- Q 582+84 T 666 [91m☒[0m 668 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 423 --- Q 600+257 T 857 [91m☒[0m 851 --- Q 761+292 T 1053 [91m☒[0m 1062 --- Q 830+597 T 1427 [91m☒[0m 1444 --- Q 26+838 T 864 [91m☒[0m 861 --- Q 143+93 T 236 [91m☒[0m 238 --- val acc 14.080% | epoch 16 | iter 1 / 351 | time 0[s] | loss 0.67 | epoch 16 | iter 21 / 351 | time 1[s] | loss 0.68 | epoch 16 | iter 41 / 351 | time 2[s] | loss 0.68 | epoch 16 | iter 61 / 351 | time 3[s] | loss 0.67 | epoch 16 | iter 81 / 351 | time 4[s] | loss 0.67 | epoch 16 | iter 101 / 351 | time 6[s] | loss 0.68 | epoch 16 | iter 121 / 351 | time 7[s] | loss 0.68 | epoch 16 | iter 141 / 351 | time 8[s] | loss 0.67 | epoch 16 | iter 161 / 351 | time 9[s] | loss 0.66 | epoch 16 | iter 181 / 351 | time 10[s] | loss 0.67 | epoch 16 | iter 201 / 351 | time 11[s] | loss 0.66 | epoch 16 | iter 221 / 351 | time 13[s] | loss 0.66 | epoch 16 | iter 241 / 351 | time 14[s] | loss 0.65 | epoch 16 | iter 261 / 351 | time 15[s] | loss 0.66 | epoch 16 | iter 281 / 351 | time 16[s] | loss 0.67 | epoch 16 | iter 301 / 351 | time 17[s] | loss 0.65 | epoch 16 | iter 321 / 351 | time 19[s] | loss 0.65 | epoch 16 | 
iter 341 / 351 | time 20[s] | loss 0.64 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1129 --- Q 582+84 T 666 [91m☒[0m 669 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 420 --- Q 600+257 T 857 [91m☒[0m 850 --- Q 761+292 T 1053 [91m☒[0m 1044 --- Q 830+597 T 1427 [91m☒[0m 1418 --- Q 26+838 T 864 [91m☒[0m 861 --- Q 143+93 T 236 [91m☒[0m 237 --- val acc 15.680% | epoch 17 | iter 1 / 351 | time 0[s] | loss 0.65 | epoch 17 | iter 21 / 351 | time 1[s] | loss 0.64 | epoch 17 | iter 41 / 351 | time 2[s] | loss 0.70 | epoch 17 | iter 61 / 351 | time 3[s] | loss 0.69 | epoch 17 | iter 81 / 351 | time 4[s] | loss 0.64 | epoch 17 | iter 101 / 351 | time 5[s] | loss 0.63 | epoch 17 | iter 121 / 351 | time 7[s] | loss 0.64 | epoch 17 | iter 141 / 351 | time 8[s] | loss 0.64 | epoch 17 | iter 161 / 351 | time 9[s] | loss 0.65 | epoch 17 | iter 181 / 351 | time 10[s] | loss 0.65 | epoch 17 | iter 201 / 351 | time 11[s] | loss 0.64 | epoch 17 | iter 221 / 351 | time 13[s] | loss 0.64 | epoch 17 | iter 241 / 351 | time 14[s] | loss 0.65 | epoch 17 | iter 261 / 351 | time 15[s] | loss 0.65 | epoch 17 | iter 281 / 351 | time 16[s] | loss 0.64 | epoch 17 | iter 301 / 351 | time 17[s] | loss 0.64 | epoch 17 | iter 321 / 351 | time 19[s] | loss 0.64 | epoch 17 | iter 341 / 351 | time 20[s] | loss 0.66 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1138 --- Q 582+84 T 666 [92m☑[0m 666 --- Q 8+155 T 163 [91m☒[0m 166 --- Q 367+55 T 422 [91m☒[0m 420 --- Q 600+257 T 857 [91m☒[0m 859 --- Q 761+292 T 1053 [91m☒[0m 1044 --- Q 830+597 T 1427 [91m☒[0m 1424 --- Q 26+838 T 864 [91m☒[0m 861 --- Q 143+93 T 236 [91m☒[0m 238 --- val acc 16.240% | epoch 18 | iter 1 / 351 | time 0[s] | loss 0.62 | epoch 18 | iter 21 / 351 | time 1[s] | loss 0.62 | epoch 18 | iter 41 / 351 | time 2[s] | loss 0.62 | epoch 18 | iter 61 / 351 | time 3[s] | loss 0.62 | epoch 18 | iter 81 / 351 | time 4[s] | loss 0.64 | epoch 18 | iter 101 / 351 | time 6[s] | loss 0.64 | epoch 18 | iter 121 / 
351 | time 7[s] | loss 0.61 | epoch 18 | iter 141 / 351 | time 8[s] | loss 0.65 | epoch 18 | iter 161 / 351 | time 9[s] | loss 0.65 | epoch 18 | iter 181 / 351 | time 10[s] | loss 0.62 | epoch 18 | iter 201 / 351 | time 12[s] | loss 0.61 | epoch 18 | iter 221 / 351 | time 13[s] | loss 0.61 | epoch 18 | iter 241 / 351 | time 14[s] | loss 0.64 | epoch 18 | iter 261 / 351 | time 15[s] | loss 0.61 | epoch 18 | iter 281 / 351 | time 16[s] | loss 0.61 | epoch 18 | iter 301 / 351 | time 17[s] | loss 0.61 | epoch 18 | iter 321 / 351 | time 19[s] | loss 0.61 | epoch 18 | iter 341 / 351 | time 20[s] | loss 0.61 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1143 --- Q 582+84 T 666 [91m☒[0m 661 --- Q 8+155 T 163 [91m☒[0m 162 --- Q 367+55 T 422 [92m☑[0m 422 --- Q 600+257 T 857 [91m☒[0m 851 --- Q 761+292 T 1053 [91m☒[0m 1049 --- Q 830+597 T 1427 [91m☒[0m 1424 --- Q 26+838 T 864 [91m☒[0m 867 --- Q 143+93 T 236 [91m☒[0m 239 --- val acc 16.620% | epoch 19 | iter 1 / 351 | time 0[s] | loss 0.62 | epoch 19 | iter 21 / 351 | time 1[s] | loss 0.63 | epoch 19 | iter 41 / 351 | time 2[s] | loss 0.61 | epoch 19 | iter 61 / 351 | time 3[s] | loss 0.59 | epoch 19 | iter 81 / 351 | time 4[s] | loss 0.60 | epoch 19 | iter 101 / 351 | time 5[s] | loss 0.61 | epoch 19 | iter 121 / 351 | time 7[s] | loss 0.60 | epoch 19 | iter 141 / 351 | time 8[s] | loss 0.59 | epoch 19 | iter 161 / 351 | time 9[s] | loss 0.60 | epoch 19 | iter 181 / 351 | time 10[s] | loss 0.59 | epoch 19 | iter 201 / 351 | time 11[s] | loss 0.58 | epoch 19 | iter 221 / 351 | time 13[s] | loss 0.60 | epoch 19 | iter 241 / 351 | time 14[s] | loss 0.58 | epoch 19 | iter 261 / 351 | time 15[s] | loss 0.59 | epoch 19 | iter 281 / 351 | time 16[s] | loss 0.59 | epoch 19 | iter 301 / 351 | time 17[s] | loss 0.59 | epoch 19 | iter 321 / 351 | time 19[s] | loss 0.59 | epoch 19 | iter 341 / 351 | time 20[s] | loss 0.59 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1129 --- Q 582+84 T 666 [92m☑[0m 666 --- Q 8+155 T 
163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 423 --- Q 600+257 T 857 [91m☒[0m 852 --- Q 761+292 T 1053 [92m☑[0m 1053 --- Q 830+597 T 1427 [91m☒[0m 1421 --- Q 26+838 T 864 [91m☒[0m 867 --- Q 143+93 T 236 [91m☒[0m 235 --- val acc 19.760% | epoch 20 | iter 1 / 351 | time 0[s] | loss 0.56 | epoch 20 | iter 21 / 351 | time 1[s] | loss 0.58 | epoch 20 | iter 41 / 351 | time 2[s] | loss 0.59 | epoch 20 | iter 61 / 351 | time 3[s] | loss 0.58 | epoch 20 | iter 81 / 351 | time 5[s] | loss 0.59 | epoch 20 | iter 101 / 351 | time 6[s] | loss 0.57 | epoch 20 | iter 121 / 351 | time 7[s] | loss 0.58 | epoch 20 | iter 141 / 351 | time 8[s] | loss 0.60 | epoch 20 | iter 161 / 351 | time 9[s] | loss 0.63 | epoch 20 | iter 181 / 351 | time 10[s] | loss 0.58 | epoch 20 | iter 201 / 351 | time 12[s] | loss 0.59 | epoch 20 | iter 221 / 351 | time 13[s] | loss 0.58 | epoch 20 | iter 241 / 351 | time 14[s] | loss 0.62 | epoch 20 | iter 261 / 351 | time 15[s] | loss 0.62 | epoch 20 | iter 281 / 351 | time 16[s] | loss 0.61 | epoch 20 | iter 301 / 351 | time 18[s] | loss 0.60 | epoch 20 | iter 321 / 351 | time 19[s] | loss 0.61 | epoch 20 | iter 341 / 351 | time 20[s] | loss 0.56 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1141 --- Q 582+84 T 666 [91m☒[0m 665 --- Q 8+155 T 163 [91m☒[0m 164 --- Q 367+55 T 422 [92m☑[0m 422 --- Q 600+257 T 857 [91m☒[0m 852 --- Q 761+292 T 1053 [92m☑[0m 1053 --- Q 830+597 T 1427 [91m☒[0m 1424 --- Q 26+838 T 864 [91m☒[0m 862 --- Q 143+93 T 236 [91m☒[0m 235 --- val acc 22.020% | epoch 21 | iter 1 / 351 | time 0[s] | loss 0.55 | epoch 21 | iter 21 / 351 | time 1[s] | loss 0.56 | epoch 21 | iter 41 / 351 | time 2[s] | loss 0.56 | epoch 21 | iter 61 / 351 | time 3[s] | loss 0.57 | epoch 21 | iter 81 / 351 | time 4[s] | loss 0.56 | epoch 21 | iter 101 / 351 | time 5[s] | loss 0.56 | epoch 21 | iter 121 / 351 | time 7[s] | loss 0.58 | epoch 21 | iter 141 / 351 | time 8[s] | loss 0.58 | epoch 21 | iter 161 / 351 | time 9[s] | loss 0.55 | epoch 21 | iter 
181 / 351 | time 10[s] | loss 0.57 | epoch 21 | iter 201 / 351 | time 11[s] | loss 0.56 | epoch 21 | iter 221 / 351 | time 13[s] | loss 0.56 | epoch 21 | iter 241 / 351 | time 14[s] | loss 0.55 | epoch 21 | iter 261 / 351 | time 15[s] | loss 0.56 | epoch 21 | iter 281 / 351 | time 16[s] | loss 0.58 | epoch 21 | iter 301 / 351 | time 17[s] | loss 0.56 | epoch 21 | iter 321 / 351 | time 18[s] | loss 0.55 | epoch 21 | iter 341 / 351 | time 20[s] | loss 0.57 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1144 --- Q 582+84 T 666 [91m☒[0m 667 --- Q 8+155 T 163 [91m☒[0m 165 --- Q 367+55 T 422 [91m☒[0m 423 --- Q 600+257 T 857 [91m☒[0m 850 --- Q 761+292 T 1053 [91m☒[0m 1055 --- Q 830+597 T 1427 [92m☑[0m 1427 --- Q 26+838 T 864 [91m☒[0m 867 --- Q 143+93 T 236 [91m☒[0m 237 --- val acc 14.560% | epoch 22 | iter 1 / 351 | time 0[s] | loss 0.61 | epoch 22 | iter 21 / 351 | time 1[s] | loss 0.58 | epoch 22 | iter 41 / 351 | time 2[s] | loss 0.57 | epoch 22 | iter 61 / 351 | time 3[s] | loss 0.58 | epoch 22 | iter 81 / 351 | time 4[s] | loss 0.57 | epoch 22 | iter 101 / 351 | time 5[s] | loss 0.58 | epoch 22 | iter 121 / 351 | time 7[s] | loss 0.56 | epoch 22 | iter 141 / 351 | time 8[s] | loss 0.54 | epoch 22 | iter 161 / 351 | time 9[s] | loss 0.55 | epoch 22 | iter 181 / 351 | time 10[s] | loss 0.55 | epoch 22 | iter 201 / 351 | time 11[s] | loss 0.54 | epoch 22 | iter 221 / 351 | time 12[s] | loss 0.53 | epoch 22 | iter 241 / 351 | time 14[s] | loss 0.53 | epoch 22 | iter 261 / 351 | time 15[s] | loss 0.54 | epoch 22 | iter 281 / 351 | time 16[s] | loss 0.54 | epoch 22 | iter 301 / 351 | time 17[s] | loss 0.54 | epoch 22 | iter 321 / 351 | time 18[s] | loss 0.54 | epoch 22 | iter 341 / 351 | time 20[s] | loss 0.54 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1141 --- Q 582+84 T 666 [91m☒[0m 665 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 421 --- Q 600+257 T 857 [91m☒[0m 859 --- Q 761+292 T 1053 [91m☒[0m 1050 --- Q 830+597 T 1427 [91m☒[0m 
1424 --- Q 26+838 T 864 [91m☒[0m 865 --- Q 143+93 T 236 [91m☒[0m 235 --- val acc 24.840% | epoch 23 | iter 1 / 351 | time 0[s] | loss 0.52 | epoch 23 | iter 21 / 351 | time 1[s] | loss 0.53 | epoch 23 | iter 41 / 351 | time 2[s] | loss 0.53 | epoch 23 | iter 61 / 351 | time 3[s] | loss 0.57 | epoch 23 | iter 81 / 351 | time 4[s] | loss 0.57 | epoch 23 | iter 101 / 351 | time 5[s] | loss 0.56 | epoch 23 | iter 121 / 351 | time 7[s] | loss 0.51 | epoch 23 | iter 141 / 351 | time 8[s] | loss 0.53 | epoch 23 | iter 161 / 351 | time 9[s] | loss 0.54 | epoch 23 | iter 181 / 351 | time 10[s] | loss 0.54 | epoch 23 | iter 201 / 351 | time 11[s] | loss 0.53 | epoch 23 | iter 221 / 351 | time 13[s] | loss 0.52 | epoch 23 | iter 241 / 351 | time 14[s] | loss 0.53 | epoch 23 | iter 261 / 351 | time 15[s] | loss 0.55 | epoch 23 | iter 281 / 351 | time 16[s] | loss 0.53 | epoch 23 | iter 301 / 351 | time 17[s] | loss 0.52 | epoch 23 | iter 321 / 351 | time 18[s] | loss 0.52 | epoch 23 | iter 341 / 351 | time 20[s] | loss 0.52 Q 77+85 T 162 [92m☑[0m 162 --- Q 975+164 T 1139 [91m☒[0m 1143 --- Q 582+84 T 666 [92m☑[0m 666 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 420 --- Q 600+257 T 857 [92m☑[0m 857 --- Q 761+292 T 1053 [91m☒[0m 1055 --- Q 830+597 T 1427 [91m☒[0m 1424 --- Q 26+838 T 864 [91m☒[0m 862 --- Q 143+93 T 236 [91m☒[0m 233 --- val acc 25.740% | epoch 24 | iter 1 / 351 | time 0[s] | loss 0.50 | epoch 24 | iter 21 / 351 | time 1[s] | loss 0.51 | epoch 24 | iter 41 / 351 | time 2[s] | loss 0.54 | epoch 24 | iter 61 / 351 | time 3[s] | loss 0.50 | epoch 24 | iter 81 / 351 | time 4[s] | loss 0.51 | epoch 24 | iter 101 / 351 | time 5[s] | loss 0.52 | epoch 24 | iter 121 / 351 | time 7[s] | loss 0.53 | epoch 24 | iter 141 / 351 | time 8[s] | loss 0.51 | epoch 24 | iter 161 / 351 | time 9[s] | loss 0.55 | epoch 24 | iter 181 / 351 | time 10[s] | loss 0.52 | epoch 24 | iter 201 / 351 | time 12[s] | loss 0.51 | epoch 24 | iter 221 / 351 | time 13[s] | loss 0.51 | 
epoch 24 | iter 241 / 351 | time 14[s] | loss 0.52 | epoch 24 | iter 261 / 351 | time 15[s] | loss 0.52 | epoch 24 | iter 281 / 351 | time 17[s] | loss 0.52 | epoch 24 | iter 301 / 351 | time 18[s] | loss 0.51 | epoch 24 | iter 321 / 351 | time 19[s] | loss 0.51 | epoch 24 | iter 341 / 351 | time 20[s] | loss 0.50 Q 77+85 T 162 [91m☒[0m 165 --- Q 975+164 T 1139 [91m☒[0m 1140 --- Q 582+84 T 666 [91m☒[0m 669 --- Q 8+155 T 163 [92m☑[0m 163 --- Q 367+55 T 422 [91m☒[0m 423 --- Q 600+257 T 857 [92m☑[0m 857 --- Q 761+292 T 1053 [91m☒[0m 1055 --- Q 830+597 T 1427 [92m☑[0m 1427 --- Q 26+838 T 864 [91m☒[0m 865 --- Q 143+93 T 236 [91m☒[0m 235 --- val acc 25.760% | epoch 25 | iter 1 / 351 | time 0[s] | loss 0.49 | epoch 25 | iter 21 / 351 | time 1[s] | loss 0.48 | epoch 25 | iter 41 / 351 | time 2[s] | loss 0.49 | epoch 25 | iter 61 / 351 | time 3[s] | loss 0.49 | epoch 25 | iter 81 / 351 | time 4[s] | loss 0.49 | epoch 25 | iter 101 / 351 | time 6[s] | loss 0.49 | epoch 25 | iter 121 / 351 | time 7[s] | loss 0.50 | epoch 25 | iter 141 / 351 | time 8[s] | loss 0.52 | epoch 25 | iter 161 / 351 | time 9[s] | loss 0.49 | epoch 25 | iter 181 / 351 | time 10[s] | loss 0.49 | epoch 25 | iter 201 / 351 | time 11[s] | loss 0.50 | epoch 25 | iter 221 / 351 | time 13[s] | loss 0.52 | epoch 25 | iter 241 / 351 | time 14[s] | loss 0.55 | epoch 25 | iter 261 / 351 | time 15[s] | loss 0.53 | epoch 25 | iter 281 / 351 | time 16[s] | loss 0.53 | epoch 25 | iter 301 / 351 | time 17[s] | loss 0.53 | epoch 25 | iter 321 / 351 | time 18[s] | loss 0.53 | epoch 25 | iter 341 / 351 | time 20[s] | loss 0.53 Q 77+85 T 162 [91m☒[0m 161 --- Q 975+164 T 1139 [91m☒[0m 1141 --- Q 582+84 T 666 [92m☑[0m 666 --- Q 8+155 T 163 [91m☒[0m 164 --- Q 367+55 T 422 [92m☑[0m 422 --- Q 600+257 T 857 [91m☒[0m 859 --- Q 761+292 T 1053 [91m☒[0m 1055 --- Q 830+597 T 1427 [91m☒[0m 1425 --- Q 26+838 T 864 [91m☒[0m 862 --- Q 143+93 T 236 [91m☒[0m 238 --- val acc 28.500%
- 最初は100とか1200とかしか答えられないのにepochが進むにつれて表現力があがって正解できるようになる(かわいい)
- そしてやはり最後のひと桁が合わない
%python3 plt.ylim(0, 1) plt.plot(acc_list)
[<matplotlib.lines.Line2D object at 0x7fb61fa29c18>]
%python3
# Keep a handle on the accuracy history of the run above under a new name.
# NOTE: this binds the same list object (no copy is made), so any later
# in-place change to acc_list would also be visible through acc_list_baseline.
acc_list_baseline = acc_list
改善: 入力を反転させる
入力を反転させるだけで精度が上がるらしい(以下のログの Q には反転後の入力がそのまま表示されている。例: 「77+85」は「58+77」と表示される)
%python3 (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt') x_train, x_test = x_train[:, ::-1], x_test[:, ::-1] model = Seq2Seq(vocab_size, wordvec_size, hidden_size) optimizer = Adam() trainer = Trainer(model, optimizer) acc_list_reversed = [] for epoch in range(max_epoch): trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size, max_grad=max_grad) correct_num = 0 for i in range(len(x_test)): question, correct = x_test[[i]], t_test[[i]] verbose = i < 10 correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose) acc = float(correct_num) / len(x_test) acc_list_reversed.append(acc) print('val acc %.3f%%' % (acc * 100))
| epoch 1 | iter 1 / 351 | time 0[s] | loss 2.56 | epoch 1 | iter 21 / 351 | time 1[s] | loss 2.43 | epoch 1 | iter 41 / 351 | time 2[s] | loss 2.07 | epoch 1 | iter 61 / 351 | time 3[s] | loss 1.94 | epoch 1 | iter 81 / 351 | time 4[s] | loss 1.87 | epoch 1 | iter 101 / 351 | time 5[s] | loss 1.81 | epoch 1 | iter 121 / 351 | time 6[s] | loss 1.79 | epoch 1 | iter 141 / 351 | time 8[s] | loss 1.77 | epoch 1 | iter 161 / 351 | time 9[s] | loss 1.76 | epoch 1 | iter 181 / 351 | time 10[s] | loss 1.75 | epoch 1 | iter 201 / 351 | time 11[s] | loss 1.75 | epoch 1 | iter 221 / 351 | time 12[s] | loss 1.74 | epoch 1 | iter 241 / 351 | time 13[s] | loss 1.73 | epoch 1 | iter 261 / 351 | time 14[s] | loss 1.72 | epoch 1 | iter 281 / 351 | time 16[s] | loss 1.71 | epoch 1 | iter 301 / 351 | time 17[s] | loss 1.71 | epoch 1 | iter 321 / 351 | time 18[s] | loss 1.71 | epoch 1 | iter 341 / 351 | time 19[s] | loss 1.70 Q 58+77 T 162 [91m☒[0m 100 --- Q 461+579 T 1139 [91m☒[0m 1000 --- Q 48+285 T 666 [91m☒[0m 700 --- Q 551+8 T 163 [91m☒[0m 101 --- Q 55+763 T 422 [91m☒[0m 700 --- Q 752+006 T 857 [91m☒[0m 1000 --- Q 292+167 T 1053 [91m☒[0m 1000 --- Q 795+038 T 1427 [91m☒[0m 1101 --- Q 838+62 T 864 [91m☒[0m 710 --- Q 39+341 T 236 [91m☒[0m 211 --- val acc 0.360% | epoch 2 | iter 1 / 351 | time 0[s] | loss 1.69 | epoch 2 | iter 21 / 351 | time 1[s] | loss 1.67 | epoch 2 | iter 41 / 351 | time 2[s] | loss 1.68 | epoch 2 | iter 61 / 351 | time 3[s] | loss 1.66 | epoch 2 | iter 81 / 351 | time 4[s] | loss 1.65 | epoch 2 | iter 101 / 351 | time 5[s] | loss 1.64 | epoch 2 | iter 121 / 351 | time 6[s] | loss 1.63 | epoch 2 | iter 141 / 351 | time 8[s] | loss 1.62 | epoch 2 | iter 161 / 351 | time 9[s] | loss 1.61 | epoch 2 | iter 181 / 351 | time 10[s] | loss 1.60 | epoch 2 | iter 201 / 351 | time 11[s] | loss 1.59 | epoch 2 | iter 221 / 351 | time 12[s] | loss 1.58 | epoch 2 | iter 241 / 351 | time 13[s] | loss 1.56 | epoch 2 | iter 261 / 351 | time 14[s] | loss 1.55 | epoch 2 | iter 281 
/ 351 | time 16[s] | loss 1.54 | epoch 2 | iter 301 / 351 | time 17[s] | loss 1.52 | epoch 2 | iter 321 / 351 | time 18[s] | loss 1.50 | epoch 2 | iter 341 / 351 | time 19[s] | loss 1.48 Q 58+77 T 162 [91m☒[0m 145 --- Q 461+579 T 1139 [91m☒[0m 1004 --- Q 48+285 T 666 [91m☒[0m 544 --- Q 551+8 T 163 [91m☒[0m 124 --- Q 55+763 T 422 [91m☒[0m 300 --- Q 752+006 T 857 [91m☒[0m 800 --- Q 292+167 T 1053 [91m☒[0m 1000 --- Q 795+038 T 1427 [91m☒[0m 1574 --- Q 838+62 T 864 [91m☒[0m 700 --- Q 39+341 T 236 [91m☒[0m 300 --- val acc 0.660% | epoch 3 | iter 1 / 351 | time 0[s] | loss 1.45 | epoch 3 | iter 21 / 351 | time 1[s] | loss 1.45 | epoch 3 | iter 41 / 351 | time 2[s] | loss 1.43 | epoch 3 | iter 61 / 351 | time 3[s] | loss 1.41 | epoch 3 | iter 81 / 351 | time 4[s] | loss 1.39 | epoch 3 | iter 101 / 351 | time 5[s] | loss 1.38 | epoch 3 | iter 121 / 351 | time 7[s] | loss 1.36 | epoch 3 | iter 141 / 351 | time 8[s] | loss 1.35 | epoch 3 | iter 161 / 351 | time 9[s] | loss 1.33 | epoch 3 | iter 181 / 351 | time 10[s] | loss 1.31 | epoch 3 | iter 201 / 351 | time 11[s] | loss 1.30 | epoch 3 | iter 221 / 351 | time 12[s] | loss 1.28 | epoch 3 | iter 241 / 351 | time 14[s] | loss 1.27 | epoch 3 | iter 261 / 351 | time 15[s] | loss 1.26 | epoch 3 | iter 281 / 351 | time 16[s] | loss 1.23 | epoch 3 | iter 301 / 351 | time 17[s] | loss 1.23 | epoch 3 | iter 321 / 351 | time 18[s] | loss 1.21 | epoch 3 | iter 341 / 351 | time 20[s] | loss 1.20 Q 58+77 T 162 [91m☒[0m 158 --- Q 461+579 T 1139 [91m☒[0m 1148 --- Q 48+285 T 666 [91m☒[0m 664 --- Q 551+8 T 163 [91m☒[0m 164 --- Q 55+763 T 422 [91m☒[0m 408 --- Q 752+006 T 857 [91m☒[0m 878 --- Q 292+167 T 1053 [91m☒[0m 1024 --- Q 795+038 T 1427 [91m☒[0m 1448 --- Q 838+62 T 864 [91m☒[0m 875 --- Q 39+341 T 236 [91m☒[0m 238 --- val acc 3.300% | epoch 4 | iter 1 / 351 | time 0[s] | loss 1.20 | epoch 4 | iter 21 / 351 | time 1[s] | loss 1.17 | epoch 4 | iter 41 / 351 | time 2[s] | loss 1.14 | epoch 4 | iter 61 / 351 | time 3[s] | loss 1.13 | 
epoch 4 | iter 81 / 351 | time 4[s] | loss 1.12 | epoch 4 | iter 101 / 351 | time 5[s] | loss 1.10 | epoch 4 | iter 121 / 351 | time 7[s] | loss 1.08 | epoch 4 | iter 141 / 351 | time 8[s] | loss 1.07 | epoch 4 | iter 161 / 351 | time 9[s] | loss 1.05 | epoch 4 | iter 181 / 351 | time 10[s] | loss 1.04 | epoch 4 | iter 201 / 351 | time 11[s] | loss 1.02 | epoch 4 | iter 221 / 351 | time 12[s] | loss 1.01 | epoch 4 | iter 241 / 351 | time 14[s] | loss 0.98 | epoch 4 | iter 261 / 351 | time 15[s] | loss 0.98 | epoch 4 | iter 281 / 351 | time 16[s] | loss 0.97 | epoch 4 | iter 301 / 351 | time 17[s] | loss 0.95 | epoch 4 | iter 321 / 351 | time 18[s] | loss 0.94 | epoch 4 | iter 341 / 351 | time 20[s] | loss 0.93 Q 58+77 T 162 [91m☒[0m 158 --- Q 461+579 T 1139 [91m☒[0m 1222 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 156 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 862 --- Q 292+167 T 1053 [91m☒[0m 1153 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 862 --- Q 39+341 T 236 [91m☒[0m 238 --- val acc 7.860% | epoch 5 | iter 1 / 351 | time 0[s] | loss 0.93 | epoch 5 | iter 21 / 351 | time 1[s] | loss 0.90 | epoch 5 | iter 41 / 351 | time 2[s] | loss 0.90 | epoch 5 | iter 61 / 351 | time 3[s] | loss 0.88 | epoch 5 | iter 81 / 351 | time 4[s] | loss 0.88 | epoch 5 | iter 101 / 351 | time 5[s] | loss 0.86 | epoch 5 | iter 121 / 351 | time 7[s] | loss 0.87 | epoch 5 | iter 141 / 351 | time 8[s] | loss 0.86 | epoch 5 | iter 161 / 351 | time 9[s] | loss 0.84 | epoch 5 | iter 181 / 351 | time 10[s] | loss 0.84 | epoch 5 | iter 201 / 351 | time 11[s] | loss 0.83 | epoch 5 | iter 221 / 351 | time 12[s] | loss 0.82 | epoch 5 | iter 241 / 351 | time 14[s] | loss 0.81 | epoch 5 | iter 261 / 351 | time 15[s] | loss 0.80 | epoch 5 | iter 281 / 351 | time 16[s] | loss 0.80 | epoch 5 | iter 301 / 351 | time 17[s] | loss 0.79 | epoch 5 | iter 321 / 351 | time 18[s] | loss 0.78 | epoch 5 | iter 341 / 351 | time 19[s] | loss 0.78 Q 58+77 T 162 
[91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1134 --- Q 48+285 T 666 [91m☒[0m 662 --- Q 551+8 T 163 [91m☒[0m 156 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 855 --- Q 292+167 T 1053 [91m☒[0m 1052 --- Q 795+038 T 1427 [91m☒[0m 1425 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [91m☒[0m 231 --- val acc 12.480% | epoch 6 | iter 1 / 351 | time 0[s] | loss 0.78 | epoch 6 | iter 21 / 351 | time 1[s] | loss 0.75 | epoch 6 | iter 41 / 351 | time 2[s] | loss 0.75 | epoch 6 | iter 61 / 351 | time 3[s] | loss 0.75 | epoch 6 | iter 81 / 351 | time 4[s] | loss 0.74 | epoch 6 | iter 101 / 351 | time 5[s] | loss 0.74 | epoch 6 | iter 121 / 351 | time 7[s] | loss 0.73 | epoch 6 | iter 141 / 351 | time 8[s] | loss 0.73 | epoch 6 | iter 161 / 351 | time 9[s] | loss 0.72 | epoch 6 | iter 181 / 351 | time 10[s] | loss 0.72 | epoch 6 | iter 201 / 351 | time 11[s] | loss 0.72 | epoch 6 | iter 221 / 351 | time 12[s] | loss 0.72 | epoch 6 | iter 241 / 351 | time 14[s] | loss 0.71 | epoch 6 | iter 261 / 351 | time 15[s] | loss 0.70 | epoch 6 | iter 281 / 351 | time 16[s] | loss 0.69 | epoch 6 | iter 301 / 351 | time 17[s] | loss 0.69 | epoch 6 | iter 321 / 351 | time 18[s] | loss 0.68 | epoch 6 | iter 341 / 351 | time 20[s] | loss 0.68 Q 58+77 T 162 [91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1137 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 855 --- Q 292+167 T 1053 [91m☒[0m 1054 --- Q 795+038 T 1427 [91m☒[0m 1426 --- Q 838+62 T 864 [91m☒[0m 861 --- Q 39+341 T 236 [91m☒[0m 239 --- val acc 16.960% | epoch 7 | iter 1 / 351 | time 0[s] | loss 0.65 | epoch 7 | iter 21 / 351 | time 1[s] | loss 0.66 | epoch 7 | iter 41 / 351 | time 2[s] | loss 0.66 | epoch 7 | iter 61 / 351 | time 3[s] | loss 0.65 | epoch 7 | iter 81 / 351 | time 4[s] | loss 0.64 | epoch 7 | iter 101 / 351 | time 5[s] | loss 0.65 | epoch 7 | iter 121 / 351 | time 7[s] | loss 0.65 | epoch 7 | iter 141 / 351 | time 8[s] | loss 0.64 | 
epoch 7 | iter 161 / 351 | time 9[s] | loss 0.63 | epoch 7 | iter 181 / 351 | time 10[s] | loss 0.63 | epoch 7 | iter 201 / 351 | time 11[s] | loss 0.63 | epoch 7 | iter 221 / 351 | time 13[s] | loss 0.63 | epoch 7 | iter 241 / 351 | time 14[s] | loss 0.63 | epoch 7 | iter 261 / 351 | time 15[s] | loss 0.62 | epoch 7 | iter 281 / 351 | time 16[s] | loss 0.62 | epoch 7 | iter 301 / 351 | time 17[s] | loss 0.61 | epoch 7 | iter 321 / 351 | time 18[s] | loss 0.61 | epoch 7 | iter 341 / 351 | time 20[s] | loss 0.61 Q 58+77 T 162 [91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1140 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [91m☒[0m 420 --- Q 752+006 T 857 [91m☒[0m 855 --- Q 292+167 T 1053 [91m☒[0m 1054 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 863 --- Q 39+341 T 236 [91m☒[0m 239 --- val acc 16.400% | epoch 8 | iter 1 / 351 | time 0[s] | loss 0.62 | epoch 8 | iter 21 / 351 | time 1[s] | loss 0.60 | epoch 8 | iter 41 / 351 | time 2[s] | loss 0.59 | epoch 8 | iter 61 / 351 | time 3[s] | loss 0.59 | epoch 8 | iter 81 / 351 | time 4[s] | loss 0.59 | epoch 8 | iter 101 / 351 | time 6[s] | loss 0.58 | epoch 8 | iter 121 / 351 | time 7[s] | loss 0.58 | epoch 8 | iter 141 / 351 | time 8[s] | loss 0.58 | epoch 8 | iter 161 / 351 | time 9[s] | loss 0.58 | epoch 8 | iter 181 / 351 | time 10[s] | loss 0.58 | epoch 8 | iter 201 / 351 | time 11[s] | loss 0.57 | epoch 8 | iter 221 / 351 | time 13[s] | loss 0.57 | epoch 8 | iter 241 / 351 | time 14[s] | loss 0.57 | epoch 8 | iter 261 / 351 | time 15[s] | loss 0.57 | epoch 8 | iter 281 / 351 | time 16[s] | loss 0.57 | epoch 8 | iter 301 / 351 | time 17[s] | loss 0.57 | epoch 8 | iter 321 / 351 | time 19[s] | loss 0.55 | epoch 8 | iter 341 / 351 | time 20[s] | loss 0.55 Q 58+77 T 162 [91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1134 --- Q 48+285 T 666 [91m☒[0m 668 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [91m☒[0m 420 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 
[92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 865 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 22.620% | epoch 9 | iter 1 / 351 | time 0[s] | loss 0.53 | epoch 9 | iter 21 / 351 | time 1[s] | loss 0.54 | epoch 9 | iter 41 / 351 | time 2[s] | loss 0.54 | epoch 9 | iter 61 / 351 | time 3[s] | loss 0.55 | epoch 9 | iter 81 / 351 | time 4[s] | loss 0.55 | epoch 9 | iter 101 / 351 | time 6[s] | loss 0.54 | epoch 9 | iter 121 / 351 | time 7[s] | loss 0.54 | epoch 9 | iter 141 / 351 | time 8[s] | loss 0.55 | epoch 9 | iter 161 / 351 | time 9[s] | loss 0.55 | epoch 9 | iter 181 / 351 | time 10[s] | loss 0.54 | epoch 9 | iter 201 / 351 | time 12[s] | loss 0.53 | epoch 9 | iter 221 / 351 | time 13[s] | loss 0.53 | epoch 9 | iter 241 / 351 | time 14[s] | loss 0.53 | epoch 9 | iter 261 / 351 | time 15[s] | loss 0.53 | epoch 9 | iter 281 / 351 | time 16[s] | loss 0.54 | epoch 9 | iter 301 / 351 | time 17[s] | loss 0.54 | epoch 9 | iter 321 / 351 | time 19[s] | loss 0.53 | epoch 9 | iter 341 / 351 | time 20[s] | loss 0.52 Q 58+77 T 162 [91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1138 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 856 --- Q 292+167 T 1053 [91m☒[0m 1152 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [91m☒[0m 238 --- val acc 24.120% | epoch 10 | iter 1 / 351 | time 0[s] | loss 0.51 | epoch 10 | iter 21 / 351 | time 1[s] | loss 0.52 | epoch 10 | iter 41 / 351 | time 2[s] | loss 0.52 | epoch 10 | iter 61 / 351 | time 3[s] | loss 0.51 | epoch 10 | iter 81 / 351 | time 4[s] | loss 0.50 | epoch 10 | iter 101 / 351 | time 5[s] | loss 0.51 | epoch 10 | iter 121 / 351 | time 7[s] | loss 0.51 | epoch 10 | iter 141 / 351 | time 8[s] | loss 0.51 | epoch 10 | iter 161 / 351 | time 9[s] | loss 0.50 | epoch 10 | iter 181 / 351 | time 10[s] | loss 0.51 | epoch 10 | iter 201 / 351 | time 11[s] | loss 0.51 | epoch 10 | iter 221 / 351 | 
time 13[s] | loss 0.50 | epoch 10 | iter 241 / 351 | time 14[s] | loss 0.49 | epoch 10 | iter 261 / 351 | time 15[s] | loss 0.49 | epoch 10 | iter 281 / 351 | time 16[s] | loss 0.50 | epoch 10 | iter 301 / 351 | time 17[s] | loss 0.50 | epoch 10 | iter 321 / 351 | time 18[s] | loss 0.49 | epoch 10 | iter 341 / 351 | time 20[s] | loss 0.49 Q 58+77 T 162 [91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1137 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [91m☒[0m 420 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1429 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [91m☒[0m 235 --- val acc 29.780% | epoch 11 | iter 1 / 351 | time 0[s] | loss 0.46 | epoch 11 | iter 21 / 351 | time 1[s] | loss 0.47 | epoch 11 | iter 41 / 351 | time 2[s] | loss 0.48 | epoch 11 | iter 61 / 351 | time 3[s] | loss 0.49 | epoch 11 | iter 81 / 351 | time 4[s] | loss 0.48 | epoch 11 | iter 101 / 351 | time 5[s] | loss 0.48 | epoch 11 | iter 121 / 351 | time 7[s] | loss 0.48 | epoch 11 | iter 141 / 351 | time 8[s] | loss 0.48 | epoch 11 | iter 161 / 351 | time 9[s] | loss 0.48 | epoch 11 | iter 181 / 351 | time 10[s] | loss 0.48 | epoch 11 | iter 201 / 351 | time 11[s] | loss 0.48 | epoch 11 | iter 221 / 351 | time 13[s] | loss 0.47 | epoch 11 | iter 241 / 351 | time 14[s] | loss 0.46 | epoch 11 | iter 261 / 351 | time 15[s] | loss 0.47 | epoch 11 | iter 281 / 351 | time 16[s] | loss 0.46 | epoch 11 | iter 301 / 351 | time 17[s] | loss 0.47 | epoch 11 | iter 321 / 351 | time 18[s] | loss 0.46 | epoch 11 | iter 341 / 351 | time 20[s] | loss 0.46 Q 58+77 T 162 [91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1138 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [91m☒[0m 420 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 866 --- Q 39+341 T 236 [91m☒[0m 233 --- val acc 26.080% | epoch 12 | iter 1 / 351 | 
time 0[s] | loss 0.47 | epoch 12 | iter 21 / 351 | time 1[s] | loss 0.46 | epoch 12 | iter 41 / 351 | time 2[s] | loss 0.46 | epoch 12 | iter 61 / 351 | time 3[s] | loss 0.45 | epoch 12 | iter 81 / 351 | time 4[s] | loss 0.45 | epoch 12 | iter 101 / 351 | time 5[s] | loss 0.46 | epoch 12 | iter 121 / 351 | time 7[s] | loss 0.45 | epoch 12 | iter 141 / 351 | time 8[s] | loss 0.45 | epoch 12 | iter 161 / 351 | time 9[s] | loss 0.45 | epoch 12 | iter 181 / 351 | time 10[s] | loss 0.45 | epoch 12 | iter 201 / 351 | time 11[s] | loss 0.45 | epoch 12 | iter 221 / 351 | time 13[s] | loss 0.45 | epoch 12 | iter 241 / 351 | time 14[s] | loss 0.48 | epoch 12 | iter 261 / 351 | time 15[s] | loss 0.47 | epoch 12 | iter 281 / 351 | time 16[s] | loss 0.45 | epoch 12 | iter 301 / 351 | time 17[s] | loss 0.44 | epoch 12 | iter 321 / 351 | time 18[s] | loss 0.43 | epoch 12 | iter 341 / 351 | time 20[s] | loss 0.43 Q 58+77 T 162 [91m☒[0m 161 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [91m☒[0m 420 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 865 --- Q 39+341 T 236 [91m☒[0m 235 --- val acc 28.100% | epoch 13 | iter 1 / 351 | time 0[s] | loss 0.46 | epoch 13 | iter 21 / 351 | time 1[s] | loss 0.43 | epoch 13 | iter 41 / 351 | time 2[s] | loss 0.43 | epoch 13 | iter 61 / 351 | time 3[s] | loss 0.43 | epoch 13 | iter 81 / 351 | time 4[s] | loss 0.43 | epoch 13 | iter 101 / 351 | time 5[s] | loss 0.45 | epoch 13 | iter 121 / 351 | time 7[s] | loss 0.44 | epoch 13 | iter 141 / 351 | time 8[s] | loss 0.44 | epoch 13 | iter 161 / 351 | time 9[s] | loss 0.44 | epoch 13 | iter 181 / 351 | time 10[s] | loss 0.43 | epoch 13 | iter 201 / 351 | time 11[s] | loss 0.42 | epoch 13 | iter 221 / 351 | time 13[s] | loss 0.42 | epoch 13 | iter 241 / 351 | time 14[s] | loss 0.42 | epoch 13 | iter 261 / 351 | time 15[s] | loss 0.43 | epoch 13 | iter 
281 / 351 | time 16[s] | loss 0.43 | epoch 13 | iter 301 / 351 | time 17[s] | loss 0.43 | epoch 13 | iter 321 / 351 | time 18[s] | loss 0.44 | epoch 13 | iter 341 / 351 | time 20[s] | loss 0.43 Q 58+77 T 162 [91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1141 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 160 --- Q 55+763 T 422 [91m☒[0m 424 --- Q 752+006 T 857 [91m☒[0m 859 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1429 --- Q 838+62 T 864 [91m☒[0m 865 --- Q 39+341 T 236 [91m☒[0m 237 --- val acc 33.320% | epoch 14 | iter 1 / 351 | time 0[s] | loss 0.44 | epoch 14 | iter 21 / 351 | time 1[s] | loss 0.42 | epoch 14 | iter 41 / 351 | time 2[s] | loss 0.42 | epoch 14 | iter 61 / 351 | time 3[s] | loss 0.43 | epoch 14 | iter 81 / 351 | time 4[s] | loss 0.43 | epoch 14 | iter 101 / 351 | time 5[s] | loss 0.41 | epoch 14 | iter 121 / 351 | time 7[s] | loss 0.40 | epoch 14 | iter 141 / 351 | time 8[s] | loss 0.41 | epoch 14 | iter 161 / 351 | time 9[s] | loss 0.41 | epoch 14 | iter 181 / 351 | time 10[s] | loss 0.42 | epoch 14 | iter 201 / 351 | time 11[s] | loss 0.44 | epoch 14 | iter 221 / 351 | time 13[s] | loss 0.43 | epoch 14 | iter 241 / 351 | time 14[s] | loss 0.42 | epoch 14 | iter 261 / 351 | time 15[s] | loss 0.41 | epoch 14 | iter 281 / 351 | time 16[s] | loss 0.40 | epoch 14 | iter 301 / 351 | time 17[s] | loss 0.40 | epoch 14 | iter 321 / 351 | time 18[s] | loss 0.40 | epoch 14 | iter 341 / 351 | time 20[s] | loss 0.40 Q 58+77 T 162 [91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1138 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 856 --- Q 292+167 T 1053 [91m☒[0m 1052 --- Q 795+038 T 1427 [91m☒[0m 1426 --- Q 838+62 T 864 [91m☒[0m 862 --- Q 39+341 T 236 [91m☒[0m 235 --- val acc 35.180% | epoch 15 | iter 1 / 351 | time 0[s] | loss 0.40 | epoch 15 | iter 21 / 351 | time 1[s] | loss 0.40 | epoch 15 | iter 41 / 351 | time 2[s] | loss 0.42 | epoch 15 | iter 61 / 351 
| time 3[s] | loss 0.41 | epoch 15 | iter 81 / 351 | time 4[s] | loss 0.40 | epoch 15 | iter 101 / 351 | time 5[s] | loss 0.40 | epoch 15 | iter 121 / 351 | time 7[s] | loss 0.39 | epoch 15 | iter 141 / 351 | time 8[s] | loss 0.39 | epoch 15 | iter 161 / 351 | time 9[s] | loss 0.40 | epoch 15 | iter 181 / 351 | time 10[s] | loss 0.41 | epoch 15 | iter 201 / 351 | time 11[s] | loss 0.41 | epoch 15 | iter 221 / 351 | time 13[s] | loss 0.39 | epoch 15 | iter 241 / 351 | time 14[s] | loss 0.39 | epoch 15 | iter 261 / 351 | time 15[s] | loss 0.40 | epoch 15 | iter 281 / 351 | time 16[s] | loss 0.41 | epoch 15 | iter 301 / 351 | time 17[s] | loss 0.39 | epoch 15 | iter 321 / 351 | time 18[s] | loss 0.39 | epoch 15 | iter 341 / 351 | time 20[s] | loss 0.38 Q 58+77 T 162 [91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1138 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [91m☒[0m 164 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [91m☒[0m 1054 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 866 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 39.040% | epoch 16 | iter 1 / 351 | time 0[s] | loss 0.39 | epoch 16 | iter 21 / 351 | time 1[s] | loss 0.37 | epoch 16 | iter 41 / 351 | time 2[s] | loss 0.37 | epoch 16 | iter 61 / 351 | time 3[s] | loss 0.38 | epoch 16 | iter 81 / 351 | time 4[s] | loss 0.39 | epoch 16 | iter 101 / 351 | time 5[s] | loss 0.38 | epoch 16 | iter 121 / 351 | time 7[s] | loss 0.38 | epoch 16 | iter 141 / 351 | time 8[s] | loss 0.37 | epoch 16 | iter 161 / 351 | time 9[s] | loss 0.40 | epoch 16 | iter 181 / 351 | time 10[s] | loss 0.39 | epoch 16 | iter 201 / 351 | time 11[s] | loss 0.38 | epoch 16 | iter 221 / 351 | time 12[s] | loss 0.41 | epoch 16 | iter 241 / 351 | time 14[s] | loss 0.41 | epoch 16 | iter 261 / 351 | time 15[s] | loss 0.40 | epoch 16 | iter 281 / 351 | time 16[s] | loss 0.40 | epoch 16 | iter 301 / 351 | time 17[s] | loss 0.38 | epoch 16 | iter 321 / 351 | time 18[s] | loss 0.38 | epoch 
16 | iter 341 / 351 | time 19[s] | loss 0.37 Q 58+77 T 162 [91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1138 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 [91m☒[0m 1054 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 865 --- Q 39+341 T 236 [91m☒[0m 237 --- val acc 43.220% | epoch 17 | iter 1 / 351 | time 0[s] | loss 0.35 | epoch 17 | iter 21 / 351 | time 1[s] | loss 0.36 | epoch 17 | iter 41 / 351 | time 2[s] | loss 0.37 | epoch 17 | iter 61 / 351 | time 3[s] | loss 0.36 | epoch 17 | iter 81 / 351 | time 4[s] | loss 0.36 | epoch 17 | iter 101 / 351 | time 5[s] | loss 0.37 | epoch 17 | iter 121 / 351 | time 7[s] | loss 0.37 | epoch 17 | iter 141 / 351 | time 8[s] | loss 0.37 | epoch 17 | iter 161 / 351 | time 9[s] | loss 0.37 | epoch 17 | iter 181 / 351 | time 10[s] | loss 0.38 | epoch 17 | iter 201 / 351 | time 11[s] | loss 0.38 | epoch 17 | iter 221 / 351 | time 13[s] | loss 0.37 | epoch 17 | iter 241 / 351 | time 14[s] | loss 0.37 | epoch 17 | iter 261 / 351 | time 15[s] | loss 0.37 | epoch 17 | iter 281 / 351 | time 16[s] | loss 0.37 | epoch 17 | iter 301 / 351 | time 17[s] | loss 0.37 | epoch 17 | iter 321 / 351 | time 19[s] | loss 0.37 | epoch 17 | iter 341 / 351 | time 20[s] | loss 0.37 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 164 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 856 --- Q 292+167 T 1053 [91m☒[0m 1052 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 41.100% | epoch 18 | iter 1 / 351 | time 0[s] | loss 0.36 | epoch 18 | iter 21 / 351 | time 1[s] | loss 0.37 | epoch 18 | iter 41 / 351 | time 2[s] | loss 0.37 | epoch 18 | iter 61 / 351 | time 3[s] | loss 0.37 | epoch 18 | iter 81 / 351 | time 4[s] | loss 0.35 | epoch 18 | iter 101 / 351 | time 5[s] | loss 0.35 | epoch 18 | iter 121 
/ 351 | time 7[s] | loss 0.37 | epoch 18 | iter 141 / 351 | time 8[s] | loss 0.36 | epoch 18 | iter 161 / 351 | time 9[s] | loss 0.35 | epoch 18 | iter 181 / 351 | time 10[s] | loss 0.36 | epoch 18 | iter 201 / 351 | time 11[s] | loss 0.37 | epoch 18 | iter 221 / 351 | time 13[s] | loss 0.38 | epoch 18 | iter 241 / 351 | time 14[s] | loss 0.38 | epoch 18 | iter 261 / 351 | time 15[s] | loss 0.36 | epoch 18 | iter 281 / 351 | time 16[s] | loss 0.36 | epoch 18 | iter 301 / 351 | time 17[s] | loss 0.36 | epoch 18 | iter 321 / 351 | time 18[s] | loss 0.35 | epoch 18 | iter 341 / 351 | time 20[s] | loss 0.34 Q 58+77 T 162 [91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1141 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 38.580% | epoch 19 | iter 1 / 351 | time 0[s] | loss 0.36 | epoch 19 | iter 21 / 351 | time 1[s] | loss 0.35 | epoch 19 | iter 41 / 351 | time 2[s] | loss 0.35 | epoch 19 | iter 61 / 351 | time 3[s] | loss 0.35 | epoch 19 | iter 81 / 351 | time 4[s] | loss 0.35 | epoch 19 | iter 101 / 351 | time 6[s] | loss 0.34 | epoch 19 | iter 121 / 351 | time 7[s] | loss 0.35 | epoch 19 | iter 141 / 351 | time 8[s] | loss 0.35 | epoch 19 | iter 161 / 351 | time 9[s] | loss 0.35 | epoch 19 | iter 181 / 351 | time 10[s] | loss 0.35 | epoch 19 | iter 201 / 351 | time 11[s] | loss 0.34 | epoch 19 | iter 221 / 351 | time 13[s] | loss 0.35 | epoch 19 | iter 241 / 351 | time 14[s] | loss 0.36 | epoch 19 | iter 261 / 351 | time 15[s] | loss 0.37 | epoch 19 | iter 281 / 351 | time 16[s] | loss 0.36 | epoch 19 | iter 301 / 351 | time 17[s] | loss 0.35 | epoch 19 | iter 321 / 351 | time 19[s] | loss 0.35 | epoch 19 | iter 341 / 351 | time 20[s] | loss 0.35 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [91m☒[0m 1138 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 
T 163 [91m☒[0m 164 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [91m☒[0m 1052 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [91m☒[0m 862 --- Q 39+341 T 236 [91m☒[0m 235 --- val acc 46.720% | epoch 20 | iter 1 / 351 | time 0[s] | loss 0.32 | epoch 20 | iter 21 / 351 | time 1[s] | loss 0.34 | epoch 20 | iter 41 / 351 | time 2[s] | loss 0.36 | epoch 20 | iter 61 / 351 | time 3[s] | loss 0.36 | epoch 20 | iter 81 / 351 | time 4[s] | loss 0.35 | epoch 20 | iter 101 / 351 | time 5[s] | loss 0.35 | epoch 20 | iter 121 / 351 | time 7[s] | loss 0.36 | epoch 20 | iter 141 / 351 | time 8[s] | loss 0.35 | epoch 20 | iter 161 / 351 | time 9[s] | loss 0.34 | epoch 20 | iter 181 / 351 | time 10[s] | loss 0.34 | epoch 20 | iter 201 / 351 | time 11[s] | loss 0.33 | epoch 20 | iter 221 / 351 | time 13[s] | loss 0.33 | epoch 20 | iter 241 / 351 | time 14[s] | loss 0.33 | epoch 20 | iter 261 / 351 | time 15[s] | loss 0.34 | epoch 20 | iter 281 / 351 | time 16[s] | loss 0.34 | epoch 20 | iter 301 / 351 | time 17[s] | loss 0.34 | epoch 20 | iter 321 / 351 | time 18[s] | loss 0.34 | epoch 20 | iter 341 / 351 | time 20[s] | loss 0.34 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [91m☒[0m 1141 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 162 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 856 --- Q 292+167 T 1053 [91m☒[0m 1054 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [91m☒[0m 865 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 48.480% | epoch 21 | iter 1 / 351 | time 0[s] | loss 0.32 | epoch 21 | iter 21 / 351 | time 1[s] | loss 0.33 | epoch 21 | iter 41 / 351 | time 2[s] | loss 0.34 | epoch 21 | iter 61 / 351 | time 3[s] | loss 0.34 | epoch 21 | iter 81 / 351 | time 4[s] | loss 0.33 | epoch 21 | iter 101 / 351 | time 6[s] | loss 0.33 | epoch 21 | iter 121 / 351 | time 7[s] | loss 0.33 | epoch 21 | iter 141 / 351 | time 8[s] | loss 0.33 | epoch 21 | iter 161 / 351 | time 9[s] | loss 0.33 | epoch 21 | 
iter 181 / 351 | time 10[s] | loss 0.33 | epoch 21 | iter 201 / 351 | time 11[s] | loss 0.32 | epoch 21 | iter 221 / 351 | time 13[s] | loss 0.33 | epoch 21 | iter 241 / 351 | time 14[s] | loss 0.33 | epoch 21 | iter 261 / 351 | time 15[s] | loss 0.33 | epoch 21 | iter 281 / 351 | time 16[s] | loss 0.32 | epoch 21 | iter 301 / 351 | time 17[s] | loss 0.32 | epoch 21 | iter 321 / 351 | time 19[s] | loss 0.33 | epoch 21 | iter 341 / 351 | time 20[s] | loss 0.33 Q 58+77 T 162 [91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1140 --- Q 48+285 T 666 [91m☒[0m 665 --- Q 551+8 T 163 [91m☒[0m 164 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1429 --- Q 838+62 T 864 [91m☒[0m 865 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 44.940% | epoch 22 | iter 1 / 351 | time 0[s] | loss 0.31 | epoch 22 | iter 21 / 351 | time 1[s] | loss 0.32 | epoch 22 | iter 41 / 351 | time 2[s] | loss 0.32 | epoch 22 | iter 61 / 351 | time 3[s] | loss 0.34 | epoch 22 | iter 81 / 351 | time 4[s] | loss 0.32 | epoch 22 | iter 101 / 351 | time 6[s] | loss 0.33 | epoch 22 | iter 121 / 351 | time 7[s] | loss 0.33 | epoch 22 | iter 141 / 351 | time 8[s] | loss 0.34 | epoch 22 | iter 161 / 351 | time 9[s] | loss 0.34 | epoch 22 | iter 181 / 351 | time 10[s] | loss 0.34 | epoch 22 | iter 201 / 351 | time 11[s] | loss 0.32 | epoch 22 | iter 221 / 351 | time 13[s] | loss 0.31 | epoch 22 | iter 241 / 351 | time 14[s] | loss 0.32 | epoch 22 | iter 261 / 351 | time 15[s] | loss 0.31 | epoch 22 | iter 281 / 351 | time 16[s] | loss 0.32 | epoch 22 | iter 301 / 351 | time 17[s] | loss 0.33 | epoch 22 | iter 321 / 351 | time 19[s] | loss 0.33 | epoch 22 | iter 341 / 351 | time 20[s] | loss 0.33 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [91m☒[0m 667 --- Q 551+8 T 163 [91m☒[0m 162 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [91m☒[0m 1052 --- Q 795+038 T 1427 
[92m☑[0m 1427 --- Q 838+62 T 864 [91m☒[0m 865 --- Q 39+341 T 236 [91m☒[0m 235 --- val acc 44.800% | epoch 23 | iter 1 / 351 | time 0[s] | loss 0.32 | epoch 23 | iter 21 / 351 | time 1[s] | loss 0.31 | epoch 23 | iter 41 / 351 | time 2[s] | loss 0.32 | epoch 23 | iter 61 / 351 | time 3[s] | loss 0.31 | epoch 23 | iter 81 / 351 | time 4[s] | loss 0.31 | epoch 23 | iter 101 / 351 | time 5[s] | loss 0.32 | epoch 23 | iter 121 / 351 | time 7[s] | loss 0.32 | epoch 23 | iter 141 / 351 | time 8[s] | loss 0.33 | epoch 23 | iter 161 / 351 | time 9[s] | loss 0.32 | epoch 23 | iter 181 / 351 | time 10[s] | loss 0.32 | epoch 23 | iter 201 / 351 | time 11[s] | loss 0.33 | epoch 23 | iter 221 / 351 | time 13[s] | loss 0.33 | epoch 23 | iter 241 / 351 | time 14[s] | loss 0.33 | epoch 23 | iter 261 / 351 | time 15[s] | loss 0.32 | epoch 23 | iter 281 / 351 | time 16[s] | loss 0.32 | epoch 23 | iter 301 / 351 | time 17[s] | loss 0.31 | epoch 23 | iter 321 / 351 | time 19[s] | loss 0.31 | epoch 23 | iter 341 / 351 | time 20[s] | loss 0.31 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [91m☒[0m 1140 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 162 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1426 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [91m☒[0m 235 --- val acc 41.060% | epoch 24 | iter 1 / 351 | time 0[s] | loss 0.34 | epoch 24 | iter 21 / 351 | time 1[s] | loss 0.32 | epoch 24 | iter 41 / 351 | time 2[s] | loss 0.30 | epoch 24 | iter 61 / 351 | time 3[s] | loss 0.30 | epoch 24 | iter 81 / 351 | time 4[s] | loss 0.30 | epoch 24 | iter 101 / 351 | time 5[s] | loss 0.30 | epoch 24 | iter 121 / 351 | time 7[s] | loss 0.31 | epoch 24 | iter 141 / 351 | time 8[s] | loss 0.32 | epoch 24 | iter 161 / 351 | time 9[s] | loss 0.32 | epoch 24 | iter 181 / 351 | time 10[s] | loss 0.32 | epoch 24 | iter 201 / 351 | time 11[s] | loss 0.31 | epoch 24 | iter 221 / 351 | time 13[s] | loss 
0.32 | epoch 24 | iter 241 / 351 | time 14[s] | loss 0.32 | epoch 24 | iter 261 / 351 | time 15[s] | loss 0.31 | epoch 24 | iter 281 / 351 | time 16[s] | loss 0.31 | epoch 24 | iter 301 / 351 | time 17[s] | loss 0.31 | epoch 24 | iter 321 / 351 | time 18[s] | loss 0.30 | epoch 24 | iter 341 / 351 | time 20[s] | loss 0.30 Q 58+77 T 162 [91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1140 --- Q 48+285 T 666 [91m☒[0m 665 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [91m☒[0m 1054 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 45.180% | epoch 25 | iter 1 / 351 | time 0[s] | loss 0.34 | epoch 25 | iter 21 / 351 | time 1[s] | loss 0.29 | epoch 25 | iter 41 / 351 | time 2[s] | loss 0.30 | epoch 25 | iter 61 / 351 | time 3[s] | loss 0.30 | epoch 25 | iter 81 / 351 | time 4[s] | loss 0.30 | epoch 25 | iter 101 / 351 | time 6[s] | loss 0.29 | epoch 25 | iter 121 / 351 | time 7[s] | loss 0.31 | epoch 25 | iter 141 / 351 | time 8[s] | loss 0.32 | epoch 25 | iter 161 / 351 | time 9[s] | loss 0.32 | epoch 25 | iter 181 / 351 | time 10[s] | loss 0.31 | epoch 25 | iter 201 / 351 | time 11[s] | loss 0.32 | epoch 25 | iter 221 / 351 | time 13[s] | loss 0.30 | epoch 25 | iter 241 / 351 | time 14[s] | loss 0.29 | epoch 25 | iter 261 / 351 | time 15[s] | loss 0.30 | epoch 25 | iter 281 / 351 | time 16[s] | loss 0.30 | epoch 25 | iter 301 / 351 | time 17[s] | loss 0.30 | epoch 25 | iter 321 / 351 | time 19[s] | loss 0.30 | epoch 25 | iter 341 / 351 | time 20[s] | loss 0.30 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [91m☒[0m 1141 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [91m☒[0m 162 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 [91m☒[0m 1054 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [91m☒[0m 862 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 51.620%
# %python3  (Zeppelin paragraph magic of the original notebook cell)
# Plot the per-epoch validation accuracy of the baseline run against the
# reversed-input run on a shared 0..1 accuracy axis.
plt.ylim(0, 1)
for curve, name in ((acc_list_baseline, 'baseline'),
                    (acc_list_reversed, 'reversed input')):
    # Attach the legend label when the line is drawn.
    plt.plot(curve, label=name)
plt.legend()
plt.show()
- 入力を反転させるだけで正解率が倍くらい上がる
- 1桁目が正解しにくかったのと関係ありそう
- 学習にかかった時間は12分30秒ほど(CPUのみ)
Peeky
覗き見の実装
# %python3  (Zeppelin paragraph magic of the original notebook cell)
class PeekyDecoder:
    """Decoder that feeds the encoder state ``h`` into every time step.

    ``h`` is concatenated in front of the input of both the LSTM layer and
    the Affine layer, which is why the LSTM input weight has
    ``hidden_size + wordvec_size`` rows and the Affine weight has
    ``hidden_size + hidden_size`` rows.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build the Embedding -> LSTM -> Affine stack and gather its weights.

        Args:
            vocab_size: number of rows of the embedding matrix and number of
                columns of the Affine weight.
            wordvec_size: width of one embedding vector.
            hidden_size: width of the LSTM hidden state.
        """
        randn = np.random.randn
        lstm_in = hidden_size + wordvec_size   # peeked state + embedding
        affine_in = hidden_size + hidden_size  # peeked state + LSTM output
        gate_cols = 4 * hidden_size

        # The random draws happen in this order: embedding, LSTM input
        # weight, LSTM recurrent weight, Affine weight.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_Wx = (randn(lstm_in, gate_cols) / np.sqrt(lstm_in)).astype('f')
        lstm_Wh = (randn(hidden_size, gate_cols)
                   / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(gate_cols).astype('f')
        affine_W = (randn(affine_in, vocab_size)
                    / np.sqrt(affine_in)).astype('f')
        affine_b = np.zeros(vocab_size).astype('f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        # Flat lists of every layer's parameters / gradients, in layer order.
        layers = (self.embed, self.lstm, self.affine)
        self.params = [param for layer in layers for param in layer.params]
        self.grads = [grad for layer in layers for grad in layer.grads]
        self.cache = None

    def forward(self, xs, h):
        """Compute the scores for a batch of decoder inputs.

        Args:
            xs: 2-D array (batch, time) passed to the embedding layer.
            h: 2-D array (batch, hidden) used as the LSTM state and as the
                peeked vector.

        Returns:
            Whatever the Affine layer returns for the concatenation of the
            peeked state and the LSTM output.
        """
        _, steps = xs.shape
        batch, hidden = h.shape

        self.lstm.set_state(h)
        embedded = self.embed.forward(xs)

        # Copy h along a new time axis so it can sit next to every step.
        peek = np.repeat(h, steps, axis=0).reshape(batch, steps, hidden)

        lstm_out = self.lstm.forward(np.concatenate((peek, embedded), axis=2))
        score = self.affine.forward(np.concatenate((peek, lstm_out), axis=2))

        # backward() needs the hidden width to split the gradients again.
        self.cache = hidden
        return score

    def backward(self, dscore):
        """Back-propagate ``dscore`` and return the gradient flowing into ``h``.

        The first ``hidden`` channels of the Affine and LSTM input gradients
        belong to the peeked state; the rest continue down the stack.
        """
        hidden = self.cache

        daffine_in = self.affine.backward(dscore)
        dpeek_affine = daffine_in[:, :, :hidden]
        dlstm_in = self.lstm.backward(daffine_in[:, :, hidden:])
        dpeek_lstm = dlstm_in[:, :, :hidden]
        self.embed.backward(dlstm_in[:, :, hidden:])

        # Sum the peeked-state gradients over time and add the LSTM layer's
        # ``dh`` (presumably the gradient w.r.t. its initial state -- confirm
        # against TimeLSTM).
        return self.lstm.dh + np.sum(dpeek_affine + dpeek_lstm, axis=1)

    def generate(self, h, start_id, sample_size):
        """Greedily generate ``sample_size`` ids starting from ``start_id``.

        Args:
            h: array with exactly ``h.shape[1]`` elements (a batch of one);
                it is reshaped to (1, 1, hidden).
            start_id: id fed to the embedding layer at the first step.
            sample_size: number of ids to produce.

        Returns:
            List of the arg-max id chosen at each step.
        """
        self.lstm.set_state(h)
        peek = h.reshape(1, 1, h.shape[1])

        current_id = start_id
        generated = []
        for _ in range(sample_size):
            step_in = np.array([current_id]).reshape((1, 1))
            embedded = self.embed.forward(step_in)
            lstm_out = self.lstm.forward(
                np.concatenate((peek, embedded), axis=2))
            score = self.affine.forward(
                np.concatenate((peek, lstm_out), axis=2))

            # The most likely id becomes the next step's input.
            current_id = np.argmax(score.flatten())
            generated.append(current_id)

        return generated
# %python3  (Zeppelin paragraph magic of the original notebook cell)
class PeekySeq2Seq(Seq2Seq):
    """Seq2Seq model whose decoder is a PeekyDecoder.

    Only the constructor is overridden; everything else is inherited from
    Seq2Seq. The parent initializer is not invoked -- every attribute is
    assigned here instead.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create encoder, peeky decoder and loss layer with shared sizes.

        Args:
            vocab_size: first size argument forwarded to both sub-models.
            wordvec_size: second size argument forwarded to both sub-models.
            hidden_size: third size argument forwarded to both sub-models.
        """
        sizes = (vocab_size, wordvec_size, hidden_size)
        self.encoder = Encoder(*sizes)
        self.decoder = PeekyDecoder(*sizes)
        self.softmax = TimeSoftmaxWithLoss()

        # Encoder entries first, then decoder entries.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads
# %python3  (Zeppelin paragraph magic of the original notebook cell)
# Train the peeky model on the addition data with reversed inputs and
# record the test accuracy after every epoch.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
# Reverse each input sequence along the time axis.
x_train = x_train[:, ::-1]
x_test = x_test[:, ::-1]

# To train the plain model instead, use:
#   model = Seq2Seq(vocab_size, wordvec_size, hidden_size)
model = PeekySeq2Seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list_peeky = []
for epoch in range(max_epoch):
    # One epoch per fit() call so the model can be evaluated in between.
    trainer.fit(x_train, t_train, max_epoch=1,
                batch_size=batch_size, max_grad=max_grad)

    correct_num = 0
    for i in range(len(x_test)):
        # List-indexing keeps the leading batch axis of size one.
        question = x_test[[i]]
        correct = t_test[[i]]
        verbose = i < 10  # only the first ten samples are printed
        correct_num += eval_seq2seq(model, question, correct,
                                    id_to_char, verbose)

    acc = float(correct_num) / len(x_test)
    acc_list_peeky.append(acc)
    print('val acc %.3f%%' % (acc * 100))
| epoch 1 | iter 1 / 351 | time 0[s] | loss 2.57 | epoch 1 | iter 21 / 351 | time 1[s] | loss 2.48 | epoch 1 | iter 41 / 351 | time 2[s] | loss 2.20 | epoch 1 | iter 61 / 351 | time 3[s] | loss 1.99 | epoch 1 | iter 81 / 351 | time 4[s] | loss 1.89 | epoch 1 | iter 101 / 351 | time 5[s] | loss 1.82 | epoch 1 | iter 121 / 351 | time 7[s] | loss 1.82 | epoch 1 | iter 141 / 351 | time 8[s] | loss 1.80 | epoch 1 | iter 161 / 351 | time 9[s] | loss 1.79 | epoch 1 | iter 181 / 351 | time 10[s] | loss 1.78 | epoch 1 | iter 201 / 351 | time 11[s] | loss 1.77 | epoch 1 | iter 221 / 351 | time 13[s] | loss 1.76 | epoch 1 | iter 241 / 351 | time 14[s] | loss 1.76 | epoch 1 | iter 261 / 351 | time 15[s] | loss 1.75 | epoch 1 | iter 281 / 351 | time 16[s] | loss 1.74 | epoch 1 | iter 301 / 351 | time 17[s] | loss 1.74 | epoch 1 | iter 321 / 351 | time 19[s] | loss 1.73 | epoch 1 | iter 341 / 351 | time 20[s] | loss 1.73 Q 58+77 T 162 [91m☒[0m 100 --- Q 461+579 T 1139 [91m☒[0m 1013 --- Q 48+285 T 666 [91m☒[0m 102 --- Q 551+8 T 163 [91m☒[0m 100 --- Q 55+763 T 422 [91m☒[0m 1023 --- Q 752+006 T 857 [91m☒[0m 1023 --- Q 292+167 T 1053 [91m☒[0m 1023 --- Q 795+038 T 1427 [91m☒[0m 1111 --- Q 838+62 T 864 [91m☒[0m 102 --- Q 39+341 T 236 [91m☒[0m 102 --- val acc 0.280% | epoch 2 | iter 1 / 351 | time 0[s] | loss 1.71 | epoch 2 | iter 21 / 351 | time 1[s] | loss 1.71 | epoch 2 | iter 41 / 351 | time 2[s] | loss 1.71 | epoch 2 | iter 61 / 351 | time 3[s] | loss 1.71 | epoch 2 | iter 81 / 351 | time 4[s] | loss 1.70 | epoch 2 | iter 101 / 351 | time 6[s] | loss 1.68 | epoch 2 | iter 121 / 351 | time 7[s] | loss 1.69 | epoch 2 | iter 141 / 351 | time 8[s] | loss 1.68 | epoch 2 | iter 161 / 351 | time 9[s] | loss 1.67 | epoch 2 | iter 181 / 351 | time 10[s] | loss 1.67 | epoch 2 | iter 201 / 351 | time 12[s] | loss 1.65 | epoch 2 | iter 221 / 351 | time 13[s] | loss 1.65 | epoch 2 | iter 241 / 351 | time 14[s] | loss 1.65 | epoch 2 | iter 261 / 351 | time 15[s] | loss 1.63 | epoch 2 | iter 281 
/ 351 | time 17[s] | loss 1.62 | epoch 2 | iter 301 / 351 | time 18[s] | loss 1.61 | epoch 2 | iter 321 / 351 | time 19[s] | loss 1.61 | epoch 2 | iter 341 / 351 | time 20[s] | loss 1.60 Q 58+77 T 162 [91m☒[0m 100 --- Q 461+579 T 1139 [91m☒[0m 1200 --- Q 48+285 T 666 [91m☒[0m 690 --- Q 551+8 T 163 [91m☒[0m 100 --- Q 55+763 T 422 [91m☒[0m 690 --- Q 752+006 T 857 [91m☒[0m 999 --- Q 292+167 T 1053 [91m☒[0m 1029 --- Q 795+038 T 1427 [91m☒[0m 1240 --- Q 838+62 T 864 [91m☒[0m 792 --- Q 39+341 T 236 [91m☒[0m 290 --- val acc 0.400% | epoch 3 | iter 1 / 351 | time 0[s] | loss 1.58 | epoch 3 | iter 21 / 351 | time 1[s] | loss 1.59 | epoch 3 | iter 41 / 351 | time 2[s] | loss 1.58 | epoch 3 | iter 61 / 351 | time 3[s] | loss 1.56 | epoch 3 | iter 81 / 351 | time 5[s] | loss 1.55 | epoch 3 | iter 101 / 351 | time 6[s] | loss 1.53 | epoch 3 | iter 121 / 351 | time 7[s] | loss 1.51 | epoch 3 | iter 141 / 351 | time 8[s] | loss 1.50 | epoch 3 | iter 161 / 351 | time 9[s] | loss 1.49 | epoch 3 | iter 181 / 351 | time 11[s] | loss 1.47 | epoch 3 | iter 201 / 351 | time 12[s] | loss 1.46 | epoch 3 | iter 221 / 351 | time 13[s] | loss 1.43 | epoch 3 | iter 241 / 351 | time 14[s] | loss 1.42 | epoch 3 | iter 261 / 351 | time 16[s] | loss 1.41 | epoch 3 | iter 281 / 351 | time 17[s] | loss 1.39 | epoch 3 | iter 301 / 351 | time 18[s] | loss 1.37 | epoch 3 | iter 321 / 351 | time 19[s] | loss 1.36 | epoch 3 | iter 341 / 351 | time 21[s] | loss 1.35 Q 58+77 T 162 [91m☒[0m 154 --- Q 461+579 T 1139 [91m☒[0m 1033 --- Q 48+285 T 666 [91m☒[0m 644 --- Q 551+8 T 163 [91m☒[0m 161 --- Q 55+763 T 422 [91m☒[0m 433 --- Q 752+006 T 857 [91m☒[0m 818 --- Q 292+167 T 1053 [91m☒[0m 1018 --- Q 795+038 T 1427 [91m☒[0m 1344 --- Q 838+62 T 864 [91m☒[0m 834 --- Q 39+341 T 236 [91m☒[0m 211 --- val acc 1.600% | epoch 4 | iter 1 / 351 | time 0[s] | loss 1.32 | epoch 4 | iter 21 / 351 | time 1[s] | loss 1.32 | epoch 4 | iter 41 / 351 | time 2[s] | loss 1.30 | epoch 4 | iter 61 / 351 | time 3[s] | loss 1.30 | 
epoch 4 | iter 81 / 351 | time 5[s] | loss 1.28 | epoch 4 | iter 101 / 351 | time 6[s] | loss 1.27 | epoch 4 | iter 121 / 351 | time 7[s] | loss 1.25 | epoch 4 | iter 141 / 351 | time 8[s] | loss 1.24 | epoch 4 | iter 161 / 351 | time 10[s] | loss 1.22 | epoch 4 | iter 181 / 351 | time 11[s] | loss 1.21 | epoch 4 | iter 201 / 351 | time 12[s] | loss 1.20 | epoch 4 | iter 221 / 351 | time 13[s] | loss 1.20 | epoch 4 | iter 241 / 351 | time 15[s] | loss 1.17 | epoch 4 | iter 261 / 351 | time 16[s] | loss 1.16 | epoch 4 | iter 281 / 351 | time 17[s] | loss 1.14 | epoch 4 | iter 301 / 351 | time 18[s] | loss 1.12 | epoch 4 | iter 321 / 351 | time 20[s] | loss 1.11 | epoch 4 | iter 341 / 351 | time 21[s] | loss 1.10 Q 58+77 T 162 [91m☒[0m 158 --- Q 461+579 T 1139 [91m☒[0m 1123 --- Q 48+285 T 666 [91m☒[0m 657 --- Q 551+8 T 163 [91m☒[0m 165 --- Q 55+763 T 422 [91m☒[0m 423 --- Q 752+006 T 857 [91m☒[0m 777 --- Q 292+167 T 1053 [91m☒[0m 1023 --- Q 795+038 T 1427 [91m☒[0m 1388 --- Q 838+62 T 864 [91m☒[0m 887 --- Q 39+341 T 236 [91m☒[0m 223 --- val acc 5.140% | epoch 5 | iter 1 / 351 | time 0[s] | loss 1.08 | epoch 5 | iter 21 / 351 | time 1[s] | loss 1.07 | epoch 5 | iter 41 / 351 | time 2[s] | loss 1.05 | epoch 5 | iter 61 / 351 | time 3[s] | loss 1.04 | epoch 5 | iter 81 / 351 | time 5[s] | loss 1.02 | epoch 5 | iter 101 / 351 | time 6[s] | loss 1.01 | epoch 5 | iter 121 / 351 | time 7[s] | loss 1.00 | epoch 5 | iter 141 / 351 | time 8[s] | loss 0.99 | epoch 5 | iter 161 / 351 | time 10[s] | loss 0.99 | epoch 5 | iter 181 / 351 | time 11[s] | loss 0.96 | epoch 5 | iter 201 / 351 | time 12[s] | loss 0.95 | epoch 5 | iter 221 / 351 | time 13[s] | loss 0.94 | epoch 5 | iter 241 / 351 | time 15[s] | loss 0.92 | epoch 5 | iter 261 / 351 | time 16[s] | loss 0.91 | epoch 5 | iter 281 / 351 | time 17[s] | loss 0.90 | epoch 5 | iter 301 / 351 | time 19[s] | loss 0.89 | epoch 5 | iter 321 / 351 | time 20[s] | loss 0.88 | epoch 5 | iter 341 / 351 | time 21[s] | loss 0.87 Q 58+77 T 162 
[91m☒[0m 160 --- Q 461+579 T 1139 [91m☒[0m 1135 --- Q 48+285 T 666 [91m☒[0m 668 --- Q 551+8 T 163 [91m☒[0m 169 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 861 --- Q 292+167 T 1053 [91m☒[0m 1045 --- Q 795+038 T 1427 [91m☒[0m 1324 --- Q 838+62 T 864 [91m☒[0m 861 --- Q 39+341 T 236 [91m☒[0m 239 --- val acc 9.380% | epoch 6 | iter 1 / 351 | time 0[s] | loss 0.90 | epoch 6 | iter 21 / 351 | time 1[s] | loss 0.86 | epoch 6 | iter 41 / 351 | time 2[s] | loss 0.83 | epoch 6 | iter 61 / 351 | time 3[s] | loss 0.84 | epoch 6 | iter 81 / 351 | time 5[s] | loss 0.82 | epoch 6 | iter 101 / 351 | time 6[s] | loss 0.81 | epoch 6 | iter 121 / 351 | time 7[s] | loss 0.80 | epoch 6 | iter 141 / 351 | time 8[s] | loss 0.79 | epoch 6 | iter 161 / 351 | time 10[s] | loss 0.78 | epoch 6 | iter 181 / 351 | time 11[s] | loss 0.77 | epoch 6 | iter 201 / 351 | time 12[s] | loss 0.76 | epoch 6 | iter 221 / 351 | time 14[s] | loss 0.76 | epoch 6 | iter 241 / 351 | time 15[s] | loss 0.74 | epoch 6 | iter 261 / 351 | time 16[s] | loss 0.74 | epoch 6 | iter 281 / 351 | time 17[s] | loss 0.73 | epoch 6 | iter 301 / 351 | time 19[s] | loss 0.72 | epoch 6 | iter 321 / 351 | time 20[s] | loss 0.72 | epoch 6 | iter 341 / 351 | time 21[s] | loss 0.71 Q 58+77 T 162 [91m☒[0m 163 --- Q 461+579 T 1139 [91m☒[0m 1138 --- Q 48+285 T 666 [91m☒[0m 668 --- Q 551+8 T 163 [91m☒[0m 166 --- Q 55+763 T 422 [91m☒[0m 423 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 [91m☒[0m 1048 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [91m☒[0m 873 --- Q 39+341 T 236 [91m☒[0m 239 --- val acc 15.040% | epoch 7 | iter 1 / 351 | time 0[s] | loss 0.68 | epoch 7 | iter 21 / 351 | time 1[s] | loss 0.69 | epoch 7 | iter 41 / 351 | time 2[s] | loss 0.67 | epoch 7 | iter 61 / 351 | time 3[s] | loss 0.66 | epoch 7 | iter 81 / 351 | time 5[s] | loss 0.66 | epoch 7 | iter 101 / 351 | time 6[s] | loss 0.65 | epoch 7 | iter 121 / 351 | time 7[s] | loss 0.65 | epoch 7 | iter 141 / 351 | time 8[s] | loss 0.64 | 
epoch 7 | iter 161 / 351 | time 10[s] | loss 0.63 | epoch 7 | iter 181 / 351 | time 11[s] | loss 0.61 | epoch 7 | iter 201 / 351 | time 12[s] | loss 0.61 | epoch 7 | iter 221 / 351 | time 13[s] | loss 0.60 | epoch 7 | iter 241 / 351 | time 15[s] | loss 0.57 | epoch 7 | iter 261 / 351 | time 16[s] | loss 0.57 | epoch 7 | iter 281 / 351 | time 17[s] | loss 0.57 | epoch 7 | iter 301 / 351 | time 19[s] | loss 0.55 | epoch 7 | iter 321 / 351 | time 20[s] | loss 0.54 | epoch 7 | iter 341 / 351 | time 21[s] | loss 0.53 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [91m☒[0m 665 --- Q 551+8 T 163 [91m☒[0m 156 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 858 --- Q 292+167 T 1053 [91m☒[0m 1052 --- Q 795+038 T 1427 [91m☒[0m 1428 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [91m☒[0m 235 --- val acc 39.100% | epoch 8 | iter 1 / 351 | time 0[s] | loss 0.51 | epoch 8 | iter 21 / 351 | time 1[s] | loss 0.50 | epoch 8 | iter 41 / 351 | time 2[s] | loss 0.49 | epoch 8 | iter 61 / 351 | time 3[s] | loss 0.48 | epoch 8 | iter 81 / 351 | time 5[s] | loss 0.47 | epoch 8 | iter 101 / 351 | time 6[s] | loss 0.46 | epoch 8 | iter 121 / 351 | time 7[s] | loss 0.46 | epoch 8 | iter 141 / 351 | time 8[s] | loss 0.44 | epoch 8 | iter 161 / 351 | time 10[s] | loss 0.41 | epoch 8 | iter 181 / 351 | time 11[s] | loss 0.42 | epoch 8 | iter 201 / 351 | time 12[s] | loss 0.41 | epoch 8 | iter 221 / 351 | time 14[s] | loss 0.40 | epoch 8 | iter 241 / 351 | time 15[s] | loss 0.39 | epoch 8 | iter 261 / 351 | time 16[s] | loss 0.37 | epoch 8 | iter 281 / 351 | time 17[s] | loss 0.36 | epoch 8 | iter 301 / 351 | time 19[s] | loss 0.36 | epoch 8 | iter 321 / 351 | time 20[s] | loss 0.35 | epoch 8 | iter 341 / 351 | time 21[s] | loss 0.34 Q 58+77 T 162 [91m☒[0m 161 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [91m☒[0m 657 --- Q 551+8 T 163 [91m☒[0m 155 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 
[92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1438 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 65.060% | epoch 9 | iter 1 / 351 | time 0[s] | loss 0.32 | epoch 9 | iter 21 / 351 | time 1[s] | loss 0.31 | epoch 9 | iter 41 / 351 | time 2[s] | loss 0.31 | epoch 9 | iter 61 / 351 | time 3[s] | loss 0.31 | epoch 9 | iter 81 / 351 | time 5[s] | loss 0.29 | epoch 9 | iter 101 / 351 | time 6[s] | loss 0.29 | epoch 9 | iter 121 / 351 | time 7[s] | loss 0.29 | epoch 9 | iter 141 / 351 | time 8[s] | loss 0.27 | epoch 9 | iter 161 / 351 | time 10[s] | loss 0.27 | epoch 9 | iter 181 / 351 | time 11[s] | loss 0.26 | epoch 9 | iter 201 / 351 | time 12[s] | loss 0.25 | epoch 9 | iter 221 / 351 | time 13[s] | loss 0.25 | epoch 9 | iter 241 / 351 | time 15[s] | loss 0.24 | epoch 9 | iter 261 / 351 | time 16[s] | loss 0.24 | epoch 9 | iter 281 / 351 | time 17[s] | loss 0.23 | epoch 9 | iter 301 / 351 | time 19[s] | loss 0.22 | epoch 9 | iter 321 / 351 | time 20[s] | loss 0.22 | epoch 9 | iter 341 / 351 | time 21[s] | loss 0.21 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [91m☒[0m 1140 --- Q 48+285 T 666 [91m☒[0m 657 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 83.280% | epoch 10 | iter 1 / 351 | time 0[s] | loss 0.22 | epoch 10 | iter 21 / 351 | time 1[s] | loss 0.20 | epoch 10 | iter 41 / 351 | time 2[s] | loss 0.20 | epoch 10 | iter 61 / 351 | time 3[s] | loss 0.20 | epoch 10 | iter 81 / 351 | time 5[s] | loss 0.18 | epoch 10 | iter 101 / 351 | time 6[s] | loss 0.17 | epoch 10 | iter 121 / 351 | time 7[s] | loss 0.18 | epoch 10 | iter 141 / 351 | time 9[s] | loss 0.17 | epoch 10 | iter 161 / 351 | time 10[s] | loss 0.17 | epoch 10 | iter 181 / 351 | time 11[s] | loss 0.17 | epoch 10 | iter 201 / 351 | time 12[s] | loss 0.17 | epoch 10 | iter 221 / 351 | 
time 14[s] | loss 0.16 | epoch 10 | iter 241 / 351 | time 15[s] | loss 0.15 | epoch 10 | iter 261 / 351 | time 16[s] | loss 0.15 | epoch 10 | iter 281 / 351 | time 17[s] | loss 0.15 | epoch 10 | iter 301 / 351 | time 19[s] | loss 0.15 | epoch 10 | iter 321 / 351 | time 20[s] | loss 0.14 | epoch 10 | iter 341 / 351 | time 21[s] | loss 0.14 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [91m☒[0m 656 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 88.400% | epoch 11 | iter 1 / 351 | time 0[s] | loss 0.13 | epoch 11 | iter 21 / 351 | time 1[s] | loss 0.13 | epoch 11 | iter 41 / 351 | time 2[s] | loss 0.13 | epoch 11 | iter 61 / 351 | time 3[s] | loss 0.12 | epoch 11 | iter 81 / 351 | time 5[s] | loss 0.12 | epoch 11 | iter 101 / 351 | time 6[s] | loss 0.12 | epoch 11 | iter 121 / 351 | time 7[s] | loss 0.11 | epoch 11 | iter 141 / 351 | time 9[s] | loss 0.12 | epoch 11 | iter 161 / 351 | time 10[s] | loss 0.11 | epoch 11 | iter 181 / 351 | time 11[s] | loss 0.11 | epoch 11 | iter 201 / 351 | time 12[s] | loss 0.12 | epoch 11 | iter 221 / 351 | time 14[s] | loss 0.11 | epoch 11 | iter 241 / 351 | time 15[s] | loss 0.11 | epoch 11 | iter 261 / 351 | time 16[s] | loss 0.10 | epoch 11 | iter 281 / 351 | time 17[s] | loss 0.10 | epoch 11 | iter 301 / 351 | time 19[s] | loss 0.10 | epoch 11 | iter 321 / 351 | time 20[s] | loss 0.09 | epoch 11 | iter 341 / 351 | time 21[s] | loss 0.09 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 90.940% | epoch 12 | iter 1 / 351 | 
time 0[s] | loss 0.09 | epoch 12 | iter 21 / 351 | time 1[s] | loss 0.09 | epoch 12 | iter 41 / 351 | time 2[s] | loss 0.09 | epoch 12 | iter 61 / 351 | time 3[s] | loss 0.09 | epoch 12 | iter 81 / 351 | time 5[s] | loss 0.09 | epoch 12 | iter 101 / 351 | time 6[s] | loss 0.08 | epoch 12 | iter 121 / 351 | time 7[s] | loss 0.08 | epoch 12 | iter 141 / 351 | time 8[s] | loss 0.08 | epoch 12 | iter 161 / 351 | time 10[s] | loss 0.08 | epoch 12 | iter 181 / 351 | time 11[s] | loss 0.08 | epoch 12 | iter 201 / 351 | time 12[s] | loss 0.08 | epoch 12 | iter 221 / 351 | time 14[s] | loss 0.09 | epoch 12 | iter 241 / 351 | time 15[s] | loss 0.09 | epoch 12 | iter 261 / 351 | time 16[s] | loss 0.09 | epoch 12 | iter 281 / 351 | time 17[s] | loss 0.08 | epoch 12 | iter 301 / 351 | time 19[s] | loss 0.08 | epoch 12 | iter 321 / 351 | time 20[s] | loss 0.07 | epoch 12 | iter 341 / 351 | time 21[s] | loss 0.08 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 92.220% | epoch 13 | iter 1 / 351 | time 0[s] | loss 0.07 | epoch 13 | iter 21 / 351 | time 1[s] | loss 0.07 | epoch 13 | iter 41 / 351 | time 2[s] | loss 0.07 | epoch 13 | iter 61 / 351 | time 3[s] | loss 0.07 | epoch 13 | iter 81 / 351 | time 5[s] | loss 0.06 | epoch 13 | iter 101 / 351 | time 6[s] | loss 0.06 | epoch 13 | iter 121 / 351 | time 7[s] | loss 0.07 | epoch 13 | iter 141 / 351 | time 8[s] | loss 0.06 | epoch 13 | iter 161 / 351 | time 10[s] | loss 0.06 | epoch 13 | iter 181 / 351 | time 11[s] | loss 0.06 | epoch 13 | iter 201 / 351 | time 12[s] | loss 0.06 | epoch 13 | iter 221 / 351 | time 13[s] | loss 0.06 | epoch 13 | iter 241 / 351 | time 15[s] | loss 0.06 | epoch 13 | iter 261 / 351 | time 16[s] | loss 0.06 | epoch 13 | 
iter 281 / 351 | time 17[s] | loss 0.06 | epoch 13 | iter 301 / 351 | time 18[s] | loss 0.05 | epoch 13 | iter 321 / 351 | time 20[s] | loss 0.05 | epoch 13 | iter 341 / 351 | time 21[s] | loss 0.06 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 94.420% | epoch 14 | iter 1 / 351 | time 0[s] | loss 0.05 | epoch 14 | iter 21 / 351 | time 1[s] | loss 0.05 | epoch 14 | iter 41 / 351 | time 2[s] | loss 0.05 | epoch 14 | iter 61 / 351 | time 3[s] | loss 0.05 | epoch 14 | iter 81 / 351 | time 5[s] | loss 0.05 | epoch 14 | iter 101 / 351 | time 6[s] | loss 0.05 | epoch 14 | iter 121 / 351 | time 7[s] | loss 0.05 | epoch 14 | iter 141 / 351 | time 8[s] | loss 0.05 | epoch 14 | iter 161 / 351 | time 10[s] | loss 0.05 | epoch 14 | iter 181 / 351 | time 11[s] | loss 0.05 | epoch 14 | iter 201 / 351 | time 12[s] | loss 0.05 | epoch 14 | iter 221 / 351 | time 13[s] | loss 0.06 | epoch 14 | iter 241 / 351 | time 15[s] | loss 0.06 | epoch 14 | iter 261 / 351 | time 16[s] | loss 0.07 | epoch 14 | iter 281 / 351 | time 17[s] | loss 0.06 | epoch 14 | iter 301 / 351 | time 19[s] | loss 0.06 | epoch 14 | iter 321 / 351 | time 20[s] | loss 0.05 | epoch 14 | iter 341 / 351 | time 21[s] | loss 0.05 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 94.340% | epoch 15 | iter 1 / 351 | time 0[s] | loss 0.04 | epoch 15 | iter 21 / 351 | time 1[s] | loss 0.04 | epoch 15 | iter 41 / 351 | time 2[s] | loss 0.04 | epoch 15 | iter 61 
/ 351 | time 3[s] | loss 0.05 | epoch 15 | iter 81 / 351 | time 5[s] | loss 0.04 | epoch 15 | iter 101 / 351 | time 6[s] | loss 0.05 | epoch 15 | iter 121 / 351 | time 7[s] | loss 0.04 | epoch 15 | iter 141 / 351 | time 8[s] | loss 0.04 | epoch 15 | iter 161 / 351 | time 10[s] | loss 0.04 | epoch 15 | iter 181 / 351 | time 11[s] | loss 0.05 | epoch 15 | iter 201 / 351 | time 12[s] | loss 0.04 | epoch 15 | iter 221 / 351 | time 13[s] | loss 0.04 | epoch 15 | iter 241 / 351 | time 15[s] | loss 0.03 | epoch 15 | iter 261 / 351 | time 16[s] | loss 0.04 | epoch 15 | iter 281 / 351 | time 17[s] | loss 0.04 | epoch 15 | iter 301 / 351 | time 18[s] | loss 0.05 | epoch 15 | iter 321 / 351 | time 20[s] | loss 0.04 | epoch 15 | iter 341 / 351 | time 21[s] | loss 0.04 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 94.760% | epoch 16 | iter 1 / 351 | time 0[s] | loss 0.03 | epoch 16 | iter 21 / 351 | time 1[s] | loss 0.05 | epoch 16 | iter 41 / 351 | time 2[s] | loss 0.06 | epoch 16 | iter 61 / 351 | time 3[s] | loss 0.05 | epoch 16 | iter 81 / 351 | time 5[s] | loss 0.04 | epoch 16 | iter 101 / 351 | time 6[s] | loss 0.04 | epoch 16 | iter 121 / 351 | time 7[s] | loss 0.04 | epoch 16 | iter 141 / 351 | time 8[s] | loss 0.04 | epoch 16 | iter 161 / 351 | time 10[s] | loss 0.04 | epoch 16 | iter 181 / 351 | time 11[s] | loss 0.04 | epoch 16 | iter 201 / 351 | time 12[s] | loss 0.05 | epoch 16 | iter 221 / 351 | time 13[s] | loss 0.05 | epoch 16 | iter 241 / 351 | time 15[s] | loss 0.04 | epoch 16 | iter 261 / 351 | time 16[s] | loss 0.04 | epoch 16 | iter 281 / 351 | time 17[s] | loss 0.03 | epoch 16 | iter 301 / 351 | time 19[s] | loss 0.03 | epoch 16 | iter 321 / 351 | time 20[s] | loss 0.04 | 
epoch 16 | iter 341 / 351 | time 21[s] | loss 0.04 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 96.080% | epoch 17 | iter 1 / 351 | time 0[s] | loss 0.04 | epoch 17 | iter 21 / 351 | time 1[s] | loss 0.03 | epoch 17 | iter 41 / 351 | time 2[s] | loss 0.03 | epoch 17 | iter 61 / 351 | time 3[s] | loss 0.03 | epoch 17 | iter 81 / 351 | time 5[s] | loss 0.03 | epoch 17 | iter 101 / 351 | time 6[s] | loss 0.03 | epoch 17 | iter 121 / 351 | time 7[s] | loss 0.02 | epoch 17 | iter 141 / 351 | time 8[s] | loss 0.02 | epoch 17 | iter 161 / 351 | time 10[s] | loss 0.03 | epoch 17 | iter 181 / 351 | time 11[s] | loss 0.03 | epoch 17 | iter 201 / 351 | time 12[s] | loss 0.03 | epoch 17 | iter 221 / 351 | time 13[s] | loss 0.03 | epoch 17 | iter 241 / 351 | time 15[s] | loss 0.03 | epoch 17 | iter 261 / 351 | time 16[s] | loss 0.03 | epoch 17 | iter 281 / 351 | time 17[s] | loss 0.03 | epoch 17 | iter 301 / 351 | time 18[s] | loss 0.03 | epoch 17 | iter 321 / 351 | time 20[s] | loss 0.04 | epoch 17 | iter 341 / 351 | time 21[s] | loss 0.05 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [91m☒[0m 856 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 91.420% | epoch 18 | iter 1 / 351 | time 0[s] | loss 0.06 | epoch 18 | iter 21 / 351 | time 1[s] | loss 0.05 | epoch 18 | iter 41 / 351 | time 2[s] | loss 0.05 | epoch 18 | iter 61 / 351 | time 3[s] | loss 0.05 | epoch 18 | iter 81 / 351 | time 5[s] | loss 0.05 | epoch 18 | iter 101 / 351 | time 6[s] | loss 0.04 | epoch 18 | 
iter 121 / 351 | time 7[s] | loss 0.03 | epoch 18 | iter 141 / 351 | time 8[s] | loss 0.03 | epoch 18 | iter 161 / 351 | time 10[s] | loss 0.03 | epoch 18 | iter 181 / 351 | time 11[s] | loss 0.02 | epoch 18 | iter 201 / 351 | time 12[s] | loss 0.02 | epoch 18 | iter 221 / 351 | time 13[s] | loss 0.02 | epoch 18 | iter 241 / 351 | time 15[s] | loss 0.02 | epoch 18 | iter 261 / 351 | time 16[s] | loss 0.02 | epoch 18 | iter 281 / 351 | time 17[s] | loss 0.02 | epoch 18 | iter 301 / 351 | time 19[s] | loss 0.02 | epoch 18 | iter 321 / 351 | time 20[s] | loss 0.02 | epoch 18 | iter 341 / 351 | time 21[s] | loss 0.02 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 98.320% | epoch 19 | iter 1 / 351 | time 0[s] | loss 0.01 | epoch 19 | iter 21 / 351 | time 1[s] | loss 0.02 | epoch 19 | iter 41 / 351 | time 2[s] | loss 0.02 | epoch 19 | iter 61 / 351 | time 3[s] | loss 0.02 | epoch 19 | iter 81 / 351 | time 5[s] | loss 0.02 | epoch 19 | iter 101 / 351 | time 6[s] | loss 0.02 | epoch 19 | iter 121 / 351 | time 7[s] | loss 0.03 | epoch 19 | iter 141 / 351 | time 8[s] | loss 0.03 | epoch 19 | iter 161 / 351 | time 10[s] | loss 0.03 | epoch 19 | iter 181 / 351 | time 11[s] | loss 0.04 | epoch 19 | iter 201 / 351 | time 12[s] | loss 0.04 | epoch 19 | iter 221 / 351 | time 13[s] | loss 0.03 | epoch 19 | iter 241 / 351 | time 15[s] | loss 0.03 | epoch 19 | iter 261 / 351 | time 16[s] | loss 0.03 | epoch 19 | iter 281 / 351 | time 17[s] | loss 0.03 | epoch 19 | iter 301 / 351 | time 18[s] | loss 0.02 | epoch 19 | iter 321 / 351 | time 20[s] | loss 0.03 | epoch 19 | iter 341 / 351 | time 21[s] | loss 0.02 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 
--- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 97.220% | epoch 20 | iter 1 / 351 | time 0[s] | loss 0.03 | epoch 20 | iter 21 / 351 | time 1[s] | loss 0.02 | epoch 20 | iter 41 / 351 | time 2[s] | loss 0.04 | epoch 20 | iter 61 / 351 | time 3[s] | loss 0.03 | epoch 20 | iter 81 / 351 | time 5[s] | loss 0.04 | epoch 20 | iter 101 / 351 | time 6[s] | loss 0.03 | epoch 20 | iter 121 / 351 | time 7[s] | loss 0.03 | epoch 20 | iter 141 / 351 | time 8[s] | loss 0.03 | epoch 20 | iter 161 / 351 | time 10[s] | loss 0.02 | epoch 20 | iter 181 / 351 | time 11[s] | loss 0.03 | epoch 20 | iter 201 / 351 | time 12[s] | loss 0.02 | epoch 20 | iter 221 / 351 | time 13[s] | loss 0.02 | epoch 20 | iter 241 / 351 | time 15[s] | loss 0.02 | epoch 20 | iter 261 / 351 | time 16[s] | loss 0.02 | epoch 20 | iter 281 / 351 | time 17[s] | loss 0.03 | epoch 20 | iter 301 / 351 | time 18[s] | loss 0.02 | epoch 20 | iter 321 / 351 | time 20[s] | loss 0.02 | epoch 20 | iter 341 / 351 | time 21[s] | loss 0.03 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [91m☒[0m 1437 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 95.080% | epoch 21 | iter 1 / 351 | time 0[s] | loss 0.03 | epoch 21 | iter 21 / 351 | time 1[s] | loss 0.03 | epoch 21 | iter 41 / 351 | time 2[s] | loss 0.02 | epoch 21 | iter 61 / 351 | time 3[s] | loss 0.02 | epoch 21 | iter 81 / 351 | time 5[s] | loss 0.02 | epoch 21 | iter 101 / 351 | time 6[s] | loss 0.02 | epoch 21 | iter 121 / 351 | time 7[s] | loss 0.02 | epoch 21 | iter 141 / 351 | time 8[s] | loss 0.02 | epoch 21 | iter 161 / 351 | time 10[s] | loss 0.02 | 
epoch 21 | iter 181 / 351 | time 11[s] | loss 0.02 | epoch 21 | iter 201 / 351 | time 12[s] | loss 0.02 | epoch 21 | iter 221 / 351 | time 14[s] | loss 0.01 | epoch 21 | iter 241 / 351 | time 15[s] | loss 0.01 | epoch 21 | iter 261 / 351 | time 16[s] | loss 0.01 | epoch 21 | iter 281 / 351 | time 17[s] | loss 0.02 | epoch 21 | iter 301 / 351 | time 19[s] | loss 0.02 | epoch 21 | iter 321 / 351 | time 20[s] | loss 0.02 | epoch 21 | iter 341 / 351 | time 21[s] | loss 0.02 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 97.480% | epoch 22 | iter 1 / 351 | time 0[s] | loss 0.03 | epoch 22 | iter 21 / 351 | time 1[s] | loss 0.03 | epoch 22 | iter 41 / 351 | time 2[s] | loss 0.02 | epoch 22 | iter 61 / 351 | time 3[s] | loss 0.02 | epoch 22 | iter 81 / 351 | time 5[s] | loss 0.02 | epoch 22 | iter 101 / 351 | time 6[s] | loss 0.02 | epoch 22 | iter 121 / 351 | time 7[s] | loss 0.02 | epoch 22 | iter 141 / 351 | time 8[s] | loss 0.02 | epoch 22 | iter 161 / 351 | time 10[s] | loss 0.02 | epoch 22 | iter 181 / 351 | time 11[s] | loss 0.02 | epoch 22 | iter 201 / 351 | time 12[s] | loss 0.02 | epoch 22 | iter 221 / 351 | time 13[s] | loss 0.02 | epoch 22 | iter 241 / 351 | time 15[s] | loss 0.02 | epoch 22 | iter 261 / 351 | time 16[s] | loss 0.03 | epoch 22 | iter 281 / 351 | time 17[s] | loss 0.04 | epoch 22 | iter 301 / 351 | time 18[s] | loss 0.03 | epoch 22 | iter 321 / 351 | time 20[s] | loss 0.03 | epoch 22 | iter 341 / 351 | time 21[s] | loss 0.02 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 
1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 95.020% | epoch 23 | iter 1 / 351 | time 0[s] | loss 0.04 | epoch 23 | iter 21 / 351 | time 1[s] | loss 0.03 | epoch 23 | iter 41 / 351 | time 2[s] | loss 0.03 | epoch 23 | iter 61 / 351 | time 3[s] | loss 0.03 | epoch 23 | iter 81 / 351 | time 5[s] | loss 0.02 | epoch 23 | iter 101 / 351 | time 6[s] | loss 0.02 | epoch 23 | iter 121 / 351 | time 7[s] | loss 0.01 | epoch 23 | iter 141 / 351 | time 9[s] | loss 0.02 | epoch 23 | iter 161 / 351 | time 10[s] | loss 0.01 | epoch 23 | iter 181 / 351 | time 11[s] | loss 0.02 | epoch 23 | iter 201 / 351 | time 12[s] | loss 0.02 | epoch 23 | iter 221 / 351 | time 14[s] | loss 0.02 | epoch 23 | iter 241 / 351 | time 15[s] | loss 0.02 | epoch 23 | iter 261 / 351 | time 16[s] | loss 0.03 | epoch 23 | iter 281 / 351 | time 17[s] | loss 0.02 | epoch 23 | iter 301 / 351 | time 19[s] | loss 0.02 | epoch 23 | iter 321 / 351 | time 20[s] | loss 0.03 | epoch 23 | iter 341 / 351 | time 21[s] | loss 0.04 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [91m☒[0m 854 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 93.260% | epoch 24 | iter 1 / 351 | time 0[s] | loss 0.04 | epoch 24 | iter 21 / 351 | time 1[s] | loss 0.03 | epoch 24 | iter 41 / 351 | time 2[s] | loss 0.03 | epoch 24 | iter 61 / 351 | time 3[s] | loss 0.03 | epoch 24 | iter 81 / 351 | time 5[s] | loss 0.02 | epoch 24 | iter 101 / 351 | time 6[s] | loss 0.01 | epoch 24 | iter 121 / 351 | time 7[s] | loss 0.02 | epoch 24 | iter 141 / 351 | time 8[s] | loss 0.01 | epoch 24 | iter 161 / 351 | time 10[s] | loss 0.01 | epoch 24 | iter 181 / 351 | time 11[s] | loss 0.01 | epoch 24 | iter 201 / 351 | time 12[s] | loss 0.01 | epoch 24 | iter 221 / 351 | time 13[s] | 
loss 0.02 | epoch 24 | iter 241 / 351 | time 15[s] | loss 0.03 | epoch 24 | iter 261 / 351 | time 16[s] | loss 0.03 | epoch 24 | iter 281 / 351 | time 17[s] | loss 0.03 | epoch 24 | iter 301 / 351 | time 18[s] | loss 0.03 | epoch 24 | iter 321 / 351 | time 20[s] | loss 0.02 | epoch 24 | iter 341 / 351 | time 21[s] | loss 0.02 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 97.800% | epoch 25 | iter 1 / 351 | time 0[s] | loss 0.02 | epoch 25 | iter 21 / 351 | time 1[s] | loss 0.01 | epoch 25 | iter 41 / 351 | time 2[s] | loss 0.01 | epoch 25 | iter 61 / 351 | time 3[s] | loss 0.01 | epoch 25 | iter 81 / 351 | time 5[s] | loss 0.01 | epoch 25 | iter 101 / 351 | time 6[s] | loss 0.01 | epoch 25 | iter 121 / 351 | time 7[s] | loss 0.01 | epoch 25 | iter 141 / 351 | time 9[s] | loss 0.01 | epoch 25 | iter 161 / 351 | time 10[s] | loss 0.01 | epoch 25 | iter 181 / 351 | time 11[s] | loss 0.01 | epoch 25 | iter 201 / 351 | time 12[s] | loss 0.01 | epoch 25 | iter 221 / 351 | time 14[s] | loss 0.01 | epoch 25 | iter 241 / 351 | time 15[s] | loss 0.01 | epoch 25 | iter 261 / 351 | time 16[s] | loss 0.01 | epoch 25 | iter 281 / 351 | time 17[s] | loss 0.01 | epoch 25 | iter 301 / 351 | time 19[s] | loss 0.01 | epoch 25 | iter 321 / 351 | time 20[s] | loss 0.01 | epoch 25 | iter 341 / 351 | time 21[s] | loss 0.01 Q 58+77 T 162 [92m☑[0m 162 --- Q 461+579 T 1139 [92m☑[0m 1139 --- Q 48+285 T 666 [92m☑[0m 666 --- Q 551+8 T 163 [92m☑[0m 163 --- Q 55+763 T 422 [92m☑[0m 422 --- Q 752+006 T 857 [92m☑[0m 857 --- Q 292+167 T 1053 [92m☑[0m 1053 --- Q 795+038 T 1427 [92m☑[0m 1427 --- Q 838+62 T 864 [92m☑[0m 864 --- Q 39+341 T 236 [92m☑[0m 236 --- val acc 97.760%
%python3 plt.ylim(0, 1) plt.plot(acc_list_baseline) plt.plot(acc_list_reversed) plt.plot(acc_list_peeky) plt.legend(labels=['baseline', 'reversed input', 'peeky']) plt.show()
- 最初の正解が出るまでに時間がかかる気がする
- ただし、正解し始めると一気に賢くなる
%md