ゼロから作る Deep Learning 2/Seq2Seq

Posted on

7章: RNNによる文章生成

ゼロから作るDeep Learning (2)の読書メモです。6章ではゲートと呼ばれる仕組みを導入することで長期的な依存関係を学習できる LSTM の実装について学びました。7章では前章で実装した言語モデルを利用して RNN による文章生成を行い、時系列データを別の時系列データに変換できる Seq2Seq と呼ばれる手法をみていきます。

参考実装

%sh
rm -rf /tmp/deep-learning-from-scratch-2
git clone https://github.com/oreilly-japan/deep-learning-from-scratch-2 /tmp/deep-learning-from-scratch-2
Cloning into '/tmp/deep-learning-from-scratch-2'...

必要なモジュールを入れる

%sh
pip3 install numpy matplotlib
Requirement already satisfied: numpy in /usr/lib64/python3.6/dist-packages
Requirement already satisfied: matplotlib in /usr/lib64/python3.6/dist-packages
Requirement already satisfied: pytz in /usr/lib/python3.6/dist-packages (from matplotlib)
Requirement already satisfied: six>=1.10 in /usr/lib/python3.6/dist-packages (from matplotlib)
Requirement already satisfied: cycler>=0.10 in /usr/lib/python3.6/dist-packages (from matplotlib)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/lib64/python3.6/dist-packages (from matplotlib)
Requirement already satisfied: python-dateutil>=2.1 in /usr/lib/python3.6/dist-packages (from matplotlib)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/lib/python3.6/dist-packages (from matplotlib)
Requirement already satisfied: setuptools in /usr/lib/python3.6/dist-packages (from kiwisolver>=1.0.1->matplotlib)
You are using pip version 9.0.3, however version 19.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.

7.1: 言語モデルを使った文章生成

  • 前章で実装した言語モデルを使って文章を生成する

%python3
import sys
sys.path.append('/tmp/deep-learning-from-scratch-2')

LSTMレイヤ

%python3
from common.functions import sigmoid


class LSTM:
    """One time step of an LSTM layer with explicit forward/backward passes.

    The affine result is split along its last axis into four blocks of
    width ``H`` in the order ``f, g, i, o``; gradients are re-assembled in
    the same order in :meth:`backward`.
    """

    def __init__(self, Wx, Wh, b):
        """Keep the parameters and allocate zeroed gradient buffers.

        Args:
            Wx: weight multiplied with the input (``np.dot(x, Wx)``).
            Wh: weight multiplied with the previous hidden state.
            b: bias added to the affine result.
        """
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(p) for p in (Wx, Wh, b)]
        self.cache = None

    def forward(self, x, h_prev, c_prev):
        """Compute the next hidden and cell state.

        Args:
            x: input for this step.
            h_prev: previous hidden state; must be 2-D, ``(N, H)``.
            c_prev: previous cell state.

        Returns:
            Tuple ``(h_next, c_next)``.
        """
        Wx, Wh, b = self.params
        _, H = h_prev.shape

        affine = np.dot(x, Wx) + np.dot(h_prev, Wh) + b

        # Slice out each gate block and apply its activation.
        f = sigmoid(affine[:, :H])
        g = np.tanh(affine[:, H:2 * H])
        i = sigmoid(affine[:, 2 * H:3 * H])
        o = sigmoid(affine[:, 3 * H:])

        c_next = f * c_prev + g * i
        h_next = o * np.tanh(c_next)

        # Everything backward() needs, in the order it unpacks them.
        self.cache = (x, h_prev, c_prev, i, f, g, o, c_next)
        return h_next, c_next

    def backward(self, dh_next, dc_next):
        """Back-propagate through the step recorded by the last forward().

        Args:
            dh_next: gradient with respect to ``h_next``.
            dc_next: gradient with respect to ``c_next``.

        Returns:
            Tuple ``(dx, dh_prev, dc_prev)``. The parameter gradients are
            written in place into ``self.grads``.
        """
        Wx, Wh, _ = self.params
        x, h_prev, c_prev, i, f, g, o, c_next = self.cache

        tanh_c = np.tanh(c_next)

        # Total gradient reaching the cell state.
        ds = dc_next + (dh_next * o) * (1 - tanh_c ** 2)

        # Per gate: gradient at the activation output, then through the
        # activation derivative (sigmoid: y(1-y), tanh: 1-y^2).
        df = ds * c_prev
        df *= f * (1 - f)

        dg = ds * i
        dg *= (1 - g ** 2)

        di = ds * g
        di *= i * (1 - i)

        do = dh_next * tanh_c
        do *= o * (1 - o)

        # Same block order as the forward slicing: f, g, i, o.
        dA = np.hstack((df, dg, di, do))

        param_grads = (np.dot(x.T, dA), np.dot(h_prev.T, dA), dA.sum(axis=0))
        for buf, value in zip(self.grads, param_grads):
            # Write in place so outside references to the buffers stay valid.
            buf[...] = value

        dx = np.dot(dA, Wx.T)
        dh_prev = np.dot(dA, Wh.T)
        dc_prev = ds * f

        return dx, dh_prev, dc_prev

%python3
class TimeLSTM:
    """Applies an LSTM cell to every time step of a ``(N, T, D)`` input.

    A fresh ``LSTM`` cell (sharing this layer's parameters) is created per
    time step in :meth:`forward` and kept in ``self.layers`` so that
    :meth:`backward` can walk the steps in reverse.
    """

    def __init__(self, Wx, Wh, b, stateful=False):
        """Store parameters, zeroed gradient buffers and the state flag.

        Args:
            Wx, Wh, b: parameters handed to every per-step ``LSTM`` cell.
            stateful: when true, ``h``/``c`` survive between forward calls;
                otherwise they are re-zeroed on every call.
        """
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(p) for p in (Wx, Wh, b)]
        self.layers = None

        self.h = None
        self.c = None
        self.dh = None
        self.stateful = stateful

    def forward(self, xs):
        """Run all ``T`` steps and return the hidden states, ``(N, T, H)``."""
        _, Wh, _ = self.params
        batch, steps, _ = xs.shape
        hidden = Wh.shape[0]

        # Start from zeros unless a kept state is available.
        if self.h is None or not self.stateful:
            self.h = np.zeros((batch, hidden), dtype='f')
        if self.c is None or not self.stateful:
            self.c = np.zeros((batch, hidden), dtype='f')

        hs = np.empty((batch, steps, hidden), dtype='f')
        self.layers = []

        for t in range(steps):
            cell = LSTM(*self.params)
            self.h, self.c = cell.forward(xs[:, t, :], self.h, self.c)
            hs[:, t, :] = self.h
            self.layers.append(cell)

        return hs

    def backward(self, dhs):
        """Back-propagate ``dhs`` (``(N, T, H)``) through all time steps.

        Sums the per-step parameter gradients into ``self.grads`` (in
        place), stores the gradient flowing out of the first step in
        ``self.dh`` and returns the input gradients, ``(N, T, D)``.
        """
        Wx, _, _ = self.params
        batch, steps, _ = dhs.shape
        in_dim = Wx.shape[0]

        dxs = np.empty((batch, steps, in_dim), dtype='f')
        dh, dc = 0, 0
        totals = [0, 0, 0]

        for t in range(steps - 1, -1, -1):
            cell = self.layers[t]
            # Gradient from the layer above plus the one from step t + 1.
            dx, dh, dc = cell.backward(dhs[:, t, :] + dh, dc)
            dxs[:, t, :] = dx
            for k, step_grad in enumerate(cell.grads):
                totals[k] = totals[k] + step_grad

        for buf, total in zip(self.grads, totals):
            buf[...] = total

        self.dh = dh
        return dxs

    def set_state(self, h, c=None):
        """Overwrite the hidden state (and optionally the cell state)."""
        self.h = h
        self.c = c

    def reset_state(self):
        """Forget both states; the next forward() starts from zeros."""
        self.h = None
        self.c = None

Rnnlm

%python3
import pickle
from common.time_layers import TimeSoftmaxWithLoss, TimeEmbedding, TimeAffine

class Rnnlm:
    """LSTM language model: TimeEmbedding -> TimeLSTM -> TimeAffine.

    ``self.params`` / ``self.grads`` are flat lists holding the very same
    array objects the layers use, so the arrays must always be updated in
    place, never replaced.
    """

    def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100):
        """Create randomly initialised weights and build the layer stack.

        Args:
            vocab_size: number of distinct word ids (V).
            wordvec_size: embedding width (D).
            hidden_size: LSTM hidden width (H).
        """
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = np.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        # Weight matrices are scaled by 1/sqrt(number of input rows).
        lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
        affine_b = np.zeros(V).astype('f')

        self.layers = [
            TimeEmbedding(embed_W),
            TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True),
            TimeAffine(affine_W, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.lstm_layer = self.layers[1]

        # Flatten every layer's parameters/gradients into model-level lists.
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, xs):
        """Feed ``xs`` through all layers and return the affine scores."""
        for layer in self.layers:
            xs = layer.forward(xs)
        return xs

    def forward(self, xs, ts):
        """Return the loss of the scores for ``xs`` against targets ``ts``."""
        score = self.predict(xs)
        loss = self.loss_layer.forward(score, ts)
        return loss

    def backward(self, dout=1):
        """Back-propagate from the loss layer down through every layer."""
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        """Clear the hidden/cell state kept by the stateful LSTM layer."""
        self.lstm_layer.reset_state()

    def save_params(self, file_name='Rnnlm.pkl'):
        """Pickle the parameter list to ``file_name``."""
        with open(file_name, 'wb') as f:
            pickle.dump(self.params, f)

    def load_params(self, file_name='Rnnlm.pkl'):
        """Load pickled parameters and copy them into the model in place.

        Rebinding ``self.params`` to the unpickled list would leave the
        layers pointing at their old (randomly initialised) arrays, so the
        values are written into the existing arrays instead.

        Raises:
            ValueError: if the file holds a different number of arrays or an
                array whose shape does not match the model's.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(file_name, 'rb') as f:
            loaded = pickle.load(f)

        if len(loaded) != len(self.params):
            raise ValueError(
                'expected %d parameter arrays, file contains %d'
                % (len(self.params), len(loaded)))

        # Validate everything first so a mismatch cannot leave the model
        # half-updated.
        for index, (param, value) in enumerate(zip(self.params, loaded)):
            if np.shape(value) != param.shape:
                raise ValueError(
                    'shape mismatch for parameter %d: model %s, file %s'
                    % (index, param.shape, np.shape(value)))

        for param, value in zip(self.params, loaded):
            param[...] = value

BetterRnnlm

Rnnlmとの違い:

  • LSTM レイヤの多層化
  • Dropout を使用
  • 重みを共有

%python3
from common.time_layers import TimeEmbedding, TimeDropout, TimeAffine, TimeSoftmaxWithLoss
from common.np import *
from common.base_model import BaseModel


class BetterRnnlm(BaseModel):
    """Two-layer LSTM language model with dropout and tied weights.

    Layer stack: TimeEmbedding -> TimeDropout -> TimeLSTM -> TimeDropout ->
    TimeLSTM -> TimeDropout -> TimeAffine, with TimeSoftmaxWithLoss as the
    loss. The affine layer is given ``embed_W.T``, a transposed view of the
    embedding matrix, so both layers share the same weight data.
    """

    def __init__(self, vocab_size=10000, wordvec_size=650, hidden_size=650, dropout_ratio=0.5):
        """Create the weights and build the layer stack.

        Args:
            vocab_size: number of distinct word ids (V).
            wordvec_size: embedding width (D).
            hidden_size: LSTM hidden width (H); must equal ``wordvec_size``.
            dropout_ratio: ratio handed to every TimeDropout layer.

        Raises:
            ValueError: if ``wordvec_size != hidden_size``.
        """
        V, D, H = vocab_size, wordvec_size, hidden_size
        if D != H:
            # The affine layer consumes the H-wide LSTM output but uses
            # embed_W.T, whose first dimension is D.
            raise ValueError(
                'weight tying requires wordvec_size == hidden_size '
                '(got %d and %d)' % (D, H))
        rn = np.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        lstm_Wx1 = (rn(D, 4*H) / np.sqrt(D)).astype('f')
        lstm_Wh1 = (rn(H, 4*H) / np.sqrt(H)).astype('f')
        lstm_b1 = np.zeros(4*H).astype('f')
        # The second LSTM is fed the first LSTM's output, which is H wide.
        lstm_Wx2 = (rn(H, 4*H) / np.sqrt(H)).astype('f')
        lstm_Wh2 = (rn(H, 4*H) / np.sqrt(H)).astype('f')
        lstm_b2 = np.zeros(4*H).astype('f')
        affine_b = np.zeros(V).astype('f')

        self.layers = [
            TimeEmbedding(embed_W),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
            TimeDropout(dropout_ratio),
            TimeAffine(embed_W.T, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.lstm_layers = [self.layers[2], self.layers[4]]
        self.drop_layers = [self.layers[1], self.layers[3], self.layers[5]]

        # Flatten every layer's parameters/gradients into model-level lists.
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, xs, train_flg=False):
        """Return the affine scores for ``xs``.

        ``train_flg`` is copied onto every dropout layer before the pass.
        """
        for layer in self.drop_layers:
            layer.train_flg = train_flg
        for layer in self.layers:
            xs = layer.forward(xs)
        return xs

    def forward(self, xs, ts, train_flg=True):
        """Return the loss of the scores for ``xs`` against targets ``ts``."""
        score = self.predict(xs, train_flg)
        loss = self.loss_layer.forward(score, ts)
        return loss

    def backward(self, dout=1):
        """Back-propagate from the loss layer down through every layer."""
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        """Clear the hidden/cell state of both LSTM layers."""
        for layer in self.lstm_layers:
            layer.reset_state()

文章生成

  • np.random.choice は、p に指定した確率分布に従ってランダムに要素をサンプリングする関数

%python3
import numpy as np
from common.functions import softmax


class RnnlmGen(Rnnlm):
    """Rnnlm with sampling-based text generation."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word ids starting from ``start_id``.

        Each step feeds the current id through ``predict`` as a ``(1, 1)``
        batch, turns the scores into probabilities with ``softmax`` and
        draws the next id from that distribution.

        Args:
            start_id: id of the first word; always the first output element.
            skip_ids: ids that must not appear in the output. A draw that
                hits one of them is discarded and the step is repeated.
            sample_size: total length of the returned list.

        Returns:
            List of ``sample_size`` ints (just ``[start_id]`` when
            ``sample_size <= 1``).
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            # Draw a scalar instead of a 1-element array: int() on an array
            # with ndim > 0 is deprecated in recent NumPy releases.
            sampled = int(np.random.choice(len(p), p=p))

            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)

        return word_ids

%python3
from dataset import ptb


# Load the PTB 'train' split together with its vocabulary mappings.
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = len(corpus)
vocab_size = len(word_to_id)

# Model with default sizes; no weights are loaded at this point.
model = RnnlmGen()

# Seed word, and the tokens passed to generate() as skip_ids.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[word] for word in skip_words]

word_ids = model.generate(start_id, skip_ids)

# Map ids back to words; every ' <eos>' becomes a period plus newline.
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')

print(txt)
you march instrument quickly dual overly t. resident shirts benign attendants takeover-stock accountability guard deregulation recoup mean corners impressed operation negotiated incorrectly reservations ssangyong mandate discretion alliances touched authority cathcart know-how treaty disagreement falls dai-ichi polyethylene multiple diplomat goldsmith airplanes murdoch durkin cool naturally truce setbacks small heating rico crazy scarce confronted circumstances leslie force said prudential arms cholesterol happening surfaced parties security tendered week declaring earthquake intimate visible backing rank seismic hair divisive know-how prints yard whooping worry hills accepting mich. banning free-market 500-stock charity earlier integrity place leads mediator benjamin indicated alleviate kick concert desire conditions budgetary chose

  • 上の結果はトレーニングしてないモデルの出力なのですごい適当

%python3
# Load the parameter pickle from the ch06 directory of the cloned reference repo.
model.load_params('/tmp/deep-learning-from-scratch-2/ch06/Rnnlm.pkl')

%python3
# Seed word, and the tokens passed to generate() as skip_ids.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[word] for word in skip_words]

word_ids = model.generate(start_id, skip_ids)

# Map ids back to words; every ' <eos>' becomes a period plus newline.
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')

print(txt)
you serve illinois wiped reportedly mid-1980s substantially f emhart expression macmillan than automatic derivative appetite rhone-poulenc spots following wisconsin 1960s computer-driven ec application massage 's harmful worse announced deliberately mission wars institutional ehrlich chosen exception hut arbitragers stepping compete chris supplying carbide initiatives went feelings overhead customers today accelerated unfilled system iras drinks low licenses judge doctor rural widen look cms hotels products towns hearst grown dominion placing tell privately workstation responsibility formation unpublished cloud makers stick allies jay conspiring co. nih gen-probe confronted bankers unfriendly become magnified calgary enserch apt abortion beauty reruns deal constitution itself rebounded fare jeff

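  • トレーニング済みの重みを入れてもそんなに変わらず（この出力を得たときの Rnnlm.load_params は self.params を読み込んだリストに差し替えるだけで、各レイヤが保持している重み配列は初期値のまま更新されない。つまり実際には学習済みの重みが使われていないのが原因）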

%sh
curl https://www.oreilly.co.jp/pub/9784873118369/BetterRnnlm.pkl > /tmp/BetterRnnlm.pkl
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0 37.7M    0  236k    0     0   159k      0  0:04:02  0:00:01  0:04:01  159k
  6 37.7M    6 2460k    0     0   985k      0  0:00:39  0:00:02  0:00:37  984k
 13 37.7M   13 5164k    0     0  1474k      0  0:00:26  0:00:03  0:00:23 1474k
 18 37.7M   18 7180k    0     0  1593k      0  0:00:24  0:00:04  0:00:20 1593k
 24 37.7M   24 9372k    0     0  1700k      0  0:00:22  0:00:05  0:00:17 1860k
 30 37.7M   30 11.3M    0     0  1789k      0  0:00:21  0:00:06  0:00:15 2271k
 36 37.7M   36 13.6M    0     0  1860k      0  0:00:20  0:00:07  0:00:13 2296k
 42 37.7M   42 15.9M    0     0  1913k      0  0:00:20  0:00:08  0:00:12 2219k
 48 37.7M   48 18.1M    0     0  1958k      0  0:00:19  0:00:09  0:00:10 2288k
 54 37.7M   54 20.4M    0     0  1993k      0  0:00:19  0:00:10  0:00:09 2317k
 60 37.7M   60 22.7M    0     0  2026k      0  0:00:19  0:00:11  0:00:08 2336k
 66 37.7M   66 25.1M    0     0  2062k      0  0:00:18  0:00:12  0:00:06 2366k
 73 37.7M   73 27.7M    0     0  2110k      0  0:00:18  0:00:13  0:00:05 2450k
 81 37.7M   81 30.6M    0     0  2165k      0  0:00:17  0:00:14  0:00:03 2559k
 88 37.7M   88 33.3M    0     0  2207k      0  0:00:17  0:00:15  0:00:02 2659k
 95 37.7M   95 35.9M    0     0  2233k      0  0:00:17  0:00:16  0:00:01 2710k
100 37.7M  100 37.7M    0     0  2240k      0  0:00:17  0:00:17 --:--:-- 2710k

%python3
import numpy as np
from common.functions import softmax


class BetterRnnlmGen(BetterRnnlm):
    """BetterRnnlm with sampling-based text generation."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word ids starting from ``start_id``.

        Each step feeds the current id through ``predict`` as a ``(1, 1)``
        batch, turns the scores into probabilities with ``softmax`` and
        draws the next id from that distribution.

        Args:
            start_id: id of the first word; always the first output element.
            skip_ids: ids that must not appear in the output. A draw that
                hits one of them is discarded and the step is repeated.
            sample_size: total length of the returned list.

        Returns:
            List of ``sample_size`` ints (just ``[start_id]`` when
            ``sample_size <= 1``).
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            # Draw a scalar instead of a 1-element array: int() on an array
            # with ndim > 0 is deprecated in recent NumPy releases.
            sampled = int(np.random.choice(len(p), p=p))

            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)

        return word_ids

%python3
# Default-sized model, then the parameter file downloaded to /tmp.
model = BetterRnnlmGen()
model.load_params('/tmp/BetterRnnlm.pkl')

# Seed word, and the tokens passed to generate() as skip_ids.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[word] for word in skip_words]

word_ids = model.generate(start_id, skip_ids)

# Map ids back to words; every ' <eos>' becomes a period plus newline.
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')

print(txt)
you want to raise their eggs.
 the gradual results exceeding the market have lent an unprecedented number of plants held during the past five years at the top rate of economists and energy casualty sales april.
 the bank also said output of unfilled orders have hit a surge in expenses of business as an increase of sales by the navy 's and prepared assets.
 eight business projects went across the area ahead the new package cited texas continental corp. 's chairman frederick a. robinson and nl.
 these days drexel 's clients are a brand attitude to

  • BetterRnnlmの方はだいぶ英語っぽい感じになっている

7.2: seq2seq

足し算を文字のリストとみて結果への変換をやる

使用するデータセット

%sh
cat /tmp/deep-learning-from-scratch-2/dataset/addition.txt | head
16+75  _91  
52+607 _659 
75+22  _97  
63+22  _85  
795+3  _798 
706+796_1502
8+4    _12  
84+317 _401 
9+3    _12  
6+2    _8   

%python3
from dataset import sequence

# Load the addition dataset with a fixed seed, plus its character vocabulary.
(x_train, t_train), (x_test, t_test) = sequence.load_data(
    'addition.txt', seed=1984)
char_to_id, id_to_char = sequence.get_vocab()

# Array shapes of the train and test splits.
for inputs, targets in ((x_train, t_train), (x_test, t_test)):
    print(inputs.shape, targets.shape)

# First training pair: raw ids first, then decoded back to characters.
for sample in (x_train[0], t_train[0]):
    print(sample)

for sample in (x_train[0], t_train[0]):
    print(''.join(id_to_char[c] for c in sample))
(45000, 7) (45000, 5)
(5000, 7) (5000, 5)
[ 3  0  2  0  0 11  5]
[ 6  0 11  7  5]
71+118 
_189 

Encoderの実装

%python3
class Encoder:
    """Embeds an input sequence and encodes it into one hidden vector.

    Consists of a TimeEmbedding followed by a non-stateful TimeLSTM; only
    the hidden state of the last time step is handed on.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create randomly initialised weights and the two sub-layers.

        Args:
            vocab_size: number of distinct input ids (V).
            wordvec_size: embedding width (D).
            hidden_size: LSTM hidden width (H).
        """
        V, D, H = vocab_size, wordvec_size, hidden_size
        randn = np.random.randn

        # Draw order (embedding, Wx, Wh) is kept so seeded runs are unchanged.
        embed_W = (randn(V, D) / 100).astype('f')
        lstm_Wx = (randn(D, 4 * H) / np.sqrt(D)).astype('f')
        lstm_Wh = (randn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)

        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        self.hs = None

    def forward(self, xs):
        """Return the LSTM hidden state of the final time step.

        The full hidden-state sequence is remembered in ``self.hs`` because
        :meth:`backward` needs its shape.
        """
        embedded = self.embed.forward(xs)
        self.hs = self.lstm.forward(embedded)
        return self.hs[:, -1, :]

    def backward(self, dh):
        """Back-propagate ``dh``, the gradient of the final hidden state."""
        # Only the last time step was passed on, so every other step
        # receives a zero gradient.
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh

        dxs = self.lstm.backward(dhs)
        return self.embed.backward(dxs)

%python3
class Decoder:
    """Generates an output sequence from the encoder's hidden vector.

    Layer stack: TimeEmbedding -> TimeLSTM (stateful) -> TimeAffine. The
    LSTM's hidden state is seeded with the vector handed over by the
    encoder.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create randomly initialised weights and the three sub-layers.

        Args:
            vocab_size: number of distinct ids (V).
            wordvec_size: embedding width (D).
            hidden_size: LSTM hidden width (H).
        """
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = np.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
        # Wh has H input rows, so it is scaled by sqrt(H) (not sqrt(D)),
        # matching the initialisation used by Encoder and Rnnlm.
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
        affine_b = np.zeros(V).astype('f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        self.params, self.grads = [], []

        for layer in (self.embed, self.lstm, self.affine):
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, xs, h):
        """Return the scores for decoder input ``xs``.

        Args:
            xs: decoder input ids.
            h: hidden vector from the encoder; becomes the LSTM's state.
        """
        self.lstm.set_state(h)

        out = self.embed.forward(xs)
        out = self.lstm.forward(out)
        score = self.affine.forward(out)
        return score

    def backward(self, dscore):
        """Back-propagate ``dscore`` and return the gradient for ``h``.

        The embedding layer's return value is not used; the value handed
        back is ``self.lstm.dh``, as set by the LSTM's backward pass.
        """
        dout = self.affine.backward(dscore)
        dout = self.lstm.backward(dout)
        dout = self.embed.backward(dout)
        dh = self.lstm.dh
        return dh

    def generate(self, h, start_id, sample_size):
        """Greedily decode ``sample_size`` ids.

        Starting from ``start_id``, each step feeds the previous id as a
        ``(1, 1)`` batch and picks the arg-max of the resulting scores.

        Returns:
            List of ``sample_size`` ints; ``start_id`` itself is not included.
        """
        sampled = []
        sample_id = start_id
        self.lstm.set_state(h)

        for _ in range(sample_size):
            x = np.array(sample_id).reshape((1, 1))
            out = self.embed.forward(x)
            out = self.lstm.forward(out)
            score = self.affine.forward(out)

            sample_id = np.argmax(score.flatten())
            sampled.append(int(sample_id))

        return sampled

%python3
from common.base_model import BaseModel

class Seq2Seq(BaseModel):
    """Encoder-decoder model trained with TimeSoftmaxWithLoss."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build an Encoder and a Decoder with identical size settings."""
        self.encoder = Encoder(vocab_size, wordvec_size, hidden_size)
        self.decoder = Decoder(vocab_size, wordvec_size, hidden_size)
        self.softmax = TimeSoftmaxWithLoss()

        # Encoder parameters first, then decoder parameters.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        """Return the loss for input ``xs`` and target sequence ``ts``.

        The decoder is fed ``ts`` without its last column and is scored
        against ``ts`` without its first column.
        """
        decoder_xs = ts[:, :-1]
        decoder_ts = ts[:, 1:]

        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        return self.softmax.forward(score, decoder_ts)

    def backward(self, dout=1):
        """Back-propagate: loss layer, then decoder, then encoder."""
        dscore = self.softmax.backward(dout)
        dh = self.decoder.backward(dscore)
        return self.encoder.backward(dh)

    def generate(self, xs, start_id, sample_size):
        """Encode ``xs`` and let the decoder generate ``sample_size`` ids."""
        h = self.encoder.forward(xs)
        return self.decoder.generate(h, start_id, sample_size)

Seq2Seqモデルのトレーニング

%python3
import numpy as np
import matplotlib.pyplot as plt
from dataset import sequence
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_seq2seq

# Addition dataset and its character vocabulary.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# Hyper-parameters.
vocab_size = len(char_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 128
max_epoch = 25
max_grad = 5.0

model = Seq2Seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list = []
num_test = len(x_test)
for epoch in range(max_epoch):
    # One epoch per fit() call so the test set can be scored in between.
    trainer.fit(x_train, t_train, max_epoch=1,
                batch_size=batch_size, max_grad=max_grad)

    correct_num = 0
    for i in range(num_test):
        # Index with a list to keep the leading batch axis.
        question = x_test[[i]]
        correct = t_test[[i]]
        # verbose is true only for the first ten test samples.
        verbose = i < 10
        correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose)

    acc = float(correct_num) / num_test
    acc_list.append(acc)
    print('val acc %.3f%%' % (acc * 100))
| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.56
| epoch 1 |  iter 21 / 351 | time 1[s] | loss 2.44
| epoch 1 |  iter 41 / 351 | time 2[s] | loss 2.07
| epoch 1 |  iter 61 / 351 | time 3[s] | loss 1.93
| epoch 1 |  iter 81 / 351 | time 4[s] | loss 1.88
| epoch 1 |  iter 101 / 351 | time 5[s] | loss 1.82
| epoch 1 |  iter 121 / 351 | time 6[s] | loss 1.80
| epoch 1 |  iter 141 / 351 | time 8[s] | loss 1.78
| epoch 1 |  iter 161 / 351 | time 9[s] | loss 1.77
| epoch 1 |  iter 181 / 351 | time 10[s] | loss 1.76
| epoch 1 |  iter 201 / 351 | time 11[s] | loss 1.76
| epoch 1 |  iter 221 / 351 | time 12[s] | loss 1.75
| epoch 1 |  iter 241 / 351 | time 13[s] | loss 1.74
| epoch 1 |  iter 261 / 351 | time 14[s] | loss 1.74
| epoch 1 |  iter 281 / 351 | time 16[s] | loss 1.73
| epoch 1 |  iter 301 / 351 | time 17[s] | loss 1.72
| epoch 1 |  iter 321 / 351 | time 18[s] | loss 1.72
| epoch 1 |  iter 341 / 351 | time 19[s] | loss 1.72
Q 77+85  
T 162 
☒ 100 
---
Q 975+164
T 1139
☒ 1000
---
Q 582+84 
T 666 
☒ 100 
---
Q 8+155  
T 163 
☒ 100 
---
Q 367+55 
T 422 
☒ 100 
---
Q 600+257
T 857 
☒ 1000
---
Q 761+292
T 1053
☒ 1000
---
Q 830+597
T 1427
☒ 1000
---
Q 26+838 
T 864 
☒ 100 
---
Q 143+93 
T 236 
☒ 200 
---
val acc 0.220%
| epoch 2 |  iter 1 / 351 | time 0[s] | loss 1.72
| epoch 2 |  iter 21 / 351 | time 1[s] | loss 1.71
| epoch 2 |  iter 41 / 351 | time 2[s] | loss 1.72
| epoch 2 |  iter 61 / 351 | time 3[s] | loss 1.70
| epoch 2 |  iter 81 / 351 | time 4[s] | loss 1.69
| epoch 2 |  iter 101 / 351 | time 5[s] | loss 1.70
| epoch 2 |  iter 121 / 351 | time 7[s] | loss 1.69
| epoch 2 |  iter 141 / 351 | time 8[s] | loss 1.69
| epoch 2 |  iter 161 / 351 | time 9[s] | loss 1.68
| epoch 2 |  iter 181 / 351 | time 10[s] | loss 1.68
| epoch 2 |  iter 201 / 351 | time 11[s] | loss 1.68
| epoch 2 |  iter 221 / 351 | time 12[s] | loss 1.68
| epoch 2 |  iter 241 / 351 | time 13[s] | loss 1.66
| epoch 2 |  iter 261 / 351 | time 15[s] | loss 1.65
| epoch 2 |  iter 281 / 351 | time 16[s] | loss 1.64
| epoch 2 |  iter 301 / 351 | time 17[s] | loss 1.63
| epoch 2 |  iter 321 / 351 | time 18[s] | loss 1.61
| epoch 2 |  iter 341 / 351 | time 19[s] | loss 1.59
Q 77+85  
T 162 
☒ 100 
---
Q 975+164
T 1139
☒ 1000
---
Q 582+84 
T 666 
☒ 700 
---
Q 8+155  
T 163 
☒ 100 
---
Q 367+55 
T 422 
☒ 400 
---
Q 600+257
T 857 
☒ 800 
---
Q 761+292
T 1053
☒ 1000
---
Q 830+597
T 1427
☒ 1207
---
Q 26+838 
T 864 
☒ 700 
---
Q 143+93 
T 236 
☒ 400 
---
val acc 0.220%
| epoch 3 |  iter 1 / 351 | time 0[s] | loss 1.57
| epoch 3 |  iter 21 / 351 | time 1[s] | loss 1.57
| epoch 3 |  iter 41 / 351 | time 2[s] | loss 1.55
| epoch 3 |  iter 61 / 351 | time 3[s] | loss 1.54
| epoch 3 |  iter 81 / 351 | time 4[s] | loss 1.52
| epoch 3 |  iter 101 / 351 | time 5[s] | loss 1.51
| epoch 3 |  iter 121 / 351 | time 7[s] | loss 1.49
| epoch 3 |  iter 141 / 351 | time 8[s] | loss 1.48
| epoch 3 |  iter 161 / 351 | time 9[s] | loss 1.46
| epoch 3 |  iter 181 / 351 | time 10[s] | loss 1.44
| epoch 3 |  iter 201 / 351 | time 11[s] | loss 1.43
| epoch 3 |  iter 221 / 351 | time 12[s] | loss 1.41
| epoch 3 |  iter 241 / 351 | time 14[s] | loss 1.39
| epoch 3 |  iter 261 / 351 | time 15[s] | loss 1.39
| epoch 3 |  iter 281 / 351 | time 16[s] | loss 1.37
| epoch 3 |  iter 301 / 351 | time 17[s] | loss 1.37
| epoch 3 |  iter 321 / 351 | time 18[s] | loss 1.35
| epoch 3 |  iter 341 / 351 | time 20[s] | loss 1.34
Q 77+85  
T 162 
☒ 136 
---
Q 975+164
T 1139
☒ 1169
---
Q 582+84 
T 666 
☒ 668 
---
Q 8+155  
T 163 
☒ 128 
---
Q 367+55 
T 422 
☒ 446 
---
Q 600+257
T 857 
☒ 839 
---
Q 761+292
T 1053
☒ 1009
---
Q 830+597
T 1427
☒ 1468
---
Q 26+838 
T 864 
☒ 808 
---
Q 143+93 
T 236 
☒ 228 
---
val acc 0.940%
| epoch 4 |  iter 1 / 351 | time 0[s] | loss 1.34
| epoch 4 |  iter 21 / 351 | time 1[s] | loss 1.33
| epoch 4 |  iter 41 / 351 | time 2[s] | loss 1.32
| epoch 4 |  iter 61 / 351 | time 3[s] | loss 1.30
| epoch 4 |  iter 81 / 351 | time 4[s] | loss 1.30
| epoch 4 |  iter 101 / 351 | time 5[s] | loss 1.29
| epoch 4 |  iter 121 / 351 | time 7[s] | loss 1.28
| epoch 4 |  iter 141 / 351 | time 8[s] | loss 1.27
| epoch 4 |  iter 161 / 351 | time 9[s] | loss 1.26
| epoch 4 |  iter 181 / 351 | time 10[s] | loss 1.25
| epoch 4 |  iter 201 / 351 | time 11[s] | loss 1.25
| epoch 4 |  iter 221 / 351 | time 13[s] | loss 1.25
| epoch 4 |  iter 241 / 351 | time 14[s] | loss 1.23
| epoch 4 |  iter 261 / 351 | time 15[s] | loss 1.22
| epoch 4 |  iter 281 / 351 | time 16[s] | loss 1.22
| epoch 4 |  iter 301 / 351 | time 17[s] | loss 1.21
| epoch 4 |  iter 321 / 351 | time 18[s] | loss 1.20
| epoch 4 |  iter 341 / 351 | time 20[s] | loss 1.20
Q 77+85  
T 162 
☒ 156 
---
Q 975+164
T 1139
☒ 1222
---
Q 582+84 
T 666 
☑ 666 
---
Q 8+155  
T 163 
☒ 199 
---
Q 367+55 
T 422 
☒ 402 
---
Q 600+257
T 857 
☒ 902 
---
Q 761+292
T 1053
☒ 1006
---
Q 830+597
T 1427
☒ 1525
---
Q 26+838 
T 864 
☒ 826 
---
Q 143+93 
T 236 
☒ 205 
---
val acc 2.400%
| epoch 5 |  iter 1 / 351 | time 0[s] | loss 1.19
| epoch 5 |  iter 21 / 351 | time 1[s] | loss 1.18
| epoch 5 |  iter 41 / 351 | time 2[s] | loss 1.17
| epoch 5 |  iter 61 / 351 | time 3[s] | loss 1.16
| epoch 5 |  iter 81 / 351 | time 4[s] | loss 1.17
| epoch 5 |  iter 101 / 351 | time 5[s] | loss 1.15
| epoch 5 |  iter 121 / 351 | time 7[s] | loss 1.15
| epoch 5 |  iter 141 / 351 | time 8[s] | loss 1.15
| epoch 5 |  iter 161 / 351 | time 9[s] | loss 1.13
| epoch 5 |  iter 181 / 351 | time 10[s] | loss 1.13
| epoch 5 |  iter 201 / 351 | time 11[s] | loss 1.13
| epoch 5 |  iter 221 / 351 | time 12[s] | loss 1.12
| epoch 5 |  iter 241 / 351 | time 14[s] | loss 1.12
| epoch 5 |  iter 261 / 351 | time 15[s] | loss 1.11
| epoch 5 |  iter 281 / 351 | time 16[s] | loss 1.11
| epoch 5 |  iter 301 / 351 | time 17[s] | loss 1.10
| epoch 5 |  iter 321 / 351 | time 18[s] | loss 1.09
| epoch 5 |  iter 341 / 351 | time 20[s] | loss 1.09
Q 77+85  
T 162 
☒ 155 
---
Q 975+164
T 1139
☒ 1165
---
Q 582+84 
T 666 
☒ 645 
---
Q 8+155  
T 163 
☒ 160 
---
Q 367+55 
T 422 
☒ 421 
---
Q 600+257
T 857 
☒ 882 
---
Q 761+292
T 1053
☒ 1015
---
Q 830+597
T 1427
☒ 1444
---
Q 26+838 
T 864 
☒ 846 
---
Q 143+93 
T 236 
☒ 221 
---
val acc 4.360%
| epoch 6 |  iter 1 / 351 | time 0[s] | loss 1.06
| epoch 6 |  iter 21 / 351 | time 1[s] | loss 1.07
| epoch 6 |  iter 41 / 351 | time 2[s] | loss 1.07
| epoch 6 |  iter 61 / 351 | time 3[s] | loss 1.08
| epoch 6 |  iter 81 / 351 | time 4[s] | loss 1.08
| epoch 6 |  iter 101 / 351 | time 5[s] | loss 1.08
| epoch 6 |  iter 121 / 351 | time 7[s] | loss 1.06
| epoch 6 |  iter 141 / 351 | time 8[s] | loss 1.05
| epoch 6 |  iter 161 / 351 | time 9[s] | loss 1.05
| epoch 6 |  iter 181 / 351 | time 10[s] | loss 1.06
| epoch 6 |  iter 201 / 351 | time 11[s] | loss 1.05
| epoch 6 |  iter 221 / 351 | time 12[s] | loss 1.05
| epoch 6 |  iter 241 / 351 | time 14[s] | loss 1.04
| epoch 6 |  iter 261 / 351 | time 15[s] | loss 1.04
| epoch 6 |  iter 281 / 351 | time 16[s] | loss 1.04
| epoch 6 |  iter 301 / 351 | time 17[s] | loss 1.03
| epoch 6 |  iter 321 / 351 | time 18[s] | loss 1.04
| epoch 6 |  iter 341 / 351 | time 20[s] | loss 1.01
Q 77+85  
T 162 
☒ 161 
---
Q 975+164
T 1139
☒ 1119
---
Q 582+84 
T 666 
☑ 666 
---
Q 8+155  
T 163 
☒ 166 
---
Q 367+55 
T 422 
☒ 410 
---
Q 600+257
T 857 
☑ 857 
---
Q 761+292
T 1053
☒ 1009
---
Q 830+597
T 1427
☒ 1412
---
Q 26+838 
T 864 
☒ 867 
---
Q 143+93 
T 236 
☒ 246 
---
val acc 4.400%
| epoch 7 |  iter 1 / 351 | time 0[s] | loss 1.02
| epoch 7 |  iter 21 / 351 | time 1[s] | loss 1.01
| epoch 7 |  iter 41 / 351 | time 2[s] | loss 1.00
| epoch 7 |  iter 61 / 351 | time 3[s] | loss 1.00
| epoch 7 |  iter 81 / 351 | time 4[s] | loss 1.00
| epoch 7 |  iter 101 / 351 | time 5[s] | loss 1.00
| epoch 7 |  iter 121 / 351 | time 7[s] | loss 1.00
| epoch 7 |  iter 141 / 351 | time 8[s] | loss 0.98
| epoch 7 |  iter 161 / 351 | time 9[s] | loss 1.00
| epoch 7 |  iter 181 / 351 | time 10[s] | loss 0.99
| epoch 7 |  iter 201 / 351 | time 11[s] | loss 0.98
| epoch 7 |  iter 221 / 351 | time 13[s] | loss 1.00
| epoch 7 |  iter 241 / 351 | time 14[s] | loss 1.02
| epoch 7 |  iter 261 / 351 | time 15[s] | loss 1.00
| epoch 7 |  iter 281 / 351 | time 16[s] | loss 0.97
| epoch 7 |  iter 301 / 351 | time 17[s] | loss 0.97
| epoch 7 |  iter 321 / 351 | time 18[s] | loss 0.96
| epoch 7 |  iter 341 / 351 | time 20[s] | loss 0.96
Q 77+85  
T 162 
☒ 161 
---
Q 975+164
T 1139
☒ 1175
---
Q 582+84 
T 666 
☒ 667 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 430 
---
Q 600+257
T 857 
☒ 886 
---
Q 761+292
T 1053
☒ 1076
---
Q 830+597
T 1427
☒ 1444
---
Q 26+838 
T 864 
☒ 865 
---
Q 143+93 
T 236 
☒ 238 
---
val acc 5.100%
| epoch 8 |  iter 1 / 351 | time 0[s] | loss 1.01
| epoch 8 |  iter 21 / 351 | time 1[s] | loss 0.95
| epoch 8 |  iter 41 / 351 | time 2[s] | loss 0.96
| epoch 8 |  iter 61 / 351 | time 3[s] | loss 0.95
| epoch 8 |  iter 81 / 351 | time 4[s] | loss 0.95
| epoch 8 |  iter 101 / 351 | time 5[s] | loss 0.96
| epoch 8 |  iter 121 / 351 | time 7[s] | loss 0.95
| epoch 8 |  iter 141 / 351 | time 8[s] | loss 0.95
| epoch 8 |  iter 161 / 351 | time 9[s] | loss 0.95
| epoch 8 |  iter 181 / 351 | time 10[s] | loss 0.94
| epoch 8 |  iter 201 / 351 | time 11[s] | loss 0.93
| epoch 8 |  iter 221 / 351 | time 13[s] | loss 0.93
| epoch 8 |  iter 241 / 351 | time 14[s] | loss 0.93
| epoch 8 |  iter 261 / 351 | time 15[s] | loss 0.95
| epoch 8 |  iter 281 / 351 | time 16[s] | loss 0.94
| epoch 8 |  iter 301 / 351 | time 18[s] | loss 0.92
| epoch 8 |  iter 321 / 351 | time 19[s] | loss 0.92
| epoch 8 |  iter 341 / 351 | time 20[s] | loss 0.92
Q 77+85  
T 162 
☒ 160 
---
Q 975+164
T 1139
☒ 1130
---
Q 582+84 
T 666 
☒ 668 
---
Q 8+155  
T 163 
☒ 158 
---
Q 367+55 
T 422 
☒ 420 
---
Q 600+257
T 857 
☒ 858 
---
Q 761+292
T 1053
☒ 1009
---
Q 830+597
T 1427
☒ 1431
---
Q 26+838 
T 864 
☒ 865 
---
Q 143+93 
T 236 
☒ 232 
---
val acc 5.440%
| epoch 9 |  iter 1 / 351 | time 0[s] | loss 0.92
| epoch 9 |  iter 21 / 351 | time 1[s] | loss 0.91
| epoch 9 |  iter 41 / 351 | time 2[s] | loss 0.90
| epoch 9 |  iter 61 / 351 | time 3[s] | loss 0.90
| epoch 9 |  iter 81 / 351 | time 4[s] | loss 0.89
| epoch 9 |  iter 101 / 351 | time 5[s] | loss 0.91
| epoch 9 |  iter 121 / 351 | time 7[s] | loss 0.90
| epoch 9 |  iter 141 / 351 | time 8[s] | loss 0.89
| epoch 9 |  iter 161 / 351 | time 9[s] | loss 0.94
| epoch 9 |  iter 181 / 351 | time 10[s] | loss 0.90
| epoch 9 |  iter 201 / 351 | time 12[s] | loss 0.90
| epoch 9 |  iter 221 / 351 | time 13[s] | loss 0.91
| epoch 9 |  iter 241 / 351 | time 14[s] | loss 0.89
| epoch 9 |  iter 261 / 351 | time 15[s] | loss 0.90
| epoch 9 |  iter 281 / 351 | time 16[s] | loss 0.90
| epoch 9 |  iter 301 / 351 | time 17[s] | loss 0.88
| epoch 9 |  iter 321 / 351 | time 19[s] | loss 0.87
| epoch 9 |  iter 341 / 351 | time 20[s] | loss 0.87
Q 77+85  
T 162 
☒ 161 
---
Q 975+164
T 1139
☑ 1139
---
Q 582+84 
T 666 
☒ 667 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 427 
---
Q 600+257
T 857 
☒ 859 
---
Q 761+292
T 1053
☒ 1069
---
Q 830+597
T 1427
☒ 1421
---
Q 26+838 
T 864 
☒ 865 
---
Q 143+93 
T 236 
☒ 248 
---
val acc 7.680%
| epoch 10 |  iter 1 / 351 | time 0[s] | loss 0.84
| epoch 10 |  iter 21 / 351 | time 1[s] | loss 0.86
| epoch 10 |  iter 41 / 351 | time 2[s] | loss 0.87
| epoch 10 |  iter 61 / 351 | time 3[s] | loss 0.87
| epoch 10 |  iter 81 / 351 | time 4[s] | loss 0.86
| epoch 10 |  iter 101 / 351 | time 6[s] | loss 0.86
| epoch 10 |  iter 121 / 351 | time 7[s] | loss 0.86
| epoch 10 |  iter 141 / 351 | time 8[s] | loss 0.87
| epoch 10 |  iter 161 / 351 | time 9[s] | loss 0.85
| epoch 10 |  iter 181 / 351 | time 10[s] | loss 0.88
| epoch 10 |  iter 201 / 351 | time 11[s] | loss 0.85
| epoch 10 |  iter 221 / 351 | time 13[s] | loss 0.86
| epoch 10 |  iter 241 / 351 | time 14[s] | loss 0.86
| epoch 10 |  iter 261 / 351 | time 15[s] | loss 0.85
| epoch 10 |  iter 281 / 351 | time 16[s] | loss 0.85
| epoch 10 |  iter 301 / 351 | time 17[s] | loss 0.84
| epoch 10 |  iter 321 / 351 | time 18[s] | loss 0.84
| epoch 10 |  iter 341 / 351 | time 20[s] | loss 0.84
Q 77+85  
T 162 
☒ 160 
---
Q 975+164
T 1139
☒ 1130
---
Q 582+84 
T 666 
☒ 663 
---
Q 8+155  
T 163 
☒ 165 
---
Q 367+55 
T 422 
☒ 420 
---
Q 600+257
T 857 
☒ 859 
---
Q 761+292
T 1053
☒ 1039
---
Q 830+597
T 1427
☒ 1409
---
Q 26+838 
T 864 
☒ 865 
---
Q 143+93 
T 236 
☒ 238 
---
val acc 8.840%
| epoch 11 |  iter 1 / 351 | time 0[s] | loss 0.80
| epoch 11 |  iter 21 / 351 | time 1[s] | loss 0.85
| epoch 11 |  iter 41 / 351 | time 2[s] | loss 0.83
| epoch 11 |  iter 61 / 351 | time 3[s] | loss 0.83
| epoch 11 |  iter 81 / 351 | time 4[s] | loss 0.83
| epoch 11 |  iter 101 / 351 | time 6[s] | loss 0.82
| epoch 11 |  iter 121 / 351 | time 7[s] | loss 0.82
| epoch 11 |  iter 141 / 351 | time 8[s] | loss 0.81
| epoch 11 |  iter 161 / 351 | time 9[s] | loss 0.81
| epoch 11 |  iter 181 / 351 | time 11[s] | loss 0.81
| epoch 11 |  iter 201 / 351 | time 12[s] | loss 0.81
| epoch 11 |  iter 221 / 351 | time 13[s] | loss 0.82
| epoch 11 |  iter 241 / 351 | time 15[s] | loss 0.81
| epoch 11 |  iter 261 / 351 | time 16[s] | loss 0.81
| epoch 11 |  iter 281 / 351 | time 17[s] | loss 0.82
| epoch 11 |  iter 301 / 351 | time 18[s] | loss 0.83
| epoch 11 |  iter 321 / 351 | time 19[s] | loss 0.80
| epoch 11 |  iter 341 / 351 | time 21[s] | loss 0.81
Q 77+85  
T 162 
☒ 161 
---
Q 975+164
T 1139
☒ 1183
---
Q 582+84 
T 666 
☒ 658 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☑ 422 
---
Q 600+257
T 857 
☒ 851 
---
Q 761+292
T 1053
☒ 1073
---
Q 830+597
T 1427
☒ 1425
---
Q 26+838 
T 864 
☒ 861 
---
Q 143+93 
T 236 
☒ 238 
---
val acc 8.020%
| epoch 12 |  iter 1 / 351 | time 0[s] | loss 0.80
| epoch 12 |  iter 21 / 351 | time 1[s] | loss 0.79
| epoch 12 |  iter 41 / 351 | time 2[s] | loss 0.80
| epoch 12 |  iter 61 / 351 | time 3[s] | loss 0.80
| epoch 12 |  iter 81 / 351 | time 4[s] | loss 0.79
| epoch 12 |  iter 101 / 351 | time 6[s] | loss 0.79
| epoch 12 |  iter 121 / 351 | time 7[s] | loss 0.78
| epoch 12 |  iter 141 / 351 | time 8[s] | loss 0.79
| epoch 12 |  iter 161 / 351 | time 9[s] | loss 0.79
| epoch 12 |  iter 181 / 351 | time 10[s] | loss 0.82
| epoch 12 |  iter 201 / 351 | time 11[s] | loss 0.79
| epoch 12 |  iter 221 / 351 | time 13[s] | loss 0.77
| epoch 12 |  iter 241 / 351 | time 14[s] | loss 0.78
| epoch 12 |  iter 261 / 351 | time 15[s] | loss 0.78
| epoch 12 |  iter 281 / 351 | time 16[s] | loss 0.78
| epoch 12 |  iter 301 / 351 | time 17[s] | loss 0.77
| epoch 12 |  iter 321 / 351 | time 19[s] | loss 0.78
| epoch 12 |  iter 341 / 351 | time 20[s] | loss 0.77
Q 77+85  
T 162 
☒ 161 
---
Q 975+164
T 1139
☒ 1129
---
Q 582+84 
T 666 
☒ 669 
---
Q 8+155  
T 163 
☒ 166 
---
Q 367+55 
T 422 
☒ 423 
---
Q 600+257
T 857 
☒ 859 
---
Q 761+292
T 1053
☒ 1039
---
Q 830+597
T 1427
☒ 1421
---
Q 26+838 
T 864 
☒ 867 
---
Q 143+93 
T 236 
☒ 238 
---
val acc 12.200%
| epoch 13 |  iter 1 / 351 | time 0[s] | loss 0.75
| epoch 13 |  iter 21 / 351 | time 1[s] | loss 0.77
| epoch 13 |  iter 41 / 351 | time 2[s] | loss 0.75
| epoch 13 |  iter 61 / 351 | time 3[s] | loss 0.76
| epoch 13 |  iter 81 / 351 | time 4[s] | loss 0.76
| epoch 13 |  iter 101 / 351 | time 5[s] | loss 0.76
| epoch 13 |  iter 121 / 351 | time 7[s] | loss 0.79
| epoch 13 |  iter 141 / 351 | time 8[s] | loss 0.76
| epoch 13 |  iter 161 / 351 | time 9[s] | loss 0.75
| epoch 13 |  iter 181 / 351 | time 10[s] | loss 0.80
| epoch 13 |  iter 201 / 351 | time 11[s] | loss 0.76
| epoch 13 |  iter 221 / 351 | time 13[s] | loss 0.76
| epoch 13 |  iter 241 / 351 | time 14[s] | loss 0.75
| epoch 13 |  iter 261 / 351 | time 15[s] | loss 0.77
| epoch 13 |  iter 281 / 351 | time 16[s] | loss 0.75
| epoch 13 |  iter 301 / 351 | time 17[s] | loss 0.74
| epoch 13 |  iter 321 / 351 | time 18[s] | loss 0.74
| epoch 13 |  iter 341 / 351 | time 20[s] | loss 0.73
Q 77+85  
T 162 
☒ 160 
---
Q 975+164
T 1139
☑ 1139
---
Q 582+84 
T 666 
☒ 664 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 420 
---
Q 600+257
T 857 
☒ 851 
---
Q 761+292
T 1053
☒ 1063
---
Q 830+597
T 1427
☒ 1421
---
Q 26+838 
T 864 
☒ 861 
---
Q 143+93 
T 236 
☒ 239 
---
val acc 12.460%
| epoch 14 |  iter 1 / 351 | time 0[s] | loss 0.74
| epoch 14 |  iter 21 / 351 | time 1[s] | loss 0.73
| epoch 14 |  iter 41 / 351 | time 2[s] | loss 0.73
| epoch 14 |  iter 61 / 351 | time 3[s] | loss 0.72
| epoch 14 |  iter 81 / 351 | time 4[s] | loss 0.73
| epoch 14 |  iter 101 / 351 | time 5[s] | loss 0.74
| epoch 14 |  iter 121 / 351 | time 7[s] | loss 0.74
| epoch 14 |  iter 141 / 351 | time 8[s] | loss 0.72
| epoch 14 |  iter 161 / 351 | time 9[s] | loss 0.72
| epoch 14 |  iter 181 / 351 | time 10[s] | loss 0.71
| epoch 14 |  iter 201 / 351 | time 11[s] | loss 0.71
| epoch 14 |  iter 221 / 351 | time 13[s] | loss 0.73
| epoch 14 |  iter 241 / 351 | time 14[s] | loss 0.73
| epoch 14 |  iter 261 / 351 | time 15[s] | loss 0.72
| epoch 14 |  iter 281 / 351 | time 16[s] | loss 0.71
| epoch 14 |  iter 301 / 351 | time 17[s] | loss 0.71
| epoch 14 |  iter 321 / 351 | time 18[s] | loss 0.71
| epoch 14 |  iter 341 / 351 | time 20[s] | loss 0.70
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1179
---
Q 582+84 
T 666 
☒ 658 
---
Q 8+155  
T 163 
☒ 166 
---
Q 367+55 
T 422 
☒ 420 
---
Q 600+257
T 857 
☒ 859 
---
Q 761+292
T 1053
☒ 1065
---
Q 830+597
T 1427
☒ 1418
---
Q 26+838 
T 864 
☒ 865 
---
Q 143+93 
T 236 
☒ 233 
---
val acc 10.060%
| epoch 15 |  iter 1 / 351 | time 0[s] | loss 0.73
| epoch 15 |  iter 21 / 351 | time 1[s] | loss 0.70
| epoch 15 |  iter 41 / 351 | time 2[s] | loss 0.71
| epoch 15 |  iter 61 / 351 | time 3[s] | loss 0.69
| epoch 15 |  iter 81 / 351 | time 4[s] | loss 0.70
| epoch 15 |  iter 101 / 351 | time 5[s] | loss 0.69
| epoch 15 |  iter 121 / 351 | time 7[s] | loss 0.69
| epoch 15 |  iter 141 / 351 | time 8[s] | loss 0.70
| epoch 15 |  iter 161 / 351 | time 9[s] | loss 0.71
| epoch 15 |  iter 181 / 351 | time 10[s] | loss 0.73
| epoch 15 |  iter 201 / 351 | time 11[s] | loss 0.73
| epoch 15 |  iter 221 / 351 | time 13[s] | loss 0.72
| epoch 15 |  iter 241 / 351 | time 14[s] | loss 0.71
| epoch 15 |  iter 261 / 351 | time 15[s] | loss 0.69
| epoch 15 |  iter 281 / 351 | time 16[s] | loss 0.71
| epoch 15 |  iter 301 / 351 | time 17[s] | loss 0.68
| epoch 15 |  iter 321 / 351 | time 18[s] | loss 0.69
| epoch 15 |  iter 341 / 351 | time 20[s] | loss 0.69
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1130
---
Q 582+84 
T 666 
☒ 668 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 423 
---
Q 600+257
T 857 
☒ 851 
---
Q 761+292
T 1053
☒ 1062
---
Q 830+597
T 1427
☒ 1444
---
Q 26+838 
T 864 
☒ 861 
---
Q 143+93 
T 236 
☒ 238 
---
val acc 14.080%
| epoch 16 |  iter 1 / 351 | time 0[s] | loss 0.67
| epoch 16 |  iter 21 / 351 | time 1[s] | loss 0.68
| epoch 16 |  iter 41 / 351 | time 2[s] | loss 0.68
| epoch 16 |  iter 61 / 351 | time 3[s] | loss 0.67
| epoch 16 |  iter 81 / 351 | time 4[s] | loss 0.67
| epoch 16 |  iter 101 / 351 | time 6[s] | loss 0.68
| epoch 16 |  iter 121 / 351 | time 7[s] | loss 0.68
| epoch 16 |  iter 141 / 351 | time 8[s] | loss 0.67
| epoch 16 |  iter 161 / 351 | time 9[s] | loss 0.66
| epoch 16 |  iter 181 / 351 | time 10[s] | loss 0.67
| epoch 16 |  iter 201 / 351 | time 11[s] | loss 0.66
| epoch 16 |  iter 221 / 351 | time 13[s] | loss 0.66
| epoch 16 |  iter 241 / 351 | time 14[s] | loss 0.65
| epoch 16 |  iter 261 / 351 | time 15[s] | loss 0.66
| epoch 16 |  iter 281 / 351 | time 16[s] | loss 0.67
| epoch 16 |  iter 301 / 351 | time 17[s] | loss 0.65
| epoch 16 |  iter 321 / 351 | time 19[s] | loss 0.65
| epoch 16 |  iter 341 / 351 | time 20[s] | loss 0.64
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1129
---
Q 582+84 
T 666 
☒ 669 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 420 
---
Q 600+257
T 857 
☒ 850 
---
Q 761+292
T 1053
☒ 1044
---
Q 830+597
T 1427
☒ 1418
---
Q 26+838 
T 864 
☒ 861 
---
Q 143+93 
T 236 
☒ 237 
---
val acc 15.680%
| epoch 17 |  iter 1 / 351 | time 0[s] | loss 0.65
| epoch 17 |  iter 21 / 351 | time 1[s] | loss 0.64
| epoch 17 |  iter 41 / 351 | time 2[s] | loss 0.70
| epoch 17 |  iter 61 / 351 | time 3[s] | loss 0.69
| epoch 17 |  iter 81 / 351 | time 4[s] | loss 0.64
| epoch 17 |  iter 101 / 351 | time 5[s] | loss 0.63
| epoch 17 |  iter 121 / 351 | time 7[s] | loss 0.64
| epoch 17 |  iter 141 / 351 | time 8[s] | loss 0.64
| epoch 17 |  iter 161 / 351 | time 9[s] | loss 0.65
| epoch 17 |  iter 181 / 351 | time 10[s] | loss 0.65
| epoch 17 |  iter 201 / 351 | time 11[s] | loss 0.64
| epoch 17 |  iter 221 / 351 | time 13[s] | loss 0.64
| epoch 17 |  iter 241 / 351 | time 14[s] | loss 0.65
| epoch 17 |  iter 261 / 351 | time 15[s] | loss 0.65
| epoch 17 |  iter 281 / 351 | time 16[s] | loss 0.64
| epoch 17 |  iter 301 / 351 | time 17[s] | loss 0.64
| epoch 17 |  iter 321 / 351 | time 19[s] | loss 0.64
| epoch 17 |  iter 341 / 351 | time 20[s] | loss 0.66
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1138
---
Q 582+84 
T 666 
☑ 666 
---
Q 8+155  
T 163 
☒ 166 
---
Q 367+55 
T 422 
☒ 420 
---
Q 600+257
T 857 
☒ 859 
---
Q 761+292
T 1053
☒ 1044
---
Q 830+597
T 1427
☒ 1424
---
Q 26+838 
T 864 
☒ 861 
---
Q 143+93 
T 236 
☒ 238 
---
val acc 16.240%
| epoch 18 |  iter 1 / 351 | time 0[s] | loss 0.62
| epoch 18 |  iter 21 / 351 | time 1[s] | loss 0.62
| epoch 18 |  iter 41 / 351 | time 2[s] | loss 0.62
| epoch 18 |  iter 61 / 351 | time 3[s] | loss 0.62
| epoch 18 |  iter 81 / 351 | time 4[s] | loss 0.64
| epoch 18 |  iter 101 / 351 | time 6[s] | loss 0.64
| epoch 18 |  iter 121 / 351 | time 7[s] | loss 0.61
| epoch 18 |  iter 141 / 351 | time 8[s] | loss 0.65
| epoch 18 |  iter 161 / 351 | time 9[s] | loss 0.65
| epoch 18 |  iter 181 / 351 | time 10[s] | loss 0.62
| epoch 18 |  iter 201 / 351 | time 12[s] | loss 0.61
| epoch 18 |  iter 221 / 351 | time 13[s] | loss 0.61
| epoch 18 |  iter 241 / 351 | time 14[s] | loss 0.64
| epoch 18 |  iter 261 / 351 | time 15[s] | loss 0.61
| epoch 18 |  iter 281 / 351 | time 16[s] | loss 0.61
| epoch 18 |  iter 301 / 351 | time 17[s] | loss 0.61
| epoch 18 |  iter 321 / 351 | time 19[s] | loss 0.61
| epoch 18 |  iter 341 / 351 | time 20[s] | loss 0.61
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1143
---
Q 582+84 
T 666 
☒ 661 
---
Q 8+155  
T 163 
☒ 162 
---
Q 367+55 
T 422 
☑ 422 
---
Q 600+257
T 857 
☒ 851 
---
Q 761+292
T 1053
☒ 1049
---
Q 830+597
T 1427
☒ 1424
---
Q 26+838 
T 864 
☒ 867 
---
Q 143+93 
T 236 
☒ 239 
---
val acc 16.620%
| epoch 19 |  iter 1 / 351 | time 0[s] | loss 0.62
| epoch 19 |  iter 21 / 351 | time 1[s] | loss 0.63
| epoch 19 |  iter 41 / 351 | time 2[s] | loss 0.61
| epoch 19 |  iter 61 / 351 | time 3[s] | loss 0.59
| epoch 19 |  iter 81 / 351 | time 4[s] | loss 0.60
| epoch 19 |  iter 101 / 351 | time 5[s] | loss 0.61
| epoch 19 |  iter 121 / 351 | time 7[s] | loss 0.60
| epoch 19 |  iter 141 / 351 | time 8[s] | loss 0.59
| epoch 19 |  iter 161 / 351 | time 9[s] | loss 0.60
| epoch 19 |  iter 181 / 351 | time 10[s] | loss 0.59
| epoch 19 |  iter 201 / 351 | time 11[s] | loss 0.58
| epoch 19 |  iter 221 / 351 | time 13[s] | loss 0.60
| epoch 19 |  iter 241 / 351 | time 14[s] | loss 0.58
| epoch 19 |  iter 261 / 351 | time 15[s] | loss 0.59
| epoch 19 |  iter 281 / 351 | time 16[s] | loss 0.59
| epoch 19 |  iter 301 / 351 | time 17[s] | loss 0.59
| epoch 19 |  iter 321 / 351 | time 19[s] | loss 0.59
| epoch 19 |  iter 341 / 351 | time 20[s] | loss 0.59
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1129
---
Q 582+84 
T 666 
☑ 666 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 423 
---
Q 600+257
T 857 
☒ 852 
---
Q 761+292
T 1053
☑ 1053
---
Q 830+597
T 1427
☒ 1421
---
Q 26+838 
T 864 
☒ 867 
---
Q 143+93 
T 236 
☒ 235 
---
val acc 19.760%
| epoch 20 |  iter 1 / 351 | time 0[s] | loss 0.56
| epoch 20 |  iter 21 / 351 | time 1[s] | loss 0.58
| epoch 20 |  iter 41 / 351 | time 2[s] | loss 0.59
| epoch 20 |  iter 61 / 351 | time 3[s] | loss 0.58
| epoch 20 |  iter 81 / 351 | time 5[s] | loss 0.59
| epoch 20 |  iter 101 / 351 | time 6[s] | loss 0.57
| epoch 20 |  iter 121 / 351 | time 7[s] | loss 0.58
| epoch 20 |  iter 141 / 351 | time 8[s] | loss 0.60
| epoch 20 |  iter 161 / 351 | time 9[s] | loss 0.63
| epoch 20 |  iter 181 / 351 | time 10[s] | loss 0.58
| epoch 20 |  iter 201 / 351 | time 12[s] | loss 0.59
| epoch 20 |  iter 221 / 351 | time 13[s] | loss 0.58
| epoch 20 |  iter 241 / 351 | time 14[s] | loss 0.62
| epoch 20 |  iter 261 / 351 | time 15[s] | loss 0.62
| epoch 20 |  iter 281 / 351 | time 16[s] | loss 0.61
| epoch 20 |  iter 301 / 351 | time 18[s] | loss 0.60
| epoch 20 |  iter 321 / 351 | time 19[s] | loss 0.61
| epoch 20 |  iter 341 / 351 | time 20[s] | loss 0.56
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1141
---
Q 582+84 
T 666 
☒ 665 
---
Q 8+155  
T 163 
☒ 164 
---
Q 367+55 
T 422 
☑ 422 
---
Q 600+257
T 857 
☒ 852 
---
Q 761+292
T 1053
☑ 1053
---
Q 830+597
T 1427
☒ 1424
---
Q 26+838 
T 864 
☒ 862 
---
Q 143+93 
T 236 
☒ 235 
---
val acc 22.020%
| epoch 21 |  iter 1 / 351 | time 0[s] | loss 0.55
| epoch 21 |  iter 21 / 351 | time 1[s] | loss 0.56
| epoch 21 |  iter 41 / 351 | time 2[s] | loss 0.56
| epoch 21 |  iter 61 / 351 | time 3[s] | loss 0.57
| epoch 21 |  iter 81 / 351 | time 4[s] | loss 0.56
| epoch 21 |  iter 101 / 351 | time 5[s] | loss 0.56
| epoch 21 |  iter 121 / 351 | time 7[s] | loss 0.58
| epoch 21 |  iter 141 / 351 | time 8[s] | loss 0.58
| epoch 21 |  iter 161 / 351 | time 9[s] | loss 0.55
| epoch 21 |  iter 181 / 351 | time 10[s] | loss 0.57
| epoch 21 |  iter 201 / 351 | time 11[s] | loss 0.56
| epoch 21 |  iter 221 / 351 | time 13[s] | loss 0.56
| epoch 21 |  iter 241 / 351 | time 14[s] | loss 0.55
| epoch 21 |  iter 261 / 351 | time 15[s] | loss 0.56
| epoch 21 |  iter 281 / 351 | time 16[s] | loss 0.58
| epoch 21 |  iter 301 / 351 | time 17[s] | loss 0.56
| epoch 21 |  iter 321 / 351 | time 18[s] | loss 0.55
| epoch 21 |  iter 341 / 351 | time 20[s] | loss 0.57
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1144
---
Q 582+84 
T 666 
☒ 667 
---
Q 8+155  
T 163 
☒ 165 
---
Q 367+55 
T 422 
☒ 423 
---
Q 600+257
T 857 
☒ 850 
---
Q 761+292
T 1053
☒ 1055
---
Q 830+597
T 1427
☑ 1427
---
Q 26+838 
T 864 
☒ 867 
---
Q 143+93 
T 236 
☒ 237 
---
val acc 14.560%
| epoch 22 |  iter 1 / 351 | time 0[s] | loss 0.61
| epoch 22 |  iter 21 / 351 | time 1[s] | loss 0.58
| epoch 22 |  iter 41 / 351 | time 2[s] | loss 0.57
| epoch 22 |  iter 61 / 351 | time 3[s] | loss 0.58
| epoch 22 |  iter 81 / 351 | time 4[s] | loss 0.57
| epoch 22 |  iter 101 / 351 | time 5[s] | loss 0.58
| epoch 22 |  iter 121 / 351 | time 7[s] | loss 0.56
| epoch 22 |  iter 141 / 351 | time 8[s] | loss 0.54
| epoch 22 |  iter 161 / 351 | time 9[s] | loss 0.55
| epoch 22 |  iter 181 / 351 | time 10[s] | loss 0.55
| epoch 22 |  iter 201 / 351 | time 11[s] | loss 0.54
| epoch 22 |  iter 221 / 351 | time 12[s] | loss 0.53
| epoch 22 |  iter 241 / 351 | time 14[s] | loss 0.53
| epoch 22 |  iter 261 / 351 | time 15[s] | loss 0.54
| epoch 22 |  iter 281 / 351 | time 16[s] | loss 0.54
| epoch 22 |  iter 301 / 351 | time 17[s] | loss 0.54
| epoch 22 |  iter 321 / 351 | time 18[s] | loss 0.54
| epoch 22 |  iter 341 / 351 | time 20[s] | loss 0.54
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1141
---
Q 582+84 
T 666 
☒ 665 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 421 
---
Q 600+257
T 857 
☒ 859 
---
Q 761+292
T 1053
☒ 1050
---
Q 830+597
T 1427
☒ 1424
---
Q 26+838 
T 864 
☒ 865 
---
Q 143+93 
T 236 
☒ 235 
---
val acc 24.840%
| epoch 23 |  iter 1 / 351 | time 0[s] | loss 0.52
| epoch 23 |  iter 21 / 351 | time 1[s] | loss 0.53
| epoch 23 |  iter 41 / 351 | time 2[s] | loss 0.53
| epoch 23 |  iter 61 / 351 | time 3[s] | loss 0.57
| epoch 23 |  iter 81 / 351 | time 4[s] | loss 0.57
| epoch 23 |  iter 101 / 351 | time 5[s] | loss 0.56
| epoch 23 |  iter 121 / 351 | time 7[s] | loss 0.51
| epoch 23 |  iter 141 / 351 | time 8[s] | loss 0.53
| epoch 23 |  iter 161 / 351 | time 9[s] | loss 0.54
| epoch 23 |  iter 181 / 351 | time 10[s] | loss 0.54
| epoch 23 |  iter 201 / 351 | time 11[s] | loss 0.53
| epoch 23 |  iter 221 / 351 | time 13[s] | loss 0.52
| epoch 23 |  iter 241 / 351 | time 14[s] | loss 0.53
| epoch 23 |  iter 261 / 351 | time 15[s] | loss 0.55
| epoch 23 |  iter 281 / 351 | time 16[s] | loss 0.53
| epoch 23 |  iter 301 / 351 | time 17[s] | loss 0.52
| epoch 23 |  iter 321 / 351 | time 18[s] | loss 0.52
| epoch 23 |  iter 341 / 351 | time 20[s] | loss 0.52
Q 77+85  
T 162 
☑ 162 
---
Q 975+164
T 1139
☒ 1143
---
Q 582+84 
T 666 
☑ 666 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 420 
---
Q 600+257
T 857 
☑ 857 
---
Q 761+292
T 1053
☒ 1055
---
Q 830+597
T 1427
☒ 1424
---
Q 26+838 
T 864 
☒ 862 
---
Q 143+93 
T 236 
☒ 233 
---
val acc 25.740%
| epoch 24 |  iter 1 / 351 | time 0[s] | loss 0.50
| epoch 24 |  iter 21 / 351 | time 1[s] | loss 0.51
| epoch 24 |  iter 41 / 351 | time 2[s] | loss 0.54
| epoch 24 |  iter 61 / 351 | time 3[s] | loss 0.50
| epoch 24 |  iter 81 / 351 | time 4[s] | loss 0.51
| epoch 24 |  iter 101 / 351 | time 5[s] | loss 0.52
| epoch 24 |  iter 121 / 351 | time 7[s] | loss 0.53
| epoch 24 |  iter 141 / 351 | time 8[s] | loss 0.51
| epoch 24 |  iter 161 / 351 | time 9[s] | loss 0.55
| epoch 24 |  iter 181 / 351 | time 10[s] | loss 0.52
| epoch 24 |  iter 201 / 351 | time 12[s] | loss 0.51
| epoch 24 |  iter 221 / 351 | time 13[s] | loss 0.51
| epoch 24 |  iter 241 / 351 | time 14[s] | loss 0.52
| epoch 24 |  iter 261 / 351 | time 15[s] | loss 0.52
| epoch 24 |  iter 281 / 351 | time 17[s] | loss 0.52
| epoch 24 |  iter 301 / 351 | time 18[s] | loss 0.51
| epoch 24 |  iter 321 / 351 | time 19[s] | loss 0.51
| epoch 24 |  iter 341 / 351 | time 20[s] | loss 0.50
Q 77+85  
T 162 
☒ 165 
---
Q 975+164
T 1139
☒ 1140
---
Q 582+84 
T 666 
☒ 669 
---
Q 8+155  
T 163 
☑ 163 
---
Q 367+55 
T 422 
☒ 423 
---
Q 600+257
T 857 
☑ 857 
---
Q 761+292
T 1053
☒ 1055
---
Q 830+597
T 1427
☑ 1427
---
Q 26+838 
T 864 
☒ 865 
---
Q 143+93 
T 236 
☒ 235 
---
val acc 25.760%
| epoch 25 |  iter 1 / 351 | time 0[s] | loss 0.49
| epoch 25 |  iter 21 / 351 | time 1[s] | loss 0.48
| epoch 25 |  iter 41 / 351 | time 2[s] | loss 0.49
| epoch 25 |  iter 61 / 351 | time 3[s] | loss 0.49
| epoch 25 |  iter 81 / 351 | time 4[s] | loss 0.49
| epoch 25 |  iter 101 / 351 | time 6[s] | loss 0.49
| epoch 25 |  iter 121 / 351 | time 7[s] | loss 0.50
| epoch 25 |  iter 141 / 351 | time 8[s] | loss 0.52
| epoch 25 |  iter 161 / 351 | time 9[s] | loss 0.49
| epoch 25 |  iter 181 / 351 | time 10[s] | loss 0.49
| epoch 25 |  iter 201 / 351 | time 11[s] | loss 0.50
| epoch 25 |  iter 221 / 351 | time 13[s] | loss 0.52
| epoch 25 |  iter 241 / 351 | time 14[s] | loss 0.55
| epoch 25 |  iter 261 / 351 | time 15[s] | loss 0.53
| epoch 25 |  iter 281 / 351 | time 16[s] | loss 0.53
| epoch 25 |  iter 301 / 351 | time 17[s] | loss 0.53
| epoch 25 |  iter 321 / 351 | time 18[s] | loss 0.53
| epoch 25 |  iter 341 / 351 | time 20[s] | loss 0.53
Q 77+85  
T 162 
☒ 161 
---
Q 975+164
T 1139
☒ 1141
---
Q 582+84 
T 666 
☑ 666 
---
Q 8+155  
T 163 
☒ 164 
---
Q 367+55 
T 422 
☑ 422 
---
Q 600+257
T 857 
☒ 859 
---
Q 761+292
T 1053
☒ 1055
---
Q 830+597
T 1427
☒ 1425
---
Q 26+838 
T 864 
☒ 862 
---
Q 143+93 
T 236 
☒ 238 
---
val acc 28.500%

  • 最初は100とか1200とかしか答えられないのにepochが進むにつれて表現力があがって正解できるようになる(かわいい)
  • そしてやはり最後のひと桁が合わない

%python3
# Plot the accuracy history of the run that just finished.
# acc_list is defined in an earlier cell; presumably it holds one validation
# accuracy per epoch as a fraction, like acc_list_reversed further down -- TODO confirm.
# Pin the y-axis to [0, 1] so that plots of different runs share the same scale.
plt.ylim(0, 1)
# Keep plt.plot as the last statement: its return value is what the notebook echoes.
plt.plot(acc_list)
[<matplotlib.lines.Line2D object at 0x7fb61fa29c18>]

%python3
# Remember the baseline run's accuracy history under its own name so that it
# can be compared with later runs.
# NOTE: this binds a second name to the same list object (no copy is made), so
# any later in-place change to acc_list would also show up in acc_list_baseline.
acc_list_baseline = acc_list

改善: 入力を反転させる

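入力を反転させるだけで精度が上がるらしい。入力の先頭と出力の先頭が時間的に近くなり、勾配が伝わりやすくなるためと考えられる。実際、下のログではベースラインが 25 epoch で到達した 28.5% を 10 epoch 目 (29.78%) で上回っている。なお、以下のログの Q には反転後の入力がそのまま表示されている (例: 「77+85」→「58+77」)。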

%python3
# Reload the addition dataset and flip every question along its second axis,
# so the encoder reads each input sequence back to front. The targets are
# left untouched.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
x_train = x_train[:, ::-1]
x_test = x_test[:, ::-1]

# Build a new model, optimizer and trainer for this run.
model = Seq2Seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

# One accuracy value (fraction of correct test answers) is appended per epoch.
acc_list_reversed = []
for epoch in range(max_epoch):
    # Train one epoch per call so accuracy can be measured after every epoch.
    trainer.fit(x_train, t_train, max_epoch=1,
                batch_size=batch_size, max_grad=max_grad)

    # Score the test set one sample at a time. Indexing with [[i]] selects
    # row i but keeps the result two-dimensional. Only the first 10 samples
    # are evaluated in verbose mode.
    # NOTE(review): the question handed to eval_seq2seq is the reversed one,
    # which is why the "Q" lines in the log appear reversed. The reference
    # implementation's eval_seq2seq presumably has an option to undo this for
    # display -- confirm before changing the call.
    n_test = len(x_test)
    correct_num = 0
    for i in range(n_test):
        correct_num += eval_seq2seq(model, x_test[[i]], t_test[[i]],
                                    id_to_char, i < 10)

    acc = float(correct_num) / n_test
    acc_list_reversed.append(acc)
    print('val acc %.3f%%' % (acc * 100))
| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.56
| epoch 1 |  iter 21 / 351 | time 1[s] | loss 2.43
| epoch 1 |  iter 41 / 351 | time 2[s] | loss 2.07
| epoch 1 |  iter 61 / 351 | time 3[s] | loss 1.94
| epoch 1 |  iter 81 / 351 | time 4[s] | loss 1.87
| epoch 1 |  iter 101 / 351 | time 5[s] | loss 1.81
| epoch 1 |  iter 121 / 351 | time 6[s] | loss 1.79
| epoch 1 |  iter 141 / 351 | time 8[s] | loss 1.77
| epoch 1 |  iter 161 / 351 | time 9[s] | loss 1.76
| epoch 1 |  iter 181 / 351 | time 10[s] | loss 1.75
| epoch 1 |  iter 201 / 351 | time 11[s] | loss 1.75
| epoch 1 |  iter 221 / 351 | time 12[s] | loss 1.74
| epoch 1 |  iter 241 / 351 | time 13[s] | loss 1.73
| epoch 1 |  iter 261 / 351 | time 14[s] | loss 1.72
| epoch 1 |  iter 281 / 351 | time 16[s] | loss 1.71
| epoch 1 |  iter 301 / 351 | time 17[s] | loss 1.71
| epoch 1 |  iter 321 / 351 | time 18[s] | loss 1.71
| epoch 1 |  iter 341 / 351 | time 19[s] | loss 1.70
Q   58+77
T 162 
☒ 100 
---
Q 461+579
T 1139
☒ 1000
---
Q  48+285
T 666 
☒ 700 
---
Q   551+8
T 163 
☒ 101 
---
Q  55+763
T 422 
☒ 700 
---
Q 752+006
T 857 
☒ 1000
---
Q 292+167
T 1053
☒ 1000
---
Q 795+038
T 1427
☒ 1101
---
Q  838+62
T 864 
☒ 710 
---
Q  39+341
T 236 
☒ 211 
---
val acc 0.360%
| epoch 2 |  iter 1 / 351 | time 0[s] | loss 1.69
| epoch 2 |  iter 21 / 351 | time 1[s] | loss 1.67
| epoch 2 |  iter 41 / 351 | time 2[s] | loss 1.68
| epoch 2 |  iter 61 / 351 | time 3[s] | loss 1.66
| epoch 2 |  iter 81 / 351 | time 4[s] | loss 1.65
| epoch 2 |  iter 101 / 351 | time 5[s] | loss 1.64
| epoch 2 |  iter 121 / 351 | time 6[s] | loss 1.63
| epoch 2 |  iter 141 / 351 | time 8[s] | loss 1.62
| epoch 2 |  iter 161 / 351 | time 9[s] | loss 1.61
| epoch 2 |  iter 181 / 351 | time 10[s] | loss 1.60
| epoch 2 |  iter 201 / 351 | time 11[s] | loss 1.59
| epoch 2 |  iter 221 / 351 | time 12[s] | loss 1.58
| epoch 2 |  iter 241 / 351 | time 13[s] | loss 1.56
| epoch 2 |  iter 261 / 351 | time 14[s] | loss 1.55
| epoch 2 |  iter 281 / 351 | time 16[s] | loss 1.54
| epoch 2 |  iter 301 / 351 | time 17[s] | loss 1.52
| epoch 2 |  iter 321 / 351 | time 18[s] | loss 1.50
| epoch 2 |  iter 341 / 351 | time 19[s] | loss 1.48
Q   58+77
T 162 
☒ 145 
---
Q 461+579
T 1139
☒ 1004
---
Q  48+285
T 666 
☒ 544 
---
Q   551+8
T 163 
☒ 124 
---
Q  55+763
T 422 
☒ 300 
---
Q 752+006
T 857 
☒ 800 
---
Q 292+167
T 1053
☒ 1000
---
Q 795+038
T 1427
☒ 1574
---
Q  838+62
T 864 
☒ 700 
---
Q  39+341
T 236 
☒ 300 
---
val acc 0.660%
| epoch 3 |  iter 1 / 351 | time 0[s] | loss 1.45
| epoch 3 |  iter 21 / 351 | time 1[s] | loss 1.45
| epoch 3 |  iter 41 / 351 | time 2[s] | loss 1.43
| epoch 3 |  iter 61 / 351 | time 3[s] | loss 1.41
| epoch 3 |  iter 81 / 351 | time 4[s] | loss 1.39
| epoch 3 |  iter 101 / 351 | time 5[s] | loss 1.38
| epoch 3 |  iter 121 / 351 | time 7[s] | loss 1.36
| epoch 3 |  iter 141 / 351 | time 8[s] | loss 1.35
| epoch 3 |  iter 161 / 351 | time 9[s] | loss 1.33
| epoch 3 |  iter 181 / 351 | time 10[s] | loss 1.31
| epoch 3 |  iter 201 / 351 | time 11[s] | loss 1.30
| epoch 3 |  iter 221 / 351 | time 12[s] | loss 1.28
| epoch 3 |  iter 241 / 351 | time 14[s] | loss 1.27
| epoch 3 |  iter 261 / 351 | time 15[s] | loss 1.26
| epoch 3 |  iter 281 / 351 | time 16[s] | loss 1.23
| epoch 3 |  iter 301 / 351 | time 17[s] | loss 1.23
| epoch 3 |  iter 321 / 351 | time 18[s] | loss 1.21
| epoch 3 |  iter 341 / 351 | time 20[s] | loss 1.20
Q   58+77
T 162 
☒ 158 
---
Q 461+579
T 1139
☒ 1148
---
Q  48+285
T 666 
☒ 664 
---
Q   551+8
T 163 
☒ 164 
---
Q  55+763
T 422 
☒ 408 
---
Q 752+006
T 857 
☒ 878 
---
Q 292+167
T 1053
☒ 1024
---
Q 795+038
T 1427
☒ 1448
---
Q  838+62
T 864 
☒ 875 
---
Q  39+341
T 236 
☒ 238 
---
val acc 3.300%
| epoch 4 |  iter 1 / 351 | time 0[s] | loss 1.20
| epoch 4 |  iter 21 / 351 | time 1[s] | loss 1.17
| epoch 4 |  iter 41 / 351 | time 2[s] | loss 1.14
| epoch 4 |  iter 61 / 351 | time 3[s] | loss 1.13
| epoch 4 |  iter 81 / 351 | time 4[s] | loss 1.12
| epoch 4 |  iter 101 / 351 | time 5[s] | loss 1.10
| epoch 4 |  iter 121 / 351 | time 7[s] | loss 1.08
| epoch 4 |  iter 141 / 351 | time 8[s] | loss 1.07
| epoch 4 |  iter 161 / 351 | time 9[s] | loss 1.05
| epoch 4 |  iter 181 / 351 | time 10[s] | loss 1.04
| epoch 4 |  iter 201 / 351 | time 11[s] | loss 1.02
| epoch 4 |  iter 221 / 351 | time 12[s] | loss 1.01
| epoch 4 |  iter 241 / 351 | time 14[s] | loss 0.98
| epoch 4 |  iter 261 / 351 | time 15[s] | loss 0.98
| epoch 4 |  iter 281 / 351 | time 16[s] | loss 0.97
| epoch 4 |  iter 301 / 351 | time 17[s] | loss 0.95
| epoch 4 |  iter 321 / 351 | time 18[s] | loss 0.94
| epoch 4 |  iter 341 / 351 | time 20[s] | loss 0.93
Q   58+77
T 162 
☒ 158 
---
Q 461+579
T 1139
☒ 1222
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 156 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 862 
---
Q 292+167
T 1053
☒ 1153
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 862 
---
Q  39+341
T 236 
☒ 238 
---
val acc 7.860%
| epoch 5 |  iter 1 / 351 | time 0[s] | loss 0.93
| epoch 5 |  iter 21 / 351 | time 1[s] | loss 0.90
| epoch 5 |  iter 41 / 351 | time 2[s] | loss 0.90
| epoch 5 |  iter 61 / 351 | time 3[s] | loss 0.88
| epoch 5 |  iter 81 / 351 | time 4[s] | loss 0.88
| epoch 5 |  iter 101 / 351 | time 5[s] | loss 0.86
| epoch 5 |  iter 121 / 351 | time 7[s] | loss 0.87
| epoch 5 |  iter 141 / 351 | time 8[s] | loss 0.86
| epoch 5 |  iter 161 / 351 | time 9[s] | loss 0.84
| epoch 5 |  iter 181 / 351 | time 10[s] | loss 0.84
| epoch 5 |  iter 201 / 351 | time 11[s] | loss 0.83
| epoch 5 |  iter 221 / 351 | time 12[s] | loss 0.82
| epoch 5 |  iter 241 / 351 | time 14[s] | loss 0.81
| epoch 5 |  iter 261 / 351 | time 15[s] | loss 0.80
| epoch 5 |  iter 281 / 351 | time 16[s] | loss 0.80
| epoch 5 |  iter 301 / 351 | time 17[s] | loss 0.79
| epoch 5 |  iter 321 / 351 | time 18[s] | loss 0.78
| epoch 5 |  iter 341 / 351 | time 19[s] | loss 0.78
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1134
---
Q  48+285
T 666 
☒ 662 
---
Q   551+8
T 163 
☒ 156 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 855 
---
Q 292+167
T 1053
☒ 1052
---
Q 795+038
T 1427
☒ 1425
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☒ 231 
---
val acc 12.480%
| epoch 6 |  iter 1 / 351 | time 0[s] | loss 0.78
| epoch 6 |  iter 21 / 351 | time 1[s] | loss 0.75
| epoch 6 |  iter 41 / 351 | time 2[s] | loss 0.75
| epoch 6 |  iter 61 / 351 | time 3[s] | loss 0.75
| epoch 6 |  iter 81 / 351 | time 4[s] | loss 0.74
| epoch 6 |  iter 101 / 351 | time 5[s] | loss 0.74
| epoch 6 |  iter 121 / 351 | time 7[s] | loss 0.73
| epoch 6 |  iter 141 / 351 | time 8[s] | loss 0.73
| epoch 6 |  iter 161 / 351 | time 9[s] | loss 0.72
| epoch 6 |  iter 181 / 351 | time 10[s] | loss 0.72
| epoch 6 |  iter 201 / 351 | time 11[s] | loss 0.72
| epoch 6 |  iter 221 / 351 | time 12[s] | loss 0.72
| epoch 6 |  iter 241 / 351 | time 14[s] | loss 0.71
| epoch 6 |  iter 261 / 351 | time 15[s] | loss 0.70
| epoch 6 |  iter 281 / 351 | time 16[s] | loss 0.69
| epoch 6 |  iter 301 / 351 | time 17[s] | loss 0.69
| epoch 6 |  iter 321 / 351 | time 18[s] | loss 0.68
| epoch 6 |  iter 341 / 351 | time 20[s] | loss 0.68
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1137
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 855 
---
Q 292+167
T 1053
☒ 1054
---
Q 795+038
T 1427
☒ 1426
---
Q  838+62
T 864 
☒ 861 
---
Q  39+341
T 236 
☒ 239 
---
val acc 16.960%
| epoch 7 |  iter 1 / 351 | time 0[s] | loss 0.65
| epoch 7 |  iter 21 / 351 | time 1[s] | loss 0.66
| epoch 7 |  iter 41 / 351 | time 2[s] | loss 0.66
| epoch 7 |  iter 61 / 351 | time 3[s] | loss 0.65
| epoch 7 |  iter 81 / 351 | time 4[s] | loss 0.64
| epoch 7 |  iter 101 / 351 | time 5[s] | loss 0.65
| epoch 7 |  iter 121 / 351 | time 7[s] | loss 0.65
| epoch 7 |  iter 141 / 351 | time 8[s] | loss 0.64
| epoch 7 |  iter 161 / 351 | time 9[s] | loss 0.63
| epoch 7 |  iter 181 / 351 | time 10[s] | loss 0.63
| epoch 7 |  iter 201 / 351 | time 11[s] | loss 0.63
| epoch 7 |  iter 221 / 351 | time 13[s] | loss 0.63
| epoch 7 |  iter 241 / 351 | time 14[s] | loss 0.63
| epoch 7 |  iter 261 / 351 | time 15[s] | loss 0.62
| epoch 7 |  iter 281 / 351 | time 16[s] | loss 0.62
| epoch 7 |  iter 301 / 351 | time 17[s] | loss 0.61
| epoch 7 |  iter 321 / 351 | time 18[s] | loss 0.61
| epoch 7 |  iter 341 / 351 | time 20[s] | loss 0.61
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1140
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☒ 420 
---
Q 752+006
T 857 
☒ 855 
---
Q 292+167
T 1053
☒ 1054
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 863 
---
Q  39+341
T 236 
☒ 239 
---
val acc 16.400%
| epoch 8 |  iter 1 / 351 | time 0[s] | loss 0.62
| epoch 8 |  iter 21 / 351 | time 1[s] | loss 0.60
| epoch 8 |  iter 41 / 351 | time 2[s] | loss 0.59
| epoch 8 |  iter 61 / 351 | time 3[s] | loss 0.59
| epoch 8 |  iter 81 / 351 | time 4[s] | loss 0.59
| epoch 8 |  iter 101 / 351 | time 6[s] | loss 0.58
| epoch 8 |  iter 121 / 351 | time 7[s] | loss 0.58
| epoch 8 |  iter 141 / 351 | time 8[s] | loss 0.58
| epoch 8 |  iter 161 / 351 | time 9[s] | loss 0.58
| epoch 8 |  iter 181 / 351 | time 10[s] | loss 0.58
| epoch 8 |  iter 201 / 351 | time 11[s] | loss 0.57
| epoch 8 |  iter 221 / 351 | time 13[s] | loss 0.57
| epoch 8 |  iter 241 / 351 | time 14[s] | loss 0.57
| epoch 8 |  iter 261 / 351 | time 15[s] | loss 0.57
| epoch 8 |  iter 281 / 351 | time 16[s] | loss 0.57
| epoch 8 |  iter 301 / 351 | time 17[s] | loss 0.57
| epoch 8 |  iter 321 / 351 | time 19[s] | loss 0.55
| epoch 8 |  iter 341 / 351 | time 20[s] | loss 0.55
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1134
---
Q  48+285
T 666 
☒ 668 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☒ 420 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 865 
---
Q  39+341
T 236 
☑ 236 
---
val acc 22.620%
| epoch 9 |  iter 1 / 351 | time 0[s] | loss 0.53
| epoch 9 |  iter 21 / 351 | time 1[s] | loss 0.54
| epoch 9 |  iter 41 / 351 | time 2[s] | loss 0.54
| epoch 9 |  iter 61 / 351 | time 3[s] | loss 0.55
| epoch 9 |  iter 81 / 351 | time 4[s] | loss 0.55
| epoch 9 |  iter 101 / 351 | time 6[s] | loss 0.54
| epoch 9 |  iter 121 / 351 | time 7[s] | loss 0.54
| epoch 9 |  iter 141 / 351 | time 8[s] | loss 0.55
| epoch 9 |  iter 161 / 351 | time 9[s] | loss 0.55
| epoch 9 |  iter 181 / 351 | time 10[s] | loss 0.54
| epoch 9 |  iter 201 / 351 | time 12[s] | loss 0.53
| epoch 9 |  iter 221 / 351 | time 13[s] | loss 0.53
| epoch 9 |  iter 241 / 351 | time 14[s] | loss 0.53
| epoch 9 |  iter 261 / 351 | time 15[s] | loss 0.53
| epoch 9 |  iter 281 / 351 | time 16[s] | loss 0.54
| epoch 9 |  iter 301 / 351 | time 17[s] | loss 0.54
| epoch 9 |  iter 321 / 351 | time 19[s] | loss 0.53
| epoch 9 |  iter 341 / 351 | time 20[s] | loss 0.52
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1138
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 856 
---
Q 292+167
T 1053
☒ 1152
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☒ 238 
---
val acc 24.120%
| epoch 10 |  iter 1 / 351 | time 0[s] | loss 0.51
| epoch 10 |  iter 21 / 351 | time 1[s] | loss 0.52
| epoch 10 |  iter 41 / 351 | time 2[s] | loss 0.52
| epoch 10 |  iter 61 / 351 | time 3[s] | loss 0.51
| epoch 10 |  iter 81 / 351 | time 4[s] | loss 0.50
| epoch 10 |  iter 101 / 351 | time 5[s] | loss 0.51
| epoch 10 |  iter 121 / 351 | time 7[s] | loss 0.51
| epoch 10 |  iter 141 / 351 | time 8[s] | loss 0.51
| epoch 10 |  iter 161 / 351 | time 9[s] | loss 0.50
| epoch 10 |  iter 181 / 351 | time 10[s] | loss 0.51
| epoch 10 |  iter 201 / 351 | time 11[s] | loss 0.51
| epoch 10 |  iter 221 / 351 | time 13[s] | loss 0.50
| epoch 10 |  iter 241 / 351 | time 14[s] | loss 0.49
| epoch 10 |  iter 261 / 351 | time 15[s] | loss 0.49
| epoch 10 |  iter 281 / 351 | time 16[s] | loss 0.50
| epoch 10 |  iter 301 / 351 | time 17[s] | loss 0.50
| epoch 10 |  iter 321 / 351 | time 18[s] | loss 0.49
| epoch 10 |  iter 341 / 351 | time 20[s] | loss 0.49
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1137
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☒ 420 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1429
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☒ 235 
---
val acc 29.780%
| epoch 11 |  iter 1 / 351 | time 0[s] | loss 0.46
| epoch 11 |  iter 21 / 351 | time 1[s] | loss 0.47
| epoch 11 |  iter 41 / 351 | time 2[s] | loss 0.48
| epoch 11 |  iter 61 / 351 | time 3[s] | loss 0.49
| epoch 11 |  iter 81 / 351 | time 4[s] | loss 0.48
| epoch 11 |  iter 101 / 351 | time 5[s] | loss 0.48
| epoch 11 |  iter 121 / 351 | time 7[s] | loss 0.48
| epoch 11 |  iter 141 / 351 | time 8[s] | loss 0.48
| epoch 11 |  iter 161 / 351 | time 9[s] | loss 0.48
| epoch 11 |  iter 181 / 351 | time 10[s] | loss 0.48
| epoch 11 |  iter 201 / 351 | time 11[s] | loss 0.48
| epoch 11 |  iter 221 / 351 | time 13[s] | loss 0.47
| epoch 11 |  iter 241 / 351 | time 14[s] | loss 0.46
| epoch 11 |  iter 261 / 351 | time 15[s] | loss 0.47
| epoch 11 |  iter 281 / 351 | time 16[s] | loss 0.46
| epoch 11 |  iter 301 / 351 | time 17[s] | loss 0.47
| epoch 11 |  iter 321 / 351 | time 18[s] | loss 0.46
| epoch 11 |  iter 341 / 351 | time 20[s] | loss 0.46
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1138
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☒ 420 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 866 
---
Q  39+341
T 236 
☒ 233 
---
val acc 26.080%
| epoch 12 |  iter 1 / 351 | time 0[s] | loss 0.47
| epoch 12 |  iter 21 / 351 | time 1[s] | loss 0.46
| epoch 12 |  iter 41 / 351 | time 2[s] | loss 0.46
| epoch 12 |  iter 61 / 351 | time 3[s] | loss 0.45
| epoch 12 |  iter 81 / 351 | time 4[s] | loss 0.45
| epoch 12 |  iter 101 / 351 | time 5[s] | loss 0.46
| epoch 12 |  iter 121 / 351 | time 7[s] | loss 0.45
| epoch 12 |  iter 141 / 351 | time 8[s] | loss 0.45
| epoch 12 |  iter 161 / 351 | time 9[s] | loss 0.45
| epoch 12 |  iter 181 / 351 | time 10[s] | loss 0.45
| epoch 12 |  iter 201 / 351 | time 11[s] | loss 0.45
| epoch 12 |  iter 221 / 351 | time 13[s] | loss 0.45
| epoch 12 |  iter 241 / 351 | time 14[s] | loss 0.48
| epoch 12 |  iter 261 / 351 | time 15[s] | loss 0.47
| epoch 12 |  iter 281 / 351 | time 16[s] | loss 0.45
| epoch 12 |  iter 301 / 351 | time 17[s] | loss 0.44
| epoch 12 |  iter 321 / 351 | time 18[s] | loss 0.43
| epoch 12 |  iter 341 / 351 | time 20[s] | loss 0.43
Q   58+77
T 162 
☒ 161 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☒ 420 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 865 
---
Q  39+341
T 236 
☒ 235 
---
val acc 28.100%
| epoch 13 |  iter 1 / 351 | time 0[s] | loss 0.46
| epoch 13 |  iter 21 / 351 | time 1[s] | loss 0.43
| epoch 13 |  iter 41 / 351 | time 2[s] | loss 0.43
| epoch 13 |  iter 61 / 351 | time 3[s] | loss 0.43
| epoch 13 |  iter 81 / 351 | time 4[s] | loss 0.43
| epoch 13 |  iter 101 / 351 | time 5[s] | loss 0.45
| epoch 13 |  iter 121 / 351 | time 7[s] | loss 0.44
| epoch 13 |  iter 141 / 351 | time 8[s] | loss 0.44
| epoch 13 |  iter 161 / 351 | time 9[s] | loss 0.44
| epoch 13 |  iter 181 / 351 | time 10[s] | loss 0.43
| epoch 13 |  iter 201 / 351 | time 11[s] | loss 0.42
| epoch 13 |  iter 221 / 351 | time 13[s] | loss 0.42
| epoch 13 |  iter 241 / 351 | time 14[s] | loss 0.42
| epoch 13 |  iter 261 / 351 | time 15[s] | loss 0.43
| epoch 13 |  iter 281 / 351 | time 16[s] | loss 0.43
| epoch 13 |  iter 301 / 351 | time 17[s] | loss 0.43
| epoch 13 |  iter 321 / 351 | time 18[s] | loss 0.44
| epoch 13 |  iter 341 / 351 | time 20[s] | loss 0.43
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1141
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 160 
---
Q  55+763
T 422 
☒ 424 
---
Q 752+006
T 857 
☒ 859 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1429
---
Q  838+62
T 864 
☒ 865 
---
Q  39+341
T 236 
☒ 237 
---
val acc 33.320%
| epoch 14 |  iter 1 / 351 | time 0[s] | loss 0.44
| epoch 14 |  iter 21 / 351 | time 1[s] | loss 0.42
| epoch 14 |  iter 41 / 351 | time 2[s] | loss 0.42
| epoch 14 |  iter 61 / 351 | time 3[s] | loss 0.43
| epoch 14 |  iter 81 / 351 | time 4[s] | loss 0.43
| epoch 14 |  iter 101 / 351 | time 5[s] | loss 0.41
| epoch 14 |  iter 121 / 351 | time 7[s] | loss 0.40
| epoch 14 |  iter 141 / 351 | time 8[s] | loss 0.41
| epoch 14 |  iter 161 / 351 | time 9[s] | loss 0.41
| epoch 14 |  iter 181 / 351 | time 10[s] | loss 0.42
| epoch 14 |  iter 201 / 351 | time 11[s] | loss 0.44
| epoch 14 |  iter 221 / 351 | time 13[s] | loss 0.43
| epoch 14 |  iter 241 / 351 | time 14[s] | loss 0.42
| epoch 14 |  iter 261 / 351 | time 15[s] | loss 0.41
| epoch 14 |  iter 281 / 351 | time 16[s] | loss 0.40
| epoch 14 |  iter 301 / 351 | time 17[s] | loss 0.40
| epoch 14 |  iter 321 / 351 | time 18[s] | loss 0.40
| epoch 14 |  iter 341 / 351 | time 20[s] | loss 0.40
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1138
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 856 
---
Q 292+167
T 1053
☒ 1052
---
Q 795+038
T 1427
☒ 1426
---
Q  838+62
T 864 
☒ 862 
---
Q  39+341
T 236 
☒ 235 
---
val acc 35.180%
| epoch 15 |  iter 1 / 351 | time 0[s] | loss 0.40
| epoch 15 |  iter 21 / 351 | time 1[s] | loss 0.40
| epoch 15 |  iter 41 / 351 | time 2[s] | loss 0.42
| epoch 15 |  iter 61 / 351 | time 3[s] | loss 0.41
| epoch 15 |  iter 81 / 351 | time 4[s] | loss 0.40
| epoch 15 |  iter 101 / 351 | time 5[s] | loss 0.40
| epoch 15 |  iter 121 / 351 | time 7[s] | loss 0.39
| epoch 15 |  iter 141 / 351 | time 8[s] | loss 0.39
| epoch 15 |  iter 161 / 351 | time 9[s] | loss 0.40
| epoch 15 |  iter 181 / 351 | time 10[s] | loss 0.41
| epoch 15 |  iter 201 / 351 | time 11[s] | loss 0.41
| epoch 15 |  iter 221 / 351 | time 13[s] | loss 0.39
| epoch 15 |  iter 241 / 351 | time 14[s] | loss 0.39
| epoch 15 |  iter 261 / 351 | time 15[s] | loss 0.40
| epoch 15 |  iter 281 / 351 | time 16[s] | loss 0.41
| epoch 15 |  iter 301 / 351 | time 17[s] | loss 0.39
| epoch 15 |  iter 321 / 351 | time 18[s] | loss 0.39
| epoch 15 |  iter 341 / 351 | time 20[s] | loss 0.38
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1138
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☒ 164 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☒ 1054
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 866 
---
Q  39+341
T 236 
☑ 236 
---
val acc 39.040%
| epoch 16 |  iter 1 / 351 | time 0[s] | loss 0.39
| epoch 16 |  iter 21 / 351 | time 1[s] | loss 0.37
| epoch 16 |  iter 41 / 351 | time 2[s] | loss 0.37
| epoch 16 |  iter 61 / 351 | time 3[s] | loss 0.38
| epoch 16 |  iter 81 / 351 | time 4[s] | loss 0.39
| epoch 16 |  iter 101 / 351 | time 5[s] | loss 0.38
| epoch 16 |  iter 121 / 351 | time 7[s] | loss 0.38
| epoch 16 |  iter 141 / 351 | time 8[s] | loss 0.37
| epoch 16 |  iter 161 / 351 | time 9[s] | loss 0.40
| epoch 16 |  iter 181 / 351 | time 10[s] | loss 0.39
| epoch 16 |  iter 201 / 351 | time 11[s] | loss 0.38
| epoch 16 |  iter 221 / 351 | time 12[s] | loss 0.41
| epoch 16 |  iter 241 / 351 | time 14[s] | loss 0.41
| epoch 16 |  iter 261 / 351 | time 15[s] | loss 0.40
| epoch 16 |  iter 281 / 351 | time 16[s] | loss 0.40
| epoch 16 |  iter 301 / 351 | time 17[s] | loss 0.38
| epoch 16 |  iter 321 / 351 | time 18[s] | loss 0.38
| epoch 16 |  iter 341 / 351 | time 19[s] | loss 0.37
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1138
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☒ 1054
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 865 
---
Q  39+341
T 236 
☒ 237 
---
val acc 43.220%
| epoch 17 |  iter 1 / 351 | time 0[s] | loss 0.35
| epoch 17 |  iter 21 / 351 | time 1[s] | loss 0.36
| epoch 17 |  iter 41 / 351 | time 2[s] | loss 0.37
| epoch 17 |  iter 61 / 351 | time 3[s] | loss 0.36
| epoch 17 |  iter 81 / 351 | time 4[s] | loss 0.36
| epoch 17 |  iter 101 / 351 | time 5[s] | loss 0.37
| epoch 17 |  iter 121 / 351 | time 7[s] | loss 0.37
| epoch 17 |  iter 141 / 351 | time 8[s] | loss 0.37
| epoch 17 |  iter 161 / 351 | time 9[s] | loss 0.37
| epoch 17 |  iter 181 / 351 | time 10[s] | loss 0.38
| epoch 17 |  iter 201 / 351 | time 11[s] | loss 0.38
| epoch 17 |  iter 221 / 351 | time 13[s] | loss 0.37
| epoch 17 |  iter 241 / 351 | time 14[s] | loss 0.37
| epoch 17 |  iter 261 / 351 | time 15[s] | loss 0.37
| epoch 17 |  iter 281 / 351 | time 16[s] | loss 0.37
| epoch 17 |  iter 301 / 351 | time 17[s] | loss 0.37
| epoch 17 |  iter 321 / 351 | time 19[s] | loss 0.37
| epoch 17 |  iter 341 / 351 | time 20[s] | loss 0.37
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 164 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 856 
---
Q 292+167
T 1053
☒ 1052
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 41.100%
| epoch 18 |  iter 1 / 351 | time 0[s] | loss 0.36
| epoch 18 |  iter 21 / 351 | time 1[s] | loss 0.37
| epoch 18 |  iter 41 / 351 | time 2[s] | loss 0.37
| epoch 18 |  iter 61 / 351 | time 3[s] | loss 0.37
| epoch 18 |  iter 81 / 351 | time 4[s] | loss 0.35
| epoch 18 |  iter 101 / 351 | time 5[s] | loss 0.35
| epoch 18 |  iter 121 / 351 | time 7[s] | loss 0.37
| epoch 18 |  iter 141 / 351 | time 8[s] | loss 0.36
| epoch 18 |  iter 161 / 351 | time 9[s] | loss 0.35
| epoch 18 |  iter 181 / 351 | time 10[s] | loss 0.36
| epoch 18 |  iter 201 / 351 | time 11[s] | loss 0.37
| epoch 18 |  iter 221 / 351 | time 13[s] | loss 0.38
| epoch 18 |  iter 241 / 351 | time 14[s] | loss 0.38
| epoch 18 |  iter 261 / 351 | time 15[s] | loss 0.36
| epoch 18 |  iter 281 / 351 | time 16[s] | loss 0.36
| epoch 18 |  iter 301 / 351 | time 17[s] | loss 0.36
| epoch 18 |  iter 321 / 351 | time 18[s] | loss 0.35
| epoch 18 |  iter 341 / 351 | time 20[s] | loss 0.34
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1141
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 38.580%
| epoch 19 |  iter 1 / 351 | time 0[s] | loss 0.36
| epoch 19 |  iter 21 / 351 | time 1[s] | loss 0.35
| epoch 19 |  iter 41 / 351 | time 2[s] | loss 0.35
| epoch 19 |  iter 61 / 351 | time 3[s] | loss 0.35
| epoch 19 |  iter 81 / 351 | time 4[s] | loss 0.35
| epoch 19 |  iter 101 / 351 | time 6[s] | loss 0.34
| epoch 19 |  iter 121 / 351 | time 7[s] | loss 0.35
| epoch 19 |  iter 141 / 351 | time 8[s] | loss 0.35
| epoch 19 |  iter 161 / 351 | time 9[s] | loss 0.35
| epoch 19 |  iter 181 / 351 | time 10[s] | loss 0.35
| epoch 19 |  iter 201 / 351 | time 11[s] | loss 0.34
| epoch 19 |  iter 221 / 351 | time 13[s] | loss 0.35
| epoch 19 |  iter 241 / 351 | time 14[s] | loss 0.36
| epoch 19 |  iter 261 / 351 | time 15[s] | loss 0.37
| epoch 19 |  iter 281 / 351 | time 16[s] | loss 0.36
| epoch 19 |  iter 301 / 351 | time 17[s] | loss 0.35
| epoch 19 |  iter 321 / 351 | time 19[s] | loss 0.35
| epoch 19 |  iter 341 / 351 | time 20[s] | loss 0.35
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☒ 1138
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☒ 164 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☒ 1052
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☒ 862 
---
Q  39+341
T 236 
☒ 235 
---
val acc 46.720%
| epoch 20 |  iter 1 / 351 | time 0[s] | loss 0.32
| epoch 20 |  iter 21 / 351 | time 1[s] | loss 0.34
| epoch 20 |  iter 41 / 351 | time 2[s] | loss 0.36
| epoch 20 |  iter 61 / 351 | time 3[s] | loss 0.36
| epoch 20 |  iter 81 / 351 | time 4[s] | loss 0.35
| epoch 20 |  iter 101 / 351 | time 5[s] | loss 0.35
| epoch 20 |  iter 121 / 351 | time 7[s] | loss 0.36
| epoch 20 |  iter 141 / 351 | time 8[s] | loss 0.35
| epoch 20 |  iter 161 / 351 | time 9[s] | loss 0.34
| epoch 20 |  iter 181 / 351 | time 10[s] | loss 0.34
| epoch 20 |  iter 201 / 351 | time 11[s] | loss 0.33
| epoch 20 |  iter 221 / 351 | time 13[s] | loss 0.33
| epoch 20 |  iter 241 / 351 | time 14[s] | loss 0.33
| epoch 20 |  iter 261 / 351 | time 15[s] | loss 0.34
| epoch 20 |  iter 281 / 351 | time 16[s] | loss 0.34
| epoch 20 |  iter 301 / 351 | time 17[s] | loss 0.34
| epoch 20 |  iter 321 / 351 | time 18[s] | loss 0.34
| epoch 20 |  iter 341 / 351 | time 20[s] | loss 0.34
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☒ 1141
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 162 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 856 
---
Q 292+167
T 1053
☒ 1054
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☒ 865 
---
Q  39+341
T 236 
☑ 236 
---
val acc 48.480%
| epoch 21 |  iter 1 / 351 | time 0[s] | loss 0.32
| epoch 21 |  iter 21 / 351 | time 1[s] | loss 0.33
| epoch 21 |  iter 41 / 351 | time 2[s] | loss 0.34
| epoch 21 |  iter 61 / 351 | time 3[s] | loss 0.34
| epoch 21 |  iter 81 / 351 | time 4[s] | loss 0.33
| epoch 21 |  iter 101 / 351 | time 6[s] | loss 0.33
| epoch 21 |  iter 121 / 351 | time 7[s] | loss 0.33
| epoch 21 |  iter 141 / 351 | time 8[s] | loss 0.33
| epoch 21 |  iter 161 / 351 | time 9[s] | loss 0.33
| epoch 21 |  iter 181 / 351 | time 10[s] | loss 0.33
| epoch 21 |  iter 201 / 351 | time 11[s] | loss 0.32
| epoch 21 |  iter 221 / 351 | time 13[s] | loss 0.33
| epoch 21 |  iter 241 / 351 | time 14[s] | loss 0.33
| epoch 21 |  iter 261 / 351 | time 15[s] | loss 0.33
| epoch 21 |  iter 281 / 351 | time 16[s] | loss 0.32
| epoch 21 |  iter 301 / 351 | time 17[s] | loss 0.32
| epoch 21 |  iter 321 / 351 | time 19[s] | loss 0.33
| epoch 21 |  iter 341 / 351 | time 20[s] | loss 0.33
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1140
---
Q  48+285
T 666 
☒ 665 
---
Q   551+8
T 163 
☒ 164 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1429
---
Q  838+62
T 864 
☒ 865 
---
Q  39+341
T 236 
☑ 236 
---
val acc 44.940%
| epoch 22 |  iter 1 / 351 | time 0[s] | loss 0.31
| epoch 22 |  iter 21 / 351 | time 1[s] | loss 0.32
| epoch 22 |  iter 41 / 351 | time 2[s] | loss 0.32
| epoch 22 |  iter 61 / 351 | time 3[s] | loss 0.34
| epoch 22 |  iter 81 / 351 | time 4[s] | loss 0.32
| epoch 22 |  iter 101 / 351 | time 6[s] | loss 0.33
| epoch 22 |  iter 121 / 351 | time 7[s] | loss 0.33
| epoch 22 |  iter 141 / 351 | time 8[s] | loss 0.34
| epoch 22 |  iter 161 / 351 | time 9[s] | loss 0.34
| epoch 22 |  iter 181 / 351 | time 10[s] | loss 0.34
| epoch 22 |  iter 201 / 351 | time 11[s] | loss 0.32
| epoch 22 |  iter 221 / 351 | time 13[s] | loss 0.31
| epoch 22 |  iter 241 / 351 | time 14[s] | loss 0.32
| epoch 22 |  iter 261 / 351 | time 15[s] | loss 0.31
| epoch 22 |  iter 281 / 351 | time 16[s] | loss 0.32
| epoch 22 |  iter 301 / 351 | time 17[s] | loss 0.33
| epoch 22 |  iter 321 / 351 | time 19[s] | loss 0.33
| epoch 22 |  iter 341 / 351 | time 20[s] | loss 0.33
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☒ 667 
---
Q   551+8
T 163 
☒ 162 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☒ 1052
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☒ 865 
---
Q  39+341
T 236 
☒ 235 
---
val acc 44.800%
| epoch 23 |  iter 1 / 351 | time 0[s] | loss 0.32
| epoch 23 |  iter 21 / 351 | time 1[s] | loss 0.31
| epoch 23 |  iter 41 / 351 | time 2[s] | loss 0.32
| epoch 23 |  iter 61 / 351 | time 3[s] | loss 0.31
| epoch 23 |  iter 81 / 351 | time 4[s] | loss 0.31
| epoch 23 |  iter 101 / 351 | time 5[s] | loss 0.32
| epoch 23 |  iter 121 / 351 | time 7[s] | loss 0.32
| epoch 23 |  iter 141 / 351 | time 8[s] | loss 0.33
| epoch 23 |  iter 161 / 351 | time 9[s] | loss 0.32
| epoch 23 |  iter 181 / 351 | time 10[s] | loss 0.32
| epoch 23 |  iter 201 / 351 | time 11[s] | loss 0.33
| epoch 23 |  iter 221 / 351 | time 13[s] | loss 0.33
| epoch 23 |  iter 241 / 351 | time 14[s] | loss 0.33
| epoch 23 |  iter 261 / 351 | time 15[s] | loss 0.32
| epoch 23 |  iter 281 / 351 | time 16[s] | loss 0.32
| epoch 23 |  iter 301 / 351 | time 17[s] | loss 0.31
| epoch 23 |  iter 321 / 351 | time 19[s] | loss 0.31
| epoch 23 |  iter 341 / 351 | time 20[s] | loss 0.31
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☒ 1140
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 162 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1426
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☒ 235 
---
val acc 41.060%
| epoch 24 |  iter 1 / 351 | time 0[s] | loss 0.34
| epoch 24 |  iter 21 / 351 | time 1[s] | loss 0.32
| epoch 24 |  iter 41 / 351 | time 2[s] | loss 0.30
| epoch 24 |  iter 61 / 351 | time 3[s] | loss 0.30
| epoch 24 |  iter 81 / 351 | time 4[s] | loss 0.30
| epoch 24 |  iter 101 / 351 | time 5[s] | loss 0.30
| epoch 24 |  iter 121 / 351 | time 7[s] | loss 0.31
| epoch 24 |  iter 141 / 351 | time 8[s] | loss 0.32
| epoch 24 |  iter 161 / 351 | time 9[s] | loss 0.32
| epoch 24 |  iter 181 / 351 | time 10[s] | loss 0.32
| epoch 24 |  iter 201 / 351 | time 11[s] | loss 0.31
| epoch 24 |  iter 221 / 351 | time 13[s] | loss 0.32
| epoch 24 |  iter 241 / 351 | time 14[s] | loss 0.32
| epoch 24 |  iter 261 / 351 | time 15[s] | loss 0.31
| epoch 24 |  iter 281 / 351 | time 16[s] | loss 0.31
| epoch 24 |  iter 301 / 351 | time 17[s] | loss 0.31
| epoch 24 |  iter 321 / 351 | time 18[s] | loss 0.30
| epoch 24 |  iter 341 / 351 | time 20[s] | loss 0.30
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1140
---
Q  48+285
T 666 
☒ 665 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☒ 1054
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 45.180%
| epoch 25 |  iter 1 / 351 | time 0[s] | loss 0.34
| epoch 25 |  iter 21 / 351 | time 1[s] | loss 0.29
| epoch 25 |  iter 41 / 351 | time 2[s] | loss 0.30
| epoch 25 |  iter 61 / 351 | time 3[s] | loss 0.30
| epoch 25 |  iter 81 / 351 | time 4[s] | loss 0.30
| epoch 25 |  iter 101 / 351 | time 6[s] | loss 0.29
| epoch 25 |  iter 121 / 351 | time 7[s] | loss 0.31
| epoch 25 |  iter 141 / 351 | time 8[s] | loss 0.32
| epoch 25 |  iter 161 / 351 | time 9[s] | loss 0.32
| epoch 25 |  iter 181 / 351 | time 10[s] | loss 0.31
| epoch 25 |  iter 201 / 351 | time 11[s] | loss 0.32
| epoch 25 |  iter 221 / 351 | time 13[s] | loss 0.30
| epoch 25 |  iter 241 / 351 | time 14[s] | loss 0.29
| epoch 25 |  iter 261 / 351 | time 15[s] | loss 0.30
| epoch 25 |  iter 281 / 351 | time 16[s] | loss 0.30
| epoch 25 |  iter 301 / 351 | time 17[s] | loss 0.30
| epoch 25 |  iter 321 / 351 | time 19[s] | loss 0.30
| epoch 25 |  iter 341 / 351 | time 20[s] | loss 0.30
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☒ 1141
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☒ 162 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☒ 1054
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☒ 862 
---
Q  39+341
T 236 
☑ 236 
---
val acc 51.620%

%python3
# Compare validation accuracy per epoch: baseline vs. reversed-input training.
# Each curve carries its own label so the legend is built from the lines.
plt.plot(acc_list_baseline, label='baseline')
plt.plot(acc_list_reversed, label='reversed input')
plt.ylim(0, 1)  # fix the y-axis to the 0..1 range
plt.legend()
plt.show()

  • 入力を反転させるだけで正解率が倍くらい上がる
  • 1桁目(1の位)が正解しにくかったのと関係がありそう
  • 学習にかかった時間は12分30秒ほど(CPUのみ)

Peeky

覗き見の実装

%python3
class PeekyDecoder:
    """Decoder that lets every time step "peek" at the encoder state ``h``.

    ``h`` is concatenated in front of the embedding output before the LSTM
    and in front of the LSTM output before the Affine layer, which is why
    the LSTM input weight has ``H + D`` rows and the Affine weight ``H + H``.

    Attributes:
        embed, lstm, affine: the TimeEmbedding / TimeLSTM / TimeAffine
            layers (defined elsewhere in this file).
        params, grads: flat lists gathered from the three layers in the
            order embed, lstm, affine.
        cache: the hidden size ``H`` seen by the last ``forward`` call
            (``None`` until then); ``backward`` uses it to split gradients.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create float32 weights and the three layers.

        Args:
            vocab_size: number of output symbols (V).
            wordvec_size: embedding width (D).
            hidden_size: LSTM hidden width (H).
        """
        randn = np.random.randn
        lstm_in = hidden_size + wordvec_size      # [h ; embedding]
        affine_in = hidden_size + hidden_size     # [h ; lstm output]
        gate_width = 4 * hidden_size

        # NOTE: the randn calls stay in this order so that a fixed seed
        # yields the same initial weights as before.
        W_embed = (randn(vocab_size, wordvec_size) / 100).astype('f')
        W_x = (randn(lstm_in, gate_width) / np.sqrt(lstm_in)).astype('f')
        W_h = (randn(hidden_size, gate_width) / np.sqrt(hidden_size)).astype('f')
        b_lstm = np.zeros(gate_width).astype('f')
        W_affine = (randn(affine_in, vocab_size) / np.sqrt(affine_in)).astype('f')
        b_affine = np.zeros(vocab_size).astype('f')

        self.embed = TimeEmbedding(W_embed)
        self.lstm = TimeLSTM(W_x, W_h, b_lstm, stateful=True)
        self.affine = TimeAffine(W_affine, b_affine)

        layers = (self.embed, self.lstm, self.affine)
        self.params = [p for layer in layers for p in layer.params]
        self.grads = [g for layer in layers for g in layer.grads]
        self.cache = None

    def forward(self, xs, h):
        """Compute scores for a batch of decoder input sequences.

        Args:
            xs: 2-D array of ids, unpacked as (N, T).
            h: 2-D encoder state, unpacked as (N, H).

        Returns:
            Whatever ``self.affine.forward`` returns for the peeky input
            (presumably per-step vocabulary scores -- confirm in TimeAffine).
        """
        _, steps = xs.shape
        _, hidden = h.shape

        self.lstm.set_state(h)

        # Copy h once per time step -> (N, T, H).
        peek = np.repeat(h[:, np.newaxis, :], steps, axis=1)

        embedded = self.embed.forward(xs)
        lstm_out = self.lstm.forward(np.concatenate((peek, embedded), axis=2))
        score = self.affine.forward(np.concatenate((peek, lstm_out), axis=2))

        self.cache = hidden
        return score

    def backward(self, dscore):
        """Back-propagate ``dscore`` and return the gradient w.r.t. ``h``.

        The first ``H`` channels of the Affine and LSTM input gradients
        belong to the peeked copies of ``h``; they are summed over the time
        axis and added to ``self.lstm.dh``.
        """
        hidden = self.cache

        d_affine_in = self.affine.backward(dscore)
        dpeek_affine = d_affine_in[:, :, :hidden]
        d_lstm_in = self.lstm.backward(d_affine_in[:, :, hidden:])
        dpeek_lstm = d_lstm_in[:, :, :hidden]
        self.embed.backward(d_lstm_in[:, :, hidden:])

        dpeek = dpeek_affine + dpeek_lstm
        return self.lstm.dh + np.sum(dpeek, axis=1)

    def generate(self, h, start_id, sample_size):
        """Greedily decode ``sample_size`` ids starting from ``start_id``.

        ``h`` is reshaped to (1, 1, H), so a single sequence is decoded.
        Each step feeds the previous argmax id back in as the next input.

        Returns:
            List of the sampled ids (the values returned by ``np.argmax``).
        """
        self.lstm.set_state(h)
        peek = h.reshape(1, 1, h.shape[1])

        sampled = []
        current = start_id
        for _ in range(sample_size):
            step_input = np.array([current]).reshape((1, 1))
            embedded = self.embed.forward(step_input)
            lstm_out = self.lstm.forward(np.concatenate((peek, embedded), axis=2))
            score = self.affine.forward(np.concatenate((peek, lstm_out), axis=2))

            current = np.argmax(score.flatten())
            sampled.append(current)

        return sampled

%python3
class PeekySeq2Seq(Seq2Seq):
    """Seq2Seq variant that uses PeekyDecoder as its decoder.

    Only the constructor is overridden here; all other behaviour comes
    from the Seq2Seq base class.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build the encoder, peeky decoder and loss layer.

        The base-class constructor is not invoked; every attribute is
        assigned directly below.
        """
        dims = (vocab_size, wordvec_size, hidden_size)

        # Construction order is kept: encoder first, then decoder.
        self.encoder = Encoder(*dims)
        self.decoder = PeekyDecoder(*dims)
        self.softmax = TimeSoftmaxWithLoss()

        # Encoder entries come first, decoder entries second.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

%python3
# Load the addition dataset and reverse every question along axis 1.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
x_train = x_train[:, ::-1]
x_test = x_test[:, ::-1]

# PeekySeq2Seq is used here in place of the plain Seq2Seq model.
model = PeekySeq2Seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

# Validation accuracy after each epoch.
acc_list_peeky = []
for epoch in range(max_epoch):
    # Train for exactly one epoch, then evaluate on the whole test set.
    trainer.fit(x_train, t_train, max_epoch=1,
                batch_size=batch_size, max_grad=max_grad)

    # Samples are evaluated one at a time (x_test[[i]] keeps the batch
    # axis); `i < 10` is passed as eval_seq2seq's verbose argument.
    correct_num = sum(
        eval_seq2seq(model, x_test[[i]], t_test[[i]], id_to_char, i < 10)
        for i in range(len(x_test))
    )

    acc = float(correct_num) / len(x_test)
    acc_list_peeky.append(acc)
    print(f'val acc {acc * 100:.3f}%')
| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.57
| epoch 1 |  iter 21 / 351 | time 1[s] | loss 2.48
| epoch 1 |  iter 41 / 351 | time 2[s] | loss 2.20
| epoch 1 |  iter 61 / 351 | time 3[s] | loss 1.99
| epoch 1 |  iter 81 / 351 | time 4[s] | loss 1.89
| epoch 1 |  iter 101 / 351 | time 5[s] | loss 1.82
| epoch 1 |  iter 121 / 351 | time 7[s] | loss 1.82
| epoch 1 |  iter 141 / 351 | time 8[s] | loss 1.80
| epoch 1 |  iter 161 / 351 | time 9[s] | loss 1.79
| epoch 1 |  iter 181 / 351 | time 10[s] | loss 1.78
| epoch 1 |  iter 201 / 351 | time 11[s] | loss 1.77
| epoch 1 |  iter 221 / 351 | time 13[s] | loss 1.76
| epoch 1 |  iter 241 / 351 | time 14[s] | loss 1.76
| epoch 1 |  iter 261 / 351 | time 15[s] | loss 1.75
| epoch 1 |  iter 281 / 351 | time 16[s] | loss 1.74
| epoch 1 |  iter 301 / 351 | time 17[s] | loss 1.74
| epoch 1 |  iter 321 / 351 | time 19[s] | loss 1.73
| epoch 1 |  iter 341 / 351 | time 20[s] | loss 1.73
Q   58+77
T 162 
☒ 100 
---
Q 461+579
T 1139
☒ 1013
---
Q  48+285
T 666 
☒ 102 
---
Q   551+8
T 163 
☒ 100 
---
Q  55+763
T 422 
☒ 1023
---
Q 752+006
T 857 
☒ 1023
---
Q 292+167
T 1053
☒ 1023
---
Q 795+038
T 1427
☒ 1111
---
Q  838+62
T 864 
☒ 102 
---
Q  39+341
T 236 
☒ 102 
---
val acc 0.280%
| epoch 2 |  iter 1 / 351 | time 0[s] | loss 1.71
| epoch 2 |  iter 21 / 351 | time 1[s] | loss 1.71
| epoch 2 |  iter 41 / 351 | time 2[s] | loss 1.71
| epoch 2 |  iter 61 / 351 | time 3[s] | loss 1.71
| epoch 2 |  iter 81 / 351 | time 4[s] | loss 1.70
| epoch 2 |  iter 101 / 351 | time 6[s] | loss 1.68
| epoch 2 |  iter 121 / 351 | time 7[s] | loss 1.69
| epoch 2 |  iter 141 / 351 | time 8[s] | loss 1.68
| epoch 2 |  iter 161 / 351 | time 9[s] | loss 1.67
| epoch 2 |  iter 181 / 351 | time 10[s] | loss 1.67
| epoch 2 |  iter 201 / 351 | time 12[s] | loss 1.65
| epoch 2 |  iter 221 / 351 | time 13[s] | loss 1.65
| epoch 2 |  iter 241 / 351 | time 14[s] | loss 1.65
| epoch 2 |  iter 261 / 351 | time 15[s] | loss 1.63
| epoch 2 |  iter 281 / 351 | time 17[s] | loss 1.62
| epoch 2 |  iter 301 / 351 | time 18[s] | loss 1.61
| epoch 2 |  iter 321 / 351 | time 19[s] | loss 1.61
| epoch 2 |  iter 341 / 351 | time 20[s] | loss 1.60
Q   58+77
T 162 
☒ 100 
---
Q 461+579
T 1139
☒ 1200
---
Q  48+285
T 666 
☒ 690 
---
Q   551+8
T 163 
☒ 100 
---
Q  55+763
T 422 
☒ 690 
---
Q 752+006
T 857 
☒ 999 
---
Q 292+167
T 1053
☒ 1029
---
Q 795+038
T 1427
☒ 1240
---
Q  838+62
T 864 
☒ 792 
---
Q  39+341
T 236 
☒ 290 
---
val acc 0.400%
| epoch 3 |  iter 1 / 351 | time 0[s] | loss 1.58
| epoch 3 |  iter 21 / 351 | time 1[s] | loss 1.59
| epoch 3 |  iter 41 / 351 | time 2[s] | loss 1.58
| epoch 3 |  iter 61 / 351 | time 3[s] | loss 1.56
| epoch 3 |  iter 81 / 351 | time 5[s] | loss 1.55
| epoch 3 |  iter 101 / 351 | time 6[s] | loss 1.53
| epoch 3 |  iter 121 / 351 | time 7[s] | loss 1.51
| epoch 3 |  iter 141 / 351 | time 8[s] | loss 1.50
| epoch 3 |  iter 161 / 351 | time 9[s] | loss 1.49
| epoch 3 |  iter 181 / 351 | time 11[s] | loss 1.47
| epoch 3 |  iter 201 / 351 | time 12[s] | loss 1.46
| epoch 3 |  iter 221 / 351 | time 13[s] | loss 1.43
| epoch 3 |  iter 241 / 351 | time 14[s] | loss 1.42
| epoch 3 |  iter 261 / 351 | time 16[s] | loss 1.41
| epoch 3 |  iter 281 / 351 | time 17[s] | loss 1.39
| epoch 3 |  iter 301 / 351 | time 18[s] | loss 1.37
| epoch 3 |  iter 321 / 351 | time 19[s] | loss 1.36
| epoch 3 |  iter 341 / 351 | time 21[s] | loss 1.35
Q   58+77
T 162 
☒ 154 
---
Q 461+579
T 1139
☒ 1033
---
Q  48+285
T 666 
☒ 644 
---
Q   551+8
T 163 
☒ 161 
---
Q  55+763
T 422 
☒ 433 
---
Q 752+006
T 857 
☒ 818 
---
Q 292+167
T 1053
☒ 1018
---
Q 795+038
T 1427
☒ 1344
---
Q  838+62
T 864 
☒ 834 
---
Q  39+341
T 236 
☒ 211 
---
val acc 1.600%
| epoch 4 |  iter 1 / 351 | time 0[s] | loss 1.32
| epoch 4 |  iter 21 / 351 | time 1[s] | loss 1.32
| epoch 4 |  iter 41 / 351 | time 2[s] | loss 1.30
| epoch 4 |  iter 61 / 351 | time 3[s] | loss 1.30
| epoch 4 |  iter 81 / 351 | time 5[s] | loss 1.28
| epoch 4 |  iter 101 / 351 | time 6[s] | loss 1.27
| epoch 4 |  iter 121 / 351 | time 7[s] | loss 1.25
| epoch 4 |  iter 141 / 351 | time 8[s] | loss 1.24
| epoch 4 |  iter 161 / 351 | time 10[s] | loss 1.22
| epoch 4 |  iter 181 / 351 | time 11[s] | loss 1.21
| epoch 4 |  iter 201 / 351 | time 12[s] | loss 1.20
| epoch 4 |  iter 221 / 351 | time 13[s] | loss 1.20
| epoch 4 |  iter 241 / 351 | time 15[s] | loss 1.17
| epoch 4 |  iter 261 / 351 | time 16[s] | loss 1.16
| epoch 4 |  iter 281 / 351 | time 17[s] | loss 1.14
| epoch 4 |  iter 301 / 351 | time 18[s] | loss 1.12
| epoch 4 |  iter 321 / 351 | time 20[s] | loss 1.11
| epoch 4 |  iter 341 / 351 | time 21[s] | loss 1.10
Q   58+77
T 162 
☒ 158 
---
Q 461+579
T 1139
☒ 1123
---
Q  48+285
T 666 
☒ 657 
---
Q   551+8
T 163 
☒ 165 
---
Q  55+763
T 422 
☒ 423 
---
Q 752+006
T 857 
☒ 777 
---
Q 292+167
T 1053
☒ 1023
---
Q 795+038
T 1427
☒ 1388
---
Q  838+62
T 864 
☒ 887 
---
Q  39+341
T 236 
☒ 223 
---
val acc 5.140%
| epoch 5 |  iter 1 / 351 | time 0[s] | loss 1.08
| epoch 5 |  iter 21 / 351 | time 1[s] | loss 1.07
| epoch 5 |  iter 41 / 351 | time 2[s] | loss 1.05
| epoch 5 |  iter 61 / 351 | time 3[s] | loss 1.04
| epoch 5 |  iter 81 / 351 | time 5[s] | loss 1.02
| epoch 5 |  iter 101 / 351 | time 6[s] | loss 1.01
| epoch 5 |  iter 121 / 351 | time 7[s] | loss 1.00
| epoch 5 |  iter 141 / 351 | time 8[s] | loss 0.99
| epoch 5 |  iter 161 / 351 | time 10[s] | loss 0.99
| epoch 5 |  iter 181 / 351 | time 11[s] | loss 0.96
| epoch 5 |  iter 201 / 351 | time 12[s] | loss 0.95
| epoch 5 |  iter 221 / 351 | time 13[s] | loss 0.94
| epoch 5 |  iter 241 / 351 | time 15[s] | loss 0.92
| epoch 5 |  iter 261 / 351 | time 16[s] | loss 0.91
| epoch 5 |  iter 281 / 351 | time 17[s] | loss 0.90
| epoch 5 |  iter 301 / 351 | time 19[s] | loss 0.89
| epoch 5 |  iter 321 / 351 | time 20[s] | loss 0.88
| epoch 5 |  iter 341 / 351 | time 21[s] | loss 0.87
Q   58+77
T 162 
☒ 160 
---
Q 461+579
T 1139
☒ 1135
---
Q  48+285
T 666 
☒ 668 
---
Q   551+8
T 163 
☒ 169 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 861 
---
Q 292+167
T 1053
☒ 1045
---
Q 795+038
T 1427
☒ 1324
---
Q  838+62
T 864 
☒ 861 
---
Q  39+341
T 236 
☒ 239 
---
val acc 9.380%
| epoch 6 |  iter 1 / 351 | time 0[s] | loss 0.90
| epoch 6 |  iter 21 / 351 | time 1[s] | loss 0.86
| epoch 6 |  iter 41 / 351 | time 2[s] | loss 0.83
| epoch 6 |  iter 61 / 351 | time 3[s] | loss 0.84
| epoch 6 |  iter 81 / 351 | time 5[s] | loss 0.82
| epoch 6 |  iter 101 / 351 | time 6[s] | loss 0.81
| epoch 6 |  iter 121 / 351 | time 7[s] | loss 0.80
| epoch 6 |  iter 141 / 351 | time 8[s] | loss 0.79
| epoch 6 |  iter 161 / 351 | time 10[s] | loss 0.78
| epoch 6 |  iter 181 / 351 | time 11[s] | loss 0.77
| epoch 6 |  iter 201 / 351 | time 12[s] | loss 0.76
| epoch 6 |  iter 221 / 351 | time 14[s] | loss 0.76
| epoch 6 |  iter 241 / 351 | time 15[s] | loss 0.74
| epoch 6 |  iter 261 / 351 | time 16[s] | loss 0.74
| epoch 6 |  iter 281 / 351 | time 17[s] | loss 0.73
| epoch 6 |  iter 301 / 351 | time 19[s] | loss 0.72
| epoch 6 |  iter 321 / 351 | time 20[s] | loss 0.72
| epoch 6 |  iter 341 / 351 | time 21[s] | loss 0.71
Q   58+77
T 162 
☒ 163 
---
Q 461+579
T 1139
☒ 1138
---
Q  48+285
T 666 
☒ 668 
---
Q   551+8
T 163 
☒ 166 
---
Q  55+763
T 422 
☒ 423 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☒ 1048
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☒ 873 
---
Q  39+341
T 236 
☒ 239 
---
val acc 15.040%
| epoch 7 |  iter 1 / 351 | time 0[s] | loss 0.68
| epoch 7 |  iter 21 / 351 | time 1[s] | loss 0.69
| epoch 7 |  iter 41 / 351 | time 2[s] | loss 0.67
| epoch 7 |  iter 61 / 351 | time 3[s] | loss 0.66
| epoch 7 |  iter 81 / 351 | time 5[s] | loss 0.66
| epoch 7 |  iter 101 / 351 | time 6[s] | loss 0.65
| epoch 7 |  iter 121 / 351 | time 7[s] | loss 0.65
| epoch 7 |  iter 141 / 351 | time 8[s] | loss 0.64
| epoch 7 |  iter 161 / 351 | time 10[s] | loss 0.63
| epoch 7 |  iter 181 / 351 | time 11[s] | loss 0.61
| epoch 7 |  iter 201 / 351 | time 12[s] | loss 0.61
| epoch 7 |  iter 221 / 351 | time 13[s] | loss 0.60
| epoch 7 |  iter 241 / 351 | time 15[s] | loss 0.57
| epoch 7 |  iter 261 / 351 | time 16[s] | loss 0.57
| epoch 7 |  iter 281 / 351 | time 17[s] | loss 0.57
| epoch 7 |  iter 301 / 351 | time 19[s] | loss 0.55
| epoch 7 |  iter 321 / 351 | time 20[s] | loss 0.54
| epoch 7 |  iter 341 / 351 | time 21[s] | loss 0.53
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☒ 665 
---
Q   551+8
T 163 
☒ 156 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 858 
---
Q 292+167
T 1053
☒ 1052
---
Q 795+038
T 1427
☒ 1428
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☒ 235 
---
val acc 39.100%
| epoch 8 |  iter 1 / 351 | time 0[s] | loss 0.51
| epoch 8 |  iter 21 / 351 | time 1[s] | loss 0.50
| epoch 8 |  iter 41 / 351 | time 2[s] | loss 0.49
| epoch 8 |  iter 61 / 351 | time 3[s] | loss 0.48
| epoch 8 |  iter 81 / 351 | time 5[s] | loss 0.47
| epoch 8 |  iter 101 / 351 | time 6[s] | loss 0.46
| epoch 8 |  iter 121 / 351 | time 7[s] | loss 0.46
| epoch 8 |  iter 141 / 351 | time 8[s] | loss 0.44
| epoch 8 |  iter 161 / 351 | time 10[s] | loss 0.41
| epoch 8 |  iter 181 / 351 | time 11[s] | loss 0.42
| epoch 8 |  iter 201 / 351 | time 12[s] | loss 0.41
| epoch 8 |  iter 221 / 351 | time 14[s] | loss 0.40
| epoch 8 |  iter 241 / 351 | time 15[s] | loss 0.39
| epoch 8 |  iter 261 / 351 | time 16[s] | loss 0.37
| epoch 8 |  iter 281 / 351 | time 17[s] | loss 0.36
| epoch 8 |  iter 301 / 351 | time 19[s] | loss 0.36
| epoch 8 |  iter 321 / 351 | time 20[s] | loss 0.35
| epoch 8 |  iter 341 / 351 | time 21[s] | loss 0.34
Q   58+77
T 162 
☒ 161 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☒ 657 
---
Q   551+8
T 163 
☒ 155 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1438
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 65.060%
| epoch 9 |  iter 1 / 351 | time 0[s] | loss 0.32
| epoch 9 |  iter 21 / 351 | time 1[s] | loss 0.31
| epoch 9 |  iter 41 / 351 | time 2[s] | loss 0.31
| epoch 9 |  iter 61 / 351 | time 3[s] | loss 0.31
| epoch 9 |  iter 81 / 351 | time 5[s] | loss 0.29
| epoch 9 |  iter 101 / 351 | time 6[s] | loss 0.29
| epoch 9 |  iter 121 / 351 | time 7[s] | loss 0.29
| epoch 9 |  iter 141 / 351 | time 8[s] | loss 0.27
| epoch 9 |  iter 161 / 351 | time 10[s] | loss 0.27
| epoch 9 |  iter 181 / 351 | time 11[s] | loss 0.26
| epoch 9 |  iter 201 / 351 | time 12[s] | loss 0.25
| epoch 9 |  iter 221 / 351 | time 13[s] | loss 0.25
| epoch 9 |  iter 241 / 351 | time 15[s] | loss 0.24
| epoch 9 |  iter 261 / 351 | time 16[s] | loss 0.24
| epoch 9 |  iter 281 / 351 | time 17[s] | loss 0.23
| epoch 9 |  iter 301 / 351 | time 19[s] | loss 0.22
| epoch 9 |  iter 321 / 351 | time 20[s] | loss 0.22
| epoch 9 |  iter 341 / 351 | time 21[s] | loss 0.21
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☒ 1140
---
Q  48+285
T 666 
☒ 657 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 83.280%
| epoch 10 |  iter 1 / 351 | time 0[s] | loss 0.22
| epoch 10 |  iter 21 / 351 | time 1[s] | loss 0.20
| epoch 10 |  iter 41 / 351 | time 2[s] | loss 0.20
| epoch 10 |  iter 61 / 351 | time 3[s] | loss 0.20
| epoch 10 |  iter 81 / 351 | time 5[s] | loss 0.18
| epoch 10 |  iter 101 / 351 | time 6[s] | loss 0.17
| epoch 10 |  iter 121 / 351 | time 7[s] | loss 0.18
| epoch 10 |  iter 141 / 351 | time 9[s] | loss 0.17
| epoch 10 |  iter 161 / 351 | time 10[s] | loss 0.17
| epoch 10 |  iter 181 / 351 | time 11[s] | loss 0.17
| epoch 10 |  iter 201 / 351 | time 12[s] | loss 0.17
| epoch 10 |  iter 221 / 351 | time 14[s] | loss 0.16
| epoch 10 |  iter 241 / 351 | time 15[s] | loss 0.15
| epoch 10 |  iter 261 / 351 | time 16[s] | loss 0.15
| epoch 10 |  iter 281 / 351 | time 17[s] | loss 0.15
| epoch 10 |  iter 301 / 351 | time 19[s] | loss 0.15
| epoch 10 |  iter 321 / 351 | time 20[s] | loss 0.14
| epoch 10 |  iter 341 / 351 | time 21[s] | loss 0.14
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☒ 656 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 88.400%
| epoch 11 |  iter 1 / 351 | time 0[s] | loss 0.13
| epoch 11 |  iter 21 / 351 | time 1[s] | loss 0.13
| epoch 11 |  iter 41 / 351 | time 2[s] | loss 0.13
| epoch 11 |  iter 61 / 351 | time 3[s] | loss 0.12
| epoch 11 |  iter 81 / 351 | time 5[s] | loss 0.12
| epoch 11 |  iter 101 / 351 | time 6[s] | loss 0.12
| epoch 11 |  iter 121 / 351 | time 7[s] | loss 0.11
| epoch 11 |  iter 141 / 351 | time 9[s] | loss 0.12
| epoch 11 |  iter 161 / 351 | time 10[s] | loss 0.11
| epoch 11 |  iter 181 / 351 | time 11[s] | loss 0.11
| epoch 11 |  iter 201 / 351 | time 12[s] | loss 0.12
| epoch 11 |  iter 221 / 351 | time 14[s] | loss 0.11
| epoch 11 |  iter 241 / 351 | time 15[s] | loss 0.11
| epoch 11 |  iter 261 / 351 | time 16[s] | loss 0.10
| epoch 11 |  iter 281 / 351 | time 17[s] | loss 0.10
| epoch 11 |  iter 301 / 351 | time 19[s] | loss 0.10
| epoch 11 |  iter 321 / 351 | time 20[s] | loss 0.09
| epoch 11 |  iter 341 / 351 | time 21[s] | loss 0.09
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 90.940%
| epoch 12 |  iter 1 / 351 | time 0[s] | loss 0.09
| epoch 12 |  iter 21 / 351 | time 1[s] | loss 0.09
| epoch 12 |  iter 41 / 351 | time 2[s] | loss 0.09
| epoch 12 |  iter 61 / 351 | time 3[s] | loss 0.09
| epoch 12 |  iter 81 / 351 | time 5[s] | loss 0.09
| epoch 12 |  iter 101 / 351 | time 6[s] | loss 0.08
| epoch 12 |  iter 121 / 351 | time 7[s] | loss 0.08
| epoch 12 |  iter 141 / 351 | time 8[s] | loss 0.08
| epoch 12 |  iter 161 / 351 | time 10[s] | loss 0.08
| epoch 12 |  iter 181 / 351 | time 11[s] | loss 0.08
| epoch 12 |  iter 201 / 351 | time 12[s] | loss 0.08
| epoch 12 |  iter 221 / 351 | time 14[s] | loss 0.09
| epoch 12 |  iter 241 / 351 | time 15[s] | loss 0.09
| epoch 12 |  iter 261 / 351 | time 16[s] | loss 0.09
| epoch 12 |  iter 281 / 351 | time 17[s] | loss 0.08
| epoch 12 |  iter 301 / 351 | time 19[s] | loss 0.08
| epoch 12 |  iter 321 / 351 | time 20[s] | loss 0.07
| epoch 12 |  iter 341 / 351 | time 21[s] | loss 0.08
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 92.220%
| epoch 13 |  iter 1 / 351 | time 0[s] | loss 0.07
| epoch 13 |  iter 21 / 351 | time 1[s] | loss 0.07
| epoch 13 |  iter 41 / 351 | time 2[s] | loss 0.07
| epoch 13 |  iter 61 / 351 | time 3[s] | loss 0.07
| epoch 13 |  iter 81 / 351 | time 5[s] | loss 0.06
| epoch 13 |  iter 101 / 351 | time 6[s] | loss 0.06
| epoch 13 |  iter 121 / 351 | time 7[s] | loss 0.07
| epoch 13 |  iter 141 / 351 | time 8[s] | loss 0.06
| epoch 13 |  iter 161 / 351 | time 10[s] | loss 0.06
| epoch 13 |  iter 181 / 351 | time 11[s] | loss 0.06
| epoch 13 |  iter 201 / 351 | time 12[s] | loss 0.06
| epoch 13 |  iter 221 / 351 | time 13[s] | loss 0.06
| epoch 13 |  iter 241 / 351 | time 15[s] | loss 0.06
| epoch 13 |  iter 261 / 351 | time 16[s] | loss 0.06
| epoch 13 |  iter 281 / 351 | time 17[s] | loss 0.06
| epoch 13 |  iter 301 / 351 | time 18[s] | loss 0.05
| epoch 13 |  iter 321 / 351 | time 20[s] | loss 0.05
| epoch 13 |  iter 341 / 351 | time 21[s] | loss 0.06
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 94.420%
| epoch 14 |  iter 1 / 351 | time 0[s] | loss 0.05
| epoch 14 |  iter 21 / 351 | time 1[s] | loss 0.05
| epoch 14 |  iter 41 / 351 | time 2[s] | loss 0.05
| epoch 14 |  iter 61 / 351 | time 3[s] | loss 0.05
| epoch 14 |  iter 81 / 351 | time 5[s] | loss 0.05
| epoch 14 |  iter 101 / 351 | time 6[s] | loss 0.05
| epoch 14 |  iter 121 / 351 | time 7[s] | loss 0.05
| epoch 14 |  iter 141 / 351 | time 8[s] | loss 0.05
| epoch 14 |  iter 161 / 351 | time 10[s] | loss 0.05
| epoch 14 |  iter 181 / 351 | time 11[s] | loss 0.05
| epoch 14 |  iter 201 / 351 | time 12[s] | loss 0.05
| epoch 14 |  iter 221 / 351 | time 13[s] | loss 0.06
| epoch 14 |  iter 241 / 351 | time 15[s] | loss 0.06
| epoch 14 |  iter 261 / 351 | time 16[s] | loss 0.07
| epoch 14 |  iter 281 / 351 | time 17[s] | loss 0.06
| epoch 14 |  iter 301 / 351 | time 19[s] | loss 0.06
| epoch 14 |  iter 321 / 351 | time 20[s] | loss 0.05
| epoch 14 |  iter 341 / 351 | time 21[s] | loss 0.05
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 94.340%
| epoch 15 |  iter 1 / 351 | time 0[s] | loss 0.04
| epoch 15 |  iter 21 / 351 | time 1[s] | loss 0.04
| epoch 15 |  iter 41 / 351 | time 2[s] | loss 0.04
| epoch 15 |  iter 61 / 351 | time 3[s] | loss 0.05
| epoch 15 |  iter 81 / 351 | time 5[s] | loss 0.04
| epoch 15 |  iter 101 / 351 | time 6[s] | loss 0.05
| epoch 15 |  iter 121 / 351 | time 7[s] | loss 0.04
| epoch 15 |  iter 141 / 351 | time 8[s] | loss 0.04
| epoch 15 |  iter 161 / 351 | time 10[s] | loss 0.04
| epoch 15 |  iter 181 / 351 | time 11[s] | loss 0.05
| epoch 15 |  iter 201 / 351 | time 12[s] | loss 0.04
| epoch 15 |  iter 221 / 351 | time 13[s] | loss 0.04
| epoch 15 |  iter 241 / 351 | time 15[s] | loss 0.03
| epoch 15 |  iter 261 / 351 | time 16[s] | loss 0.04
| epoch 15 |  iter 281 / 351 | time 17[s] | loss 0.04
| epoch 15 |  iter 301 / 351 | time 18[s] | loss 0.05
| epoch 15 |  iter 321 / 351 | time 20[s] | loss 0.04
| epoch 15 |  iter 341 / 351 | time 21[s] | loss 0.04
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 94.760%
| epoch 16 |  iter 1 / 351 | time 0[s] | loss 0.03
| epoch 16 |  iter 21 / 351 | time 1[s] | loss 0.05
| epoch 16 |  iter 41 / 351 | time 2[s] | loss 0.06
| epoch 16 |  iter 61 / 351 | time 3[s] | loss 0.05
| epoch 16 |  iter 81 / 351 | time 5[s] | loss 0.04
| epoch 16 |  iter 101 / 351 | time 6[s] | loss 0.04
| epoch 16 |  iter 121 / 351 | time 7[s] | loss 0.04
| epoch 16 |  iter 141 / 351 | time 8[s] | loss 0.04
| epoch 16 |  iter 161 / 351 | time 10[s] | loss 0.04
| epoch 16 |  iter 181 / 351 | time 11[s] | loss 0.04
| epoch 16 |  iter 201 / 351 | time 12[s] | loss 0.05
| epoch 16 |  iter 221 / 351 | time 13[s] | loss 0.05
| epoch 16 |  iter 241 / 351 | time 15[s] | loss 0.04
| epoch 16 |  iter 261 / 351 | time 16[s] | loss 0.04
| epoch 16 |  iter 281 / 351 | time 17[s] | loss 0.03
| epoch 16 |  iter 301 / 351 | time 19[s] | loss 0.03
| epoch 16 |  iter 321 / 351 | time 20[s] | loss 0.04
| epoch 16 |  iter 341 / 351 | time 21[s] | loss 0.04
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 96.080%
| epoch 17 |  iter 1 / 351 | time 0[s] | loss 0.04
| epoch 17 |  iter 21 / 351 | time 1[s] | loss 0.03
| epoch 17 |  iter 41 / 351 | time 2[s] | loss 0.03
| epoch 17 |  iter 61 / 351 | time 3[s] | loss 0.03
| epoch 17 |  iter 81 / 351 | time 5[s] | loss 0.03
| epoch 17 |  iter 101 / 351 | time 6[s] | loss 0.03
| epoch 17 |  iter 121 / 351 | time 7[s] | loss 0.02
| epoch 17 |  iter 141 / 351 | time 8[s] | loss 0.02
| epoch 17 |  iter 161 / 351 | time 10[s] | loss 0.03
| epoch 17 |  iter 181 / 351 | time 11[s] | loss 0.03
| epoch 17 |  iter 201 / 351 | time 12[s] | loss 0.03
| epoch 17 |  iter 221 / 351 | time 13[s] | loss 0.03
| epoch 17 |  iter 241 / 351 | time 15[s] | loss 0.03
| epoch 17 |  iter 261 / 351 | time 16[s] | loss 0.03
| epoch 17 |  iter 281 / 351 | time 17[s] | loss 0.03
| epoch 17 |  iter 301 / 351 | time 18[s] | loss 0.03
| epoch 17 |  iter 321 / 351 | time 20[s] | loss 0.04
| epoch 17 |  iter 341 / 351 | time 21[s] | loss 0.05
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☒ 856 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 91.420%
| epoch 18 |  iter 1 / 351 | time 0[s] | loss 0.06
| epoch 18 |  iter 21 / 351 | time 1[s] | loss 0.05
| epoch 18 |  iter 41 / 351 | time 2[s] | loss 0.05
| epoch 18 |  iter 61 / 351 | time 3[s] | loss 0.05
| epoch 18 |  iter 81 / 351 | time 5[s] | loss 0.05
| epoch 18 |  iter 101 / 351 | time 6[s] | loss 0.04
| epoch 18 |  iter 121 / 351 | time 7[s] | loss 0.03
| epoch 18 |  iter 141 / 351 | time 8[s] | loss 0.03
| epoch 18 |  iter 161 / 351 | time 10[s] | loss 0.03
| epoch 18 |  iter 181 / 351 | time 11[s] | loss 0.02
| epoch 18 |  iter 201 / 351 | time 12[s] | loss 0.02
| epoch 18 |  iter 221 / 351 | time 13[s] | loss 0.02
| epoch 18 |  iter 241 / 351 | time 15[s] | loss 0.02
| epoch 18 |  iter 261 / 351 | time 16[s] | loss 0.02
| epoch 18 |  iter 281 / 351 | time 17[s] | loss 0.02
| epoch 18 |  iter 301 / 351 | time 19[s] | loss 0.02
| epoch 18 |  iter 321 / 351 | time 20[s] | loss 0.02
| epoch 18 |  iter 341 / 351 | time 21[s] | loss 0.02
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 98.320%
| epoch 19 |  iter 1 / 351 | time 0[s] | loss 0.01
| epoch 19 |  iter 21 / 351 | time 1[s] | loss 0.02
| epoch 19 |  iter 41 / 351 | time 2[s] | loss 0.02
| epoch 19 |  iter 61 / 351 | time 3[s] | loss 0.02
| epoch 19 |  iter 81 / 351 | time 5[s] | loss 0.02
| epoch 19 |  iter 101 / 351 | time 6[s] | loss 0.02
| epoch 19 |  iter 121 / 351 | time 7[s] | loss 0.03
| epoch 19 |  iter 141 / 351 | time 8[s] | loss 0.03
| epoch 19 |  iter 161 / 351 | time 10[s] | loss 0.03
| epoch 19 |  iter 181 / 351 | time 11[s] | loss 0.04
| epoch 19 |  iter 201 / 351 | time 12[s] | loss 0.04
| epoch 19 |  iter 221 / 351 | time 13[s] | loss 0.03
| epoch 19 |  iter 241 / 351 | time 15[s] | loss 0.03
| epoch 19 |  iter 261 / 351 | time 16[s] | loss 0.03
| epoch 19 |  iter 281 / 351 | time 17[s] | loss 0.03
| epoch 19 |  iter 301 / 351 | time 18[s] | loss 0.02
| epoch 19 |  iter 321 / 351 | time 20[s] | loss 0.03
| epoch 19 |  iter 341 / 351 | time 21[s] | loss 0.02
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 97.220%
| epoch 20 |  iter 1 / 351 | time 0[s] | loss 0.03
| epoch 20 |  iter 21 / 351 | time 1[s] | loss 0.02
| epoch 20 |  iter 41 / 351 | time 2[s] | loss 0.04
| epoch 20 |  iter 61 / 351 | time 3[s] | loss 0.03
| epoch 20 |  iter 81 / 351 | time 5[s] | loss 0.04
| epoch 20 |  iter 101 / 351 | time 6[s] | loss 0.03
| epoch 20 |  iter 121 / 351 | time 7[s] | loss 0.03
| epoch 20 |  iter 141 / 351 | time 8[s] | loss 0.03
| epoch 20 |  iter 161 / 351 | time 10[s] | loss 0.02
| epoch 20 |  iter 181 / 351 | time 11[s] | loss 0.03
| epoch 20 |  iter 201 / 351 | time 12[s] | loss 0.02
| epoch 20 |  iter 221 / 351 | time 13[s] | loss 0.02
| epoch 20 |  iter 241 / 351 | time 15[s] | loss 0.02
| epoch 20 |  iter 261 / 351 | time 16[s] | loss 0.02
| epoch 20 |  iter 281 / 351 | time 17[s] | loss 0.03
| epoch 20 |  iter 301 / 351 | time 18[s] | loss 0.02
| epoch 20 |  iter 321 / 351 | time 20[s] | loss 0.02
| epoch 20 |  iter 341 / 351 | time 21[s] | loss 0.03
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☒ 1437
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 95.080%
| epoch 21 |  iter 1 / 351 | time 0[s] | loss 0.03
| epoch 21 |  iter 21 / 351 | time 1[s] | loss 0.03
| epoch 21 |  iter 41 / 351 | time 2[s] | loss 0.02
| epoch 21 |  iter 61 / 351 | time 3[s] | loss 0.02
| epoch 21 |  iter 81 / 351 | time 5[s] | loss 0.02
| epoch 21 |  iter 101 / 351 | time 6[s] | loss 0.02
| epoch 21 |  iter 121 / 351 | time 7[s] | loss 0.02
| epoch 21 |  iter 141 / 351 | time 8[s] | loss 0.02
| epoch 21 |  iter 161 / 351 | time 10[s] | loss 0.02
| epoch 21 |  iter 181 / 351 | time 11[s] | loss 0.02
| epoch 21 |  iter 201 / 351 | time 12[s] | loss 0.02
| epoch 21 |  iter 221 / 351 | time 14[s] | loss 0.01
| epoch 21 |  iter 241 / 351 | time 15[s] | loss 0.01
| epoch 21 |  iter 261 / 351 | time 16[s] | loss 0.01
| epoch 21 |  iter 281 / 351 | time 17[s] | loss 0.02
| epoch 21 |  iter 301 / 351 | time 19[s] | loss 0.02
| epoch 21 |  iter 321 / 351 | time 20[s] | loss 0.02
| epoch 21 |  iter 341 / 351 | time 21[s] | loss 0.02
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 97.480%
| epoch 22 |  iter 1 / 351 | time 0[s] | loss 0.03
| epoch 22 |  iter 21 / 351 | time 1[s] | loss 0.03
| epoch 22 |  iter 41 / 351 | time 2[s] | loss 0.02
| epoch 22 |  iter 61 / 351 | time 3[s] | loss 0.02
| epoch 22 |  iter 81 / 351 | time 5[s] | loss 0.02
| epoch 22 |  iter 101 / 351 | time 6[s] | loss 0.02
| epoch 22 |  iter 121 / 351 | time 7[s] | loss 0.02
| epoch 22 |  iter 141 / 351 | time 8[s] | loss 0.02
| epoch 22 |  iter 161 / 351 | time 10[s] | loss 0.02
| epoch 22 |  iter 181 / 351 | time 11[s] | loss 0.02
| epoch 22 |  iter 201 / 351 | time 12[s] | loss 0.02
| epoch 22 |  iter 221 / 351 | time 13[s] | loss 0.02
| epoch 22 |  iter 241 / 351 | time 15[s] | loss 0.02
| epoch 22 |  iter 261 / 351 | time 16[s] | loss 0.03
| epoch 22 |  iter 281 / 351 | time 17[s] | loss 0.04
| epoch 22 |  iter 301 / 351 | time 18[s] | loss 0.03
| epoch 22 |  iter 321 / 351 | time 20[s] | loss 0.03
| epoch 22 |  iter 341 / 351 | time 21[s] | loss 0.02
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 95.020%
| epoch 23 |  iter 1 / 351 | time 0[s] | loss 0.04
| epoch 23 |  iter 21 / 351 | time 1[s] | loss 0.03
| epoch 23 |  iter 41 / 351 | time 2[s] | loss 0.03
| epoch 23 |  iter 61 / 351 | time 3[s] | loss 0.03
| epoch 23 |  iter 81 / 351 | time 5[s] | loss 0.02
| epoch 23 |  iter 101 / 351 | time 6[s] | loss 0.02
| epoch 23 |  iter 121 / 351 | time 7[s] | loss 0.01
| epoch 23 |  iter 141 / 351 | time 9[s] | loss 0.02
| epoch 23 |  iter 161 / 351 | time 10[s] | loss 0.01
| epoch 23 |  iter 181 / 351 | time 11[s] | loss 0.02
| epoch 23 |  iter 201 / 351 | time 12[s] | loss 0.02
| epoch 23 |  iter 221 / 351 | time 14[s] | loss 0.02
| epoch 23 |  iter 241 / 351 | time 15[s] | loss 0.02
| epoch 23 |  iter 261 / 351 | time 16[s] | loss 0.03
| epoch 23 |  iter 281 / 351 | time 17[s] | loss 0.02
| epoch 23 |  iter 301 / 351 | time 19[s] | loss 0.02
| epoch 23 |  iter 321 / 351 | time 20[s] | loss 0.03
| epoch 23 |  iter 341 / 351 | time 21[s] | loss 0.04
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☒ 854 
---
Q  39+341
T 236 
☑ 236 
---
val acc 93.260%
| epoch 24 |  iter 1 / 351 | time 0[s] | loss 0.04
| epoch 24 |  iter 21 / 351 | time 1[s] | loss 0.03
| epoch 24 |  iter 41 / 351 | time 2[s] | loss 0.03
| epoch 24 |  iter 61 / 351 | time 3[s] | loss 0.03
| epoch 24 |  iter 81 / 351 | time 5[s] | loss 0.02
| epoch 24 |  iter 101 / 351 | time 6[s] | loss 0.01
| epoch 24 |  iter 121 / 351 | time 7[s] | loss 0.02
| epoch 24 |  iter 141 / 351 | time 8[s] | loss 0.01
| epoch 24 |  iter 161 / 351 | time 10[s] | loss 0.01
| epoch 24 |  iter 181 / 351 | time 11[s] | loss 0.01
| epoch 24 |  iter 201 / 351 | time 12[s] | loss 0.01
| epoch 24 |  iter 221 / 351 | time 13[s] | loss 0.02
| epoch 24 |  iter 241 / 351 | time 15[s] | loss 0.03
| epoch 24 |  iter 261 / 351 | time 16[s] | loss 0.03
| epoch 24 |  iter 281 / 351 | time 17[s] | loss 0.03
| epoch 24 |  iter 301 / 351 | time 18[s] | loss 0.03
| epoch 24 |  iter 321 / 351 | time 20[s] | loss 0.02
| epoch 24 |  iter 341 / 351 | time 21[s] | loss 0.02
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 97.800%
| epoch 25 |  iter 1 / 351 | time 0[s] | loss 0.02
| epoch 25 |  iter 21 / 351 | time 1[s] | loss 0.01
| epoch 25 |  iter 41 / 351 | time 2[s] | loss 0.01
| epoch 25 |  iter 61 / 351 | time 3[s] | loss 0.01
| epoch 25 |  iter 81 / 351 | time 5[s] | loss 0.01
| epoch 25 |  iter 101 / 351 | time 6[s] | loss 0.01
| epoch 25 |  iter 121 / 351 | time 7[s] | loss 0.01
| epoch 25 |  iter 141 / 351 | time 9[s] | loss 0.01
| epoch 25 |  iter 161 / 351 | time 10[s] | loss 0.01
| epoch 25 |  iter 181 / 351 | time 11[s] | loss 0.01
| epoch 25 |  iter 201 / 351 | time 12[s] | loss 0.01
| epoch 25 |  iter 221 / 351 | time 14[s] | loss 0.01
| epoch 25 |  iter 241 / 351 | time 15[s] | loss 0.01
| epoch 25 |  iter 261 / 351 | time 16[s] | loss 0.01
| epoch 25 |  iter 281 / 351 | time 17[s] | loss 0.01
| epoch 25 |  iter 301 / 351 | time 19[s] | loss 0.01
| epoch 25 |  iter 321 / 351 | time 20[s] | loss 0.01
| epoch 25 |  iter 341 / 351 | time 21[s] | loss 0.01
Q   58+77
T 162 
☑ 162 
---
Q 461+579
T 1139
☑ 1139
---
Q  48+285
T 666 
☑ 666 
---
Q   551+8
T 163 
☑ 163 
---
Q  55+763
T 422 
☑ 422 
---
Q 752+006
T 857 
☑ 857 
---
Q 292+167
T 1053
☑ 1053
---
Q 795+038
T 1427
☑ 1427
---
Q  838+62
T 864 
☑ 864 
---
Q  39+341
T 236 
☑ 236 
---
val acc 97.760%

%python3
# Overlay the recorded accuracy histories of the three seq2seq variants on one
# figure so they can be compared directly.
# NOTE(review): assumes each acc_list_* holds one accuracy value per epoch as a
# fraction in [0, 1] -- confirm against the training cells that fill them.
accuracy_curves = (
    ('baseline', acc_list_baseline),
    ('reversed input', acc_list_reversed),
    ('peeky', acc_list_peeky),
)

# Pin the y-axis to [0, 1] before drawing, as the original cell does.
plt.ylim(0, 1)

# Draw the curves in table order; the legend labels below are matched to the
# plotted lines by that same order.
for _curve_name, accuracy_history in accuracy_curves:
    plt.plot(accuracy_history)

plt.legend(labels=[curve_name for curve_name, _history in accuracy_curves])
plt.show()

  • 最初の正解が出るまでには時間がかかる気がする
  • が、一度正解し始めると一気に賢くなる

%md