An unofficial implementation of the Yogi optimizer in Chainer. See the paper "Adaptive Methods for Nonconvex Optimization": https://papers.nips.cc/paper/8186-adaptive-methods-for-nonconvex-optimization
Tested with chainer==5.0.0 and cupy==5.0.0.
GPU: -1
# unit: 1000
# Minibatch-size: 100
# epoch: 20
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy elapsed_time
1 0.242103 0.151409 0.927 0.9557 16.0243
2 0.122487 0.119252 0.963483 0.963 38.1836
3 0.0926812 0.123334 0.97185 0.9656 63.7568
4 0.0745619 0.0973673 0.977933 0.9736 89.2653
5 0.0595758 0.112333 0.981867 0.972 116.109
6 0.0509271 0.0891344 0.98565 0.979 144.411
7 0.0391829 0.14601 0.98885 0.9711 179.207
8 0.029289 0.113656 0.991233 0.9768 217.336
9 0.0304781 0.123479 0.991117 0.9782 248.627
10 0.0239718 0.112454 0.993283 0.9796 289.354
11 0.0187226 0.105253 0.994433 0.983 326.473
12 0.0130421 0.125143 0.996467 0.9811 360.022
13 0.0191724 0.11144 0.994533 0.9816 401.037
14 0.0128045 0.103074 0.996567 0.9817 434.245
15 0.0117057 0.123512 0.996817 0.9792 470.056
16 0.0141359 0.124369 0.9962 0.9813 503.898
17 0.00920259 0.136098 0.9973 0.9808 538.22
18 0.00989504 0.145706 0.997533 0.98 569.842
19 0.0157152 0.132761 0.996767 0.9801 601.024
20 0.00798462 0.132843 0.997817 0.9812 646.47