chainer-eve
An Eve optimizer implementation in Chainer. See https://arxiv.org/abs/1611.01505v3
Results
Eve (python train_mnist.py -g 0 --noplot):
GPU: 0
# unit: 1000
# Minibatch-size: 100
# epoch: 20
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy elapsed_time lr d_tilde f
1 0.192833 0.0895319 0.942116 0.971 11.7518 0.000717226 0.936704 0.0903957
2 0.0674112 0.0852023 0.978349 0.9729 14.278 0.000680821 1.22801 0.037441
3 0.0345352 0.0543691 0.989148 0.9824 16.794 0.000518859 1.76098 0.0136122
4 0.0162165 0.059911 0.995082 0.9823 19.3123 0.000377031 2.52929 0.0187974
5 0.00777608 0.0536649 0.998099 0.9855 21.8096 0.000348165 2.7999 0.00704089
6 0.00429833 0.0578413 0.998933 0.9848 24.4582 0.000302475 3.26066 0.0426459
7 0.00338734 0.0688266 0.9991 0.9825 27.0248 0.00029957 3.31304 0.00327397
8 0.00296734 0.0658664 0.99915 0.9827 29.53 0.000274632 3.62626 0.00115538
9 0.00325209 0.0634611 0.9991 0.9849 32.0536 0.000258712 3.85659 0.000464668
10 0.000825565 0.0625149 0.999933 0.9864 34.5527 0.00025892 3.85742 7.86829e-05
11 0.000624758 0.0642727 0.9999 0.986 37.0814 0.00027492 3.63496 0.000105081
12 0.00577406 0.0750651 0.997999 0.9837 39.6146 0.000241942 4.13169 0.000458369
13 0.00321643 0.0686685 0.998983 0.9845 42.1558 0.000228902 4.36779 9.47857e-05
14 0.000467157 0.0661637 0.999933 0.986 44.6661 0.000242063 4.1307 8.46958e-05
15 0.000369986 0.0698587 0.999933 0.9854 47.248 0.000253236 3.94864 0.000226092
16 0.000150206 0.0655219 0.999983 0.9864 49.7427 0.000281945 3.54667 3.27778e-05
17 0.000162297 0.0734155 0.999967 0.9864 52.2742 0.000299217 3.34199 0.0001577
18 0.0115315 0.0659616 0.9968 0.9847 54.777 0.000240933 4.15049 0.000924358
19 0.00045691 0.0709715 0.999983 0.9852 57.2734 0.000239325 4.17839 0.000353003
20 0.000153304 0.068501 0.999983 0.986 59.7914 0.000263265 3.79844 7.4482e-06
Adam (python train_mnist.py -g 0 --noplot --opt Adam):
GPU: 0
# unit: 1000
# Minibatch-size: 100
# epoch: 20
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy elapsed_time lr d_tilde f
1 0.194638 0.0945798 0.94185 0.9701 10.8974 0.000671828
2 0.0723721 0.0831307 0.977699 0.9746 13.3633 0.000836054
3 0.0480908 0.0951917 0.985065 0.9713 15.8534 0.000913701
4 0.0362504 0.0869371 0.988315 0.974 18.308 0.00095362
5 0.0281614 0.0787104 0.990698 0.9809 20.7644 0.000974827
6 0.023696 0.0860714 0.992248 0.9785 23.2302 0.000986268
7 0.0212671 0.0768599 0.993182 0.9808 25.6866 0.00099249
8 0.0163347 0.0968411 0.994965 0.9768 28.1884 0.000995887
9 0.0195261 0.0759767 0.993699 0.9812 30.6438 0.000997745
10 0.0123887 0.0825914 0.996032 0.9806 33.1139 0.000998764
11 0.0156228 0.0821682 0.995099 0.9828 35.6785 0.000999322
12 0.0134235 0.0771391 0.996066 0.9831 38.1389 0.000999628
13 0.0118061 0.107356 0.996182 0.978 40.634 0.000999796
14 0.0130808 0.092472 0.995798 0.9821 43.1157 0.000999888
15 0.00811215 0.0897111 0.997332 0.9823 45.5749 0.000999939
16 0.00924951 0.0950187 0.997099 0.9818 48.1076 0.000999966
17 0.0106534 0.0962093 0.996849 0.9827 50.569 0.000999982
18 0.00851079 0.100032 0.997649 0.9826 53.0719 0.00099999
19 0.00668295 0.121721 0.997916 0.9789 55.5379 0.000999994
20 0.0142575 0.113598 0.996066 0.9819 58.0259 0.000999997