| Copyright | (c) Justin Le 2019 |
|---|---|
| License | BSD3 |
| Maintainer | justin@jle.im |
| Stability | experimental |
| Portability | non-portable |
| Safe Haskell | None |
| Language | Haskell2010 |
Defining various numeric optimizers. Most of these implementations are taken directly from http://ruder.io/optimizing-gradient-descent/
Synopsis
- steepestDescent :: LinearInPlace m c a => c -> Grad m r a -> Opto m r a
- newtype Momentum c = Momentum { momentumDecay :: c }
- momentum :: forall m r a c. LinearInPlace m c a => Momentum c -> c -> Grad m r a -> Opto m r a
- newtype Nesterov c = Nesterov { nesterovDecay :: c }
- nesterov :: forall m r a c. LinearInPlace m c a => Nesterov c -> c -> Grad m r a -> Opto m r a
- data Adagrad c = Adagrad { adagradRate :: c, adagradEps :: c }
- adagrad :: forall m r a c. (LinearInPlace m c a, Floating a, Real c) => Adagrad c -> Grad m r a -> Opto m r a
- data Adadelta c = Adadelta { adadeltaDecay :: c, adadeltaEps :: c }
- adadelta :: forall m r a c. (LinearInPlace m c a, Floating a, Real c) => Adadelta c -> Grad m r a -> Opto m r a
- data RMSProp c = RMSProp { rmsPropRate :: c, rmsPropDecay :: c, rmsPropEps :: c }
- rmsProp :: forall m r a c. (LinearInPlace m c a, Floating a, Real c) => RMSProp c -> Grad m r a -> Opto m r a
- data Adam c = Adam { adamStep :: !c, adamDecay1 :: !c, adamDecay2 :: !c, adamEps :: !c }
- adam :: forall m r a c. (RealFloat c, Floating a, LinearInPlace m c a, Mutable m c) => Adam c -> Grad m r a -> Opto m r a
- data AdaMax c = AdaMax { adaMaxStep :: !c, adaMaxDecay1 :: !c, adaMaxDecay2 :: !c, adaMaxEps :: !c }
- adaMax :: forall m r a c. (RealFloat c, Metric c a, LinearInPlace m c a, Mutable m c) => AdaMax c -> Grad m r a -> Opto m r a
Documentation
steepestDescent
    :: LinearInPlace m c a
    => c            -- ^ learning rate
    -> Grad m r a   -- ^ gradient
    -> Opto m r a
Steepest descent, according to some learning rate. The simplest optimizer.
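As a rough illustration only (not part of this module's API), here is a minimal standalone sketch of the update rule on plain numbers; the name `descentStep` and its parameters are made up for the example and stand in for the `Grad`/`Opto` machinery.

```haskell
-- Hypothetical sketch: plain gradient descent, x' = x - rate * grad x.
descentStep
    :: Num a
    => a          -- ^ learning rate
    -> (a -> a)   -- ^ gradient of the objective
    -> a          -- ^ current parameter
    -> a          -- ^ updated parameter
descentStep rate grad x = x - rate * grad x
```

For instance, `iterate (descentStep 0.1 (\x -> 2 * (x - 3))) 0` walks toward the minimum of `(x - 3)^2` at 3.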
Hyperparameter for momentum
Momentum
    momentumDecay :: c
momentum
    :: LinearInPlace m c a
    => Momentum c   -- ^ configuration
    -> c            -- ^ learning rate
    -> Grad m r a   -- ^ gradient
    -> Opto m r a
Steepest descent with momentum. (Qian, 1999)
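For intuition, a hypothetical single-variable sketch of the classical momentum update; `momentumStep`, `gamma`, and `rate` are made-up names, with the velocity carried alongside the parameter.

```haskell
-- Hypothetical sketch of classical momentum:
--   v' = gamma * v + rate * grad x
--   x' = x - v'
momentumStep
    :: Num a
    => a          -- ^ decay (the role of 'momentumDecay')
    -> a          -- ^ learning rate
    -> (a -> a)   -- ^ gradient
    -> (a, a)     -- ^ (parameter, velocity)
    -> (a, a)
momentumStep gamma rate grad (x, v) = (x - v', v')
  where
    v' = gamma * v + rate * grad x
```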
Hyperparameter for nesterov
Nesterov
    nesterovDecay :: c
nesterov
    :: LinearInPlace m c a
    => Nesterov c   -- ^ configuration
    -> c            -- ^ learning rate
    -> Grad m r a   -- ^ gradient
    -> Opto m r a
Nesterov accelerated gradient (NAG) (Nesterov, 1983)
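A hypothetical single-variable sketch of the NAG step (names made up for the example): it differs from plain momentum only in where the gradient is evaluated.

```haskell
-- Hypothetical sketch of NAG: the gradient is taken at the "looked-ahead"
-- point x - gamma * v instead of at x.
nesterovStep
    :: Num a
    => a          -- ^ decay (the role of 'nesterovDecay')
    -> a          -- ^ learning rate
    -> (a -> a)   -- ^ gradient
    -> (a, a)     -- ^ (parameter, velocity)
    -> (a, a)
nesterovStep gamma rate grad (x, v) = (x - v', v')
  where
    v' = gamma * v + rate * grad (x - gamma * v)
```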
Hyperparameters for adagrad
Adagrad
    adagradRate :: c
    adagradEps :: c
adagrad :: forall m r a c. (LinearInPlace m c a, Floating a, Real c) => Adagrad c -> Grad m r a -> Opto m r a
Adaptive Gradient (Duchi, Hazan, Singer, 2011). Note that if the state is not reset periodically, updates tend to zero fairly quickly.
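A hypothetical single-variable sketch of the Adagrad update (names made up for the example), which also shows where the shrinking-updates behaviour comes from.

```haskell
-- Hypothetical sketch of Adagrad: squared gradients accumulate in g2, so
-- the effective step size rate / sqrt (g2 + eps) only ever shrinks; that
-- is why updates tend to zero unless the accumulator is reset.
adagradStep
    :: Floating a
    => a          -- ^ learning rate (the role of 'adagradRate')
    -> a          -- ^ epsilon       (the role of 'adagradEps')
    -> (a -> a)   -- ^ gradient
    -> (a, a)     -- ^ (parameter, accumulated squared gradient)
    -> (a, a)
adagradStep rate eps grad (x, g2) = (x - rate / sqrt (g2' + eps) * g, g2')
  where
    g   = grad x
    g2' = g2 + g * g
```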
Hyperparameters for adadelta
Adadelta
    adadeltaDecay :: c
    adadeltaEps :: c
adadelta :: forall m r a c. (LinearInPlace m c a, Floating a, Real c) => Adadelta c -> Grad m r a -> Opto m r a
The Adadelta extension of Adagrad (Zeiler, 2012) that mitigates the decreasing learning rate.
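A hypothetical single-variable sketch of the Adadelta update (names made up for the example), following the rule in Zeiler's paper.

```haskell
-- Hypothetical sketch of Adadelta: decaying averages of squared gradients
-- (eg2) and squared updates (edx2) replace a fixed learning rate.
adadeltaStep
    :: Floating a
    => a              -- ^ decay (the role of 'adadeltaDecay')
    -> a              -- ^ epsilon
    -> (a -> a)       -- ^ gradient
    -> (a, a, a)      -- ^ (parameter, E[g^2], E[dx^2])
    -> (a, a, a)
adadeltaStep rho eps grad (x, eg2, edx2) = (x + dx, eg2', edx2')
  where
    g     = grad x
    eg2'  = rho * eg2 + (1 - rho) * g * g
    dx    = negate (sqrt (edx2 + eps) / sqrt (eg2' + eps)) * g
    edx2' = rho * edx2 + (1 - rho) * dx * dx
```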
Hyperparameters for rmsProp
RMSProp
    rmsPropRate :: c
    rmsPropDecay :: c
    rmsPropEps :: c
rmsProp :: forall m r a c. (LinearInPlace m c a, Floating a, Real c) => RMSProp c -> Grad m r a -> Opto m r a
RMSProp, as described by Geoff Hinton.
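A hypothetical single-variable sketch of the RMSProp update (names made up for the example):

```haskell
-- Hypothetical sketch of RMSProp: a decaying average of squared gradients
-- normalizes each step.
rmsPropStep
    :: Floating a
    => a          -- ^ learning rate (the role of 'rmsPropRate')
    -> a          -- ^ decay         (the role of 'rmsPropDecay')
    -> a          -- ^ epsilon       (the role of 'rmsPropEps')
    -> (a -> a)   -- ^ gradient
    -> (a, a)     -- ^ (parameter, E[g^2])
    -> (a, a)
rmsPropStep rate rho eps grad (x, eg2) =
    (x - rate / sqrt (eg2' + eps) * g, eg2')
  where
    g    = grad x
    eg2' = rho * eg2 + (1 - rho) * g * g
```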
Hyperparameters for adam
Adam
    adamStep :: !c
    adamDecay1 :: !c
    adamDecay2 :: !c
    adamEps :: !c
adam
    :: (RealFloat c, Floating a, LinearInPlace m c a, Mutable m c)
    => Adam c       -- ^ configuration
    -> Grad m r a   -- ^ gradient
    -> Opto m r a
Adaptive Moment Estimation (Kingma, Ba, 2015)
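A hypothetical single-variable sketch of one Adam step (the name `adamUpdate` and its parameters are made up for the example, chosen so as not to clash with the 'adamStep' field):

```haskell
-- Hypothetical sketch of Adam: bias-corrected first (m) and second (v)
-- moment estimates, with t counting the steps taken so far.
adamUpdate
    :: Floating a
    => a              -- ^ step size
    -> a              -- ^ decay 1 (beta1)
    -> a              -- ^ decay 2 (beta2)
    -> a              -- ^ epsilon
    -> (a -> a)       -- ^ gradient
    -> (a, a, a, a)   -- ^ (parameter, m, v, t)
    -> (a, a, a, a)
adamUpdate eta beta1 beta2 eps grad (x, m, v, t) = (x', m', v', t')
  where
    t'   = t + 1
    g    = grad x
    m'   = beta1 * m + (1 - beta1) * g
    v'   = beta2 * v + (1 - beta2) * g * g
    mHat = m' / (1 - beta1 ** t')
    vHat = v' / (1 - beta2 ** t')
    x'   = x - eta * mHat / (sqrt vHat + eps)
```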
Hyperparameters for adaMax
AdaMax
    adaMaxStep :: !c
    adaMaxDecay1 :: !c
    adaMaxDecay2 :: !c
    adaMaxEps :: !c