Source code for neograd.nn.optim

import numpy as np
from ..autograd.utils import get_graph


class Optimizer:
    '''Common parent of every optimizer

    Stores the Params to be optimized together with the learning rate and
    offers gradient resetting to its subclasses.

    Parameters:
        params (list of Param): Params that the optimizer updates
        lr (float): Learning rate
    '''

    def __init__(self, params, lr):
        self.lr = lr
        self.params = params

    def zero_grad(self, all_members=False):
        '''Clears the grads of tensors

        After loss.backward the only Tensors still in memory are normally the
        params, because a new graph is built dynamically every time, so by
        default only the grads of the params are cleared.

        If backward was run with retain_graph=True, every member of the graph
        has to be zero_grad-ed for the next gradients to be correct; pass
        all_members=True to do that.

        Args:
            all_members (bool): Whether every member of the graph should be
                zero_grad-ed. Defaults to False
        '''
        if all_members:
            get_graph().zero_grad()

        # The params are always cleared as well; with all_members=True this is
        # redundant, but it covers the case where the graph has been reset
        for member in self.params:
            member.zero_grad()
class GD(Optimizer):
    '''Plain Gradient Descent

    Every step subtracts lr times the grad from the data of each param that
    requires grad.
    '''

    def __init__(self, params, lr):
        super().__init__(params, lr)

    def step(self):
        '''Applies one update to the params

        Params whose requires_grad is falsy are left untouched.
        '''
        for p in self.params:
            if not p.requires_grad:
                continue
            p.data -= self.lr * p.grad

    def __repr__(self):
        return f'GD(params={self.params}, lr={self.lr})'

    # str() yields exactly the same text as repr()
    __str__ = __repr__
class Momentum(Optimizer):
    '''Gradient Descent with Momentum

    https://youtu.be/k8fTYJPd3_I

    A running, beta-weighted average of the grads is kept on each param as
    momentum_grad, and that average is what gets applied in step.

    Parameters:
        beta (float): Value of Beta
    '''

    def __init__(self, params, lr, beta=0.9):
        super().__init__(params, lr)
        self.beta = beta
        self.init_momentum_grads()

    def step(self):
        '''Applies one update to the params

        The momentum_grads are refreshed first and then used, scaled by lr,
        to update the data of each param that requires grad.
        '''
        self.update_momentum_grads()
        for p in self.params:
            if not p.requires_grad:
                continue
            p.data -= self.lr * p.momentum_grad

    def init_momentum_grads(self):
        '''Sets up the momentum grads

        Every param that requires grad gets its momentum_grad set to 0
        '''
        for p in self.params:
            if not p.requires_grad:
                continue
            p.momentum_grad = 0

    def update_momentum_grads(self):
        '''Refreshes the momentum grads

        The new momentum_grad is beta times the previous momentum_grad plus
        (1-beta) times the current grad.
        '''
        for p in self.params:
            if not p.requires_grad:
                continue
            carried_over = self.beta * p.momentum_grad
            fresh_part = (1 - self.beta) * p.grad
            p.momentum_grad = carried_over + fresh_part

    def __repr__(self):
        return f'Momentum(params={self.params}, lr={self.lr}, beta={self.beta})'

    # str() yields exactly the same text as repr()
    __str__ = __repr__
class RMSProp(Optimizer):
    '''RMSProp

    https://youtu.be/_e-LFe_igno

    A running, beta-weighted average of the squared grads is kept on each
    param as rms_grad; in step the grad is divided by its square root plus
    epsilon.

    Parameters:
        beta (float): Value of Beta
        epsilon (float): Value of epsilon
    '''

    def __init__(self, params, lr, beta=0.9, epsilon=1e-8):
        super().__init__(params, lr)
        self.beta = beta
        self.epsilon = epsilon
        self.init_rms_grads()

    def step(self):
        '''Applies one update to the params

        The rms_grads are refreshed first and then used to scale the grad of
        each param that requires grad.
        '''
        self.update_rms_grads()
        for p in self.params:
            if not p.requires_grad:
                continue
            # epsilon is added after the square root is taken
            denominator = np.sqrt(p.rms_grad) + self.epsilon
            p.data -= self.lr * (p.grad / denominator)

    def init_rms_grads(self):
        '''Sets up the rms grads

        Every param that requires grad gets its rms_grad set to 0
        '''
        for p in self.params:
            if not p.requires_grad:
                continue
            p.rms_grad = 0

    def update_rms_grads(self):
        '''Refreshes the rms grads

        The new rms_grad is beta times the previous rms_grad plus (1-beta)
        times the element-wise square of the current grad.
        '''
        for p in self.params:
            if not p.requires_grad:
                continue
            carried_over = self.beta * p.rms_grad
            fresh_part = (1 - self.beta) * np.square(p.grad)
            p.rms_grad = carried_over + fresh_part

    def __repr__(self):
        return f'RMSProp(params={self.params}, lr={self.lr}, beta={self.beta}, epsilon={self.epsilon})'

    # str() yields exactly the same text as repr()
    __str__ = __repr__
class Adam(Optimizer):
    '''Adam

    https://youtu.be/JXQT_vxqwIs

    Keeps both a momentum_grad and an rms_grad on each param and applies
    bias-corrected versions of them in step.

    Parameters:
        iter (int): The number of iterations that has occurred, used for
            bias correction
        beta1 (float): Value of beta1
        beta2 (float): Value of beta2
        epsilon (float): Value of epsilon
    '''

    def __init__(self, params, lr, beta1=0.9, beta2=0.999, epsilon=1e-8):
        super().__init__(params, lr)
        self.iter = 0
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.init_adam_grads()

    def step(self):
        '''Applies one update to the params

        iter is incremented, the momentum_grads and rms_grads are refreshed,
        and their bias-corrected values are then used to update the data of
        each param that requires grad.
        '''
        self.iter += 1
        self.update_adam_grads()

        # The bias-correction denominators depend only on iter and the betas,
        # so they are the same for every param
        momentum_correction = 1 - (self.beta1 ** self.iter)
        rms_correction = 1 - (self.beta2 ** self.iter)

        for p in self.params:
            if not p.requires_grad:
                continue
            corrected_momentum = p.momentum_grad / momentum_correction
            corrected_rms = p.rms_grad / rms_correction
            # epsilon is added after the square root is taken
            denominator = np.sqrt(corrected_rms) + self.epsilon
            p.data -= self.lr * (corrected_momentum / denominator)

    def init_adam_grads(self):
        '''Sets up the rms grads and momentum grads

        Every param that requires grad gets its momentum_grad and its
        rms_grad set to 0
        '''
        for p in self.params:
            if not p.requires_grad:
                continue
            p.momentum_grad = 0
            p.rms_grad = 0

    def update_adam_grads(self):
        '''Refreshes the rms grads and momentum_grads

        momentum_grad is averaged with the current grad using beta1, and
        rms_grad is averaged with the element-wise square of the current
        grad using beta2.
        '''
        for p in self.params:
            if not p.requires_grad:
                continue
            current = p.grad
            p.momentum_grad = (self.beta1 * p.momentum_grad) + ((1 - self.beta1) * current)
            p.rms_grad = (self.beta2 * p.rms_grad) + ((1 - self.beta2) * np.square(current))

    def reset_iter(self):
        '''Sets iter back to 0

        Only the iteration counter is changed; momentum_grad and rms_grad on
        the params are not touched here.
        '''
        self.iter = 0

    def __repr__(self):
        return f'Adam(params={self.params}, lr={self.lr}, beta1={self.beta1}, beta2={self.beta2}, epsilon={self.epsilon})'

    # str() yields exactly the same text as repr()
    __str__ = __repr__