Local Extrema and Nesterov Accelerated Gradient

Python. To find the global minimum of a selected function, I compared a brute-force sweep with the Nesterov Accelerated Gradient (NAG) optimisation technique. By stepping through the interval in small deltaX increments, I located the local extrema between the chosen lower and upper bounds. I then obtained the global minimum by calling a nested NAG routine from a coarser grid of starting points, and finally compared the iteration counts. I used the function y = x^2 + sin(4x) - 1.5 on the range [-2, 2], with deltaX = 0.001 for the brute-force sweep and deltaX = 0.2 for the nested NAG runs. The brute-force method takes 4000 iterations, whereas the Nesterov Accelerated Gradient approach takes 1137 iterations with the parameters used below.
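The sweep relies on the analytic derivatives of the chosen function: f'(x) = 2x + 4cos(4x) flags candidate extrema through its sign changes, and f''(x) = 2 - 16sin(4x) classifies each one as concave up (a local minimum) or concave down (a local maximum). For reference, the NAG step implemented in nesterovOptimizer below follows a common look-ahead form of the update, with learning rate alpha and momentum coefficient beta:

v_new = beta * v - alpha * f'(x + beta * v)
x_new = x + v_new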

import math
import numpy as np

def function(x):
    return x**2 + math.sin(4*x) - 1.5

def firstDerivative(x):
    return 2 * x + 4 * math.cos(4*x)

def secondDerivative(x):
    return 2 - 16 * math.sin(4*x)

def printResult(x):
    # Classify the extremum using the sign of the second derivative
    if secondDerivative(x) > 0:
        print("The concave up extremum (local minimum) is at:", "(", round(x, 4), ",", round(function(x), 4), ")")
    elif secondDerivative(x) < 0:
        print("The concave down extremum (local maximum) is at:", "(", round(x, 4), ",", round(function(x), 4), ")")
        
def localExtremas(deltaX, lowerBound, upperBound):
    # Sweep the interval and report an extremum whenever the first
    # derivative changes sign between consecutive grid points
    x = lowerBound
    isNegativeSlope = firstDerivative(x) < 0

    while x < upperBound:
        if firstDerivative(x) > 0 and isNegativeSlope:
            isNegativeSlope = False
            printResult(x)
        elif firstDerivative(x) < 0 and not isNegativeSlope:
            isNegativeSlope = True
            printResult(x)
        x += deltaX

def nesterovOptimizer(x, alpha=0.01, beta=0.9, maxIter=1000, tol=1e-5):
    # alpha is the learning rate, beta the momentum coefficient
    v = 0
    count = 0

    for i in range(maxIter):
        # Evaluate the gradient at the look-ahead point x + beta * v
        gradient = firstDerivative(x + beta * v)
        v = beta * v - alpha * gradient
        xNew = x + v

        count += 1
        # Stop once the update step falls below the tolerance
        if np.abs(xNew - x) < tol:
            break

        x = xNew

    return x, function(x), count

def globalMinimum(deltaX, lowerBound, upperBound):
    # Multi-start search: run NAG from starting points spaced deltaX apart
    # and keep the lowest minimum found
    x = lowerBound
    optimalX = lowerBound
    minValue = function(x)
    totalCount = 0
    
    while x < upperBound:       
        newOptimalX, newMinValue, count = nesterovOptimizer(x) 
        
        if newMinValue < minValue:
            minValue = newMinValue
            optimalX = newOptimalX     
             
        x += deltaX
        totalCount += count 
       
    return optimalX, minValue, totalCount

if __name__ == "__main__":
      
    # Determine local extrema with an incremental sweep over the range [-2, 2] with deltaX = 0.001
    lowerBound = -2.0
    upperBound = 2.0
    deltaX = 0.001
    
    localExtremas(deltaX, lowerBound, upperBound)
    print("The iteration count for local extrema:", int(np.abs((upperBound - lowerBound) / deltaX)))

    # Run the nested Nesterov Accelerated Gradient search with deltaX = 0.2 between starting points
    deltaX = 0.2
    optimalX, minValue, count = globalMinimum(deltaX, lowerBound, upperBound)
    
    print("The global minimum by Nesterov Accelerated Gradient is at:", "(",round(optimalX, 4),",", round(minValue, 4),")")
    print("The iteration count by Nesterov Accelerated Gradient:", count)
