Finding extrema of multivariable functions: steepest descent, Newton's method, conjugate gradient, and Lagrange multipliers (Python code)

Numerical optimization methods:

In optimization, the classical unconstrained methods include steepest descent, Newton's method, and the conjugate gradient method; for extrema of multivariable functions under constraints, the standard tool is the method of Lagrange multipliers.

Many blog posts and papers online already explain these methods in detail, so the theory is not repeated here; this article mainly draws on the following posts:

- 最速下降法、牛顿法、共轭梯度法
- 多元函数条件极值的求法 拉格朗日乘数法
- 最速下降和Newton法
- Matlab实现无约束优化问题 — 最速下降法

Building on the posts above, this article implements the classical unconstrained optimization methods in Python, as an exercise in programming and in understanding these algorithms. If there are any mistakes, corrections and criticism are welcome. Thank you!

1. Steepest descent:

The steepest descent method is a concrete form of gradient descent that recomputes the step size at every iteration via an exact line search, so that each step reduces the objective as much as possible along the current direction. Its per-iteration cost is low and the method converges, but convergence is slow; in particular, as the iterates approach the optimum, the distance traveled per iteration becomes shorter and shorter.
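
Concretely, writing $g_k = \nabla f(x_k)$, each iteration performs

$$x_{k+1} = x_k - t_k\, g_k, \qquad t_k = \arg\min_{t \ge 0} f(x_k - t\, g_k),$$

and the code below carries out the line search symbolically, solving $\frac{\mathrm{d}}{\mathrm{d}t} f(x_k - t\, g_k) = 0$ for $t$ with SymPy.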

import matplotlib.pyplot as plt
from sympy import *
import numpy as np

'''=================== Steepest descent method ==================='''
# Define symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')

# Define the objective function
# fun = 2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2
fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2

# Plot the objective surface
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)  # ranges of the two independent variables
X, Y = np.meshgrid(X, Y)
# Z = 2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2  # z as a function of x and y
Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Gradient Descent Method')

# Partial derivatives with respect to x_1 and x_2
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)

# Parameters
MaxIter = 200
epsilon = 0.0001

# Initial point
x_1_value = -2
x_2_value = 2
iter_cnt = 0
current_step_size = 100
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f step_size: %5.4f' % (
    iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value, current_step_size))

while iter_cnt <= MaxIter and abs(grad_1_value) + abs(grad_2_value) >= epsilon:
    iter_cnt += 1
    # Exact line search: minimize fun along the negative gradient direction
    t = symbols('t')
    x_1_updated = x_1_value - grad_1_value * t
    x_2_updated = x_2_value - grad_2_value * t
    Fun_updated = fun.subs({x_1: x_1_updated, x_2: x_2_updated})
    grad_t = diff(Fun_updated, t)
    t_value = solve(grad_t, t)[0]  # solve dF/dt == 0 for the optimal step size
    # Move to the new point along the negative gradient
    x_1_value = float(x_1_value - t_value * grad_1_value)
    x_2_value = float(x_2_value - t_value * grad_2_value)
    # Recompute the gradient at the new point so the next direction is current
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    current_step_size = t_value
    # Plot the new iterate
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f step_size: %5.4f' % (
        iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value, current_step_size))

plt.show()
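
As a quick sanity check, the quadratic objective above can be written as $f(x) = \tfrac{1}{2}x^{\top}Ax - b^{\top}x$, so its unique minimizer solves the linear system $Ax = b$. A minimal NumPy sketch, with $A$ and $b$ read off the quadratic above:

import numpy as np

# f(x) = 0.5 * x^T A x - b^T x for fun = x_1**2 + 2*x_2**2 - 2*x_1*x_2 - 2*x_2
A = np.array([[2.0, -2.0],
              [-2.0, 4.0]])  # constant Hessian of the quadratic
b = np.array([0.0, 2.0])     # minus the linear coefficients
x_star = np.linalg.solve(A, b)
print(x_star)  # [1. 1.] -- the point the iterates above approach; f(1, 1) = -1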

2. Newton's method:
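
Newton's method uses curvature as well as the gradient: each iteration solves a linear system with the Hessian and applies the update

$$x_{k+1} = x_k - \left[\nabla^2 f(x_k)\right]^{-1} \nabla f(x_k).$$

Near a minimizer with a positive definite Hessian it converges quadratically, far faster than steepest descent, at the cost of computing second derivatives. The test function below is the Rosenbrock function $(1 - x_1)^2 + 100\,(x_2 - x_1^2)^2$, a classic non-convex benchmark whose minimum value 0 is attained at $(1, 1)$.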

import matplotlib.pyplot as plt
from sympy import *
import numpy as np

'''=================== Newton's method ==================='''
# Define symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')

# Define the objective function (the Rosenbrock function)
# fun = 2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2
# fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2
fun = (1 - x_1) ** 2 + 100 * (x_2 - x_1 ** 2) ** 2

# Plot the objective surface
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)  # ranges of the two independent variables
X, Y = np.meshgrid(X, Y)
# Z = 2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2
# Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
Z = (1 - X) ** 2 + 100 * (Y - X ** 2) ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Newton Method')

# First-order partial derivatives
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)
# Second-order partial derivatives
grad_12 = diff(grad_1, x_2)
grad_11 = diff(fun, x_1, 2)
grad_22 = diff(fun, x_2, 2)

# Parameters
MaxIter = 200
epsilon = 0.0001

# Initial point
x_1_value = -10
x_2_value = 10
iter_cnt = 0
current_step_size = 100
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
current_obj_last = 1000
print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f step_size: %5.4f' % (
    iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value, current_step_size))

# Stop when the change in objective value between iterations falls below epsilon
while iter_cnt <= MaxIter and abs(current_obj_last - current_obj) >= epsilon:
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    iter_cnt += 1
    # Gradient vector
    G = np.vstack((grad_1_value, grad_2_value))
    # Hessian matrix
    H = np.vstack(([grad_11_value, grad_12_value],
                   [grad_12_value, grad_22_value]))
    # Newton update: x <- x - H^(-1) g
    z_value = np.vstack((x_1_value, x_2_value)) - np.linalg.inv(H) @ G
    x_1_value = float(z_value[0, 0])
    x_2_value = float(z_value[1, 0])
    current_obj_last = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    # Recompute first and second derivatives at the new point
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    # Plot the new iterate
    axes.scatter(x_1_value, x_2_value, current_obj_last, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f' % (
        iter_cnt, x_1_value, x_2_value, current_obj_last, grad_1_value, grad_2_value))

plt.show()
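
For comparison, SciPy ships the Rosenbrock function together with its analytic gradient and Hessian, so the result above can be cross-checked with a Newton-type solver. A minimal sketch, assuming SciPy is installed:

import numpy as np
from scipy.optimize import minimize, rosen, rosen_der, rosen_hess

x0 = np.array([-10.0, 10.0])  # same starting point as above
res = minimize(rosen, x0, method='Newton-CG', jac=rosen_der, hess=rosen_hess)
print(res.x, res.fun)  # should approach [1. 1.] and 0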

3. The conjugate gradient method:
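
The conjugate gradient method improves on steepest descent by making each new search direction conjugate (with respect to the Hessian $H$) to the previous one, instead of simply following the negative gradient. The variant implemented below re-evaluates the Hessian $H_k$ at every iterate and uses, with gradient $g_k$,

$$d_0 = -g_0, \qquad d_k = -g_k + \frac{d_{k-1}^{\top} H_k\, g_k}{d_{k-1}^{\top} H_k\, d_{k-1}}\, d_{k-1}, \qquad \lambda_k = -\frac{d_k^{\top} g_k}{d_k^{\top} H_k\, d_k}, \qquad x_{k+1} = x_k + \lambda_k d_k.$$

For a quadratic objective with constant positive definite $H$ this terminates in at most $n$ steps, $n$ being the number of variables; on a non-quadratic function such as Rosenbrock the quadratic model is only local, which is why $H_k$ is recomputed at every iteration.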

import matplotlib.pyplot as plt
from sympy import *
import numpy as np

'''=================== Conjugate gradient method (CG) ==================='''
# Define symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')

# Define the objective function (the Rosenbrock function)
# fun = -(2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2)
# fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2
fun = (1 - x_1) ** 2 + 100 * (x_2 - x_1 ** 2) ** 2

# Plot the objective surface
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)  # ranges of the two independent variables
X, Y = np.meshgrid(X, Y)
# Z = -(2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2)
# Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
Z = (1 - X) ** 2 + 100 * (Y - X ** 2) ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Conjugate gradient method')

# First-order partial derivatives
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)
# Second-order partial derivatives
grad_12 = diff(grad_1, x_2)
grad_11 = diff(fun, x_1, 2)
grad_22 = diff(fun, x_2, 2)

# Parameters
MaxIter = 200
epsilon = 0.0001

# Initial point
x_1_value = -2
x_2_value = 2
iter_cnt = 0
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
current_obj_last = 1000
print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f' % (
    iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))

# Stop when the change in objective value between iterations falls below epsilon
while iter_cnt <= MaxIter and abs(current_obj_last - current_obj) >= epsilon:
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    # Plot the current iterate
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    iter_cnt += 1
    # Gradient vector
    G = np.vstack((grad_1_value, grad_2_value))
    # Hessian matrix
    H = np.vstack(([grad_11_value, grad_12_value],
                   [grad_12_value, grad_22_value]))
    # Search direction: negative gradient on the first step,
    # afterwards H-conjugate to the previous direction
    if iter_cnt == 1:
        d = -G
    else:
        d = -G + ((d.T @ H @ G) / (d.T @ H @ d)) * d
    # Exact step size along d for the local quadratic model
    lamda = -(d.T @ G) / (d.T @ H @ d)
    # Move to the new point along the conjugate direction
    z_value = np.vstack((x_1_value, x_2_value)) + lamda * d
    x_1_value = float(z_value[0, 0])
    x_2_value = float(z_value[1, 0])
    current_obj_last = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    # Recompute first and second derivatives at the new point
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f' % (
        iter_cnt, x_1_value, x_2_value, current_obj_last, grad_1_value, grad_2_value))

plt.show()
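
SciPy's Hessian-free nonlinear conjugate gradient (method='CG', a Polak-Ribiere variant) can serve as a cross-check. A minimal sketch, assuming SciPy is installed:

import numpy as np
from scipy.optimize import minimize, rosen, rosen_der

x0 = np.array([-2.0, 2.0])  # same starting point as above
res = minimize(rosen, x0, method='CG', jac=rosen_der)
print(res.x, res.fun)  # should approach [1. 1.] and 0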

4. The method of Lagrange multipliers:
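
To find an extremum of $f(x_1, x_2)$ subject to an equality constraint $g(x_1, x_2) = 0$, form the Lagrangian and set all of its partial derivatives to zero:

$$L(x_1, x_2, k) = f(x_1, x_2) - k\, g(x_1, x_2), \qquad \frac{\partial L}{\partial x_1} = \frac{\partial L}{\partial x_2} = \frac{\partial L}{\partial k} = 0.$$

The condition $\partial L / \partial k = 0$ simply recovers the constraint. The example below minimizes $f = 60 - 10x_1 - 4x_2 + x_1^2 + x_2^2 - x_1 x_2$ subject to $x_1 + x_2 = 8$.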

from sympy import *

x1, x2, k = symbols('x1,x2,k')
# Objective function and equality constraint g = 0
f = 60 - 10 * x1 - 4 * x2 + x1 ** 2 + x2 ** 2 - x1 * x2
g = x1 + x2 - 8
# Build the Lagrangian
L = f - k * g
# First-order (stationarity) conditions: all partials of L vanish
dx1 = diff(L, x1)  # partial derivative with respect to x1
dx2 = diff(L, x2)  # partial derivative with respect to x2
dk = diff(L, k)    # partial derivative with respect to k (recovers the constraint)
# Solve the system for all variables
m = solve([dx1, dx2, dk], [x1, x2, k])
print('At the extremum, the variables take the values: {}'.format(m))
# {x1: 5, x2: 3, k: -3}
# Substitute the solution back into the variables
x1 = m[x1]
x2 = m[x2]
k = m[k]
# Evaluate the objective at the extremum
f = 60 - 10 * x1 - 4 * x2 + x1 ** 2 + x2 ** 2 - x1 * x2
print("The minimum value of the function is:", f)