The code below does not include the plotting part.
import numpy as np
import scipy.optimize as op

# Logistic regression, a classification problem.
# Instead of hand-rolled gradient descent, an advanced optimizer minimizes the cost.

def sigmoid(z):
    return 1/(1 + np.exp(-z))

def costFunction(theta, x, y):
    m = np.size(y)
    h = sigmoid(x@theta)
    if np.sum(1-h < 1e-10) != 0:
        # np.sum(1-h < 1e-10) compares every element of (1-h) against 1e-10 and
        # sums the booleans, i.e. it counts the elements of (1-h) smaller than
        # 1e-10; if any exist, log(1-h) would blow up, so return +infinity (np.inf)
        return np.inf
    return (-y@np.log(h) - (1-y)@np.log(1-h))/m

def gradFunction(theta, x, y):
    m = np.size(y)
    h = sigmoid(x@theta)
    grad = x.T@(h - y)/m
    return grad

data = np.loadtxt('ex2data1.txt', delimiter=',')
X = data[:, 0:2]
Y = data[:, 2]
m, n = np.shape(X)  # number of rows and columns
init_theta = np.zeros(n+1)
X = np.column_stack((np.ones(m), X))
result = op.minimize(costFunction, x0=init_theta, args=(X, Y), method='BFGS', jac=gradFunction)
# minimize(fun, x0, args=(), method=None, jac=None, hess=None,
#          hessp=None, bounds=None, constraints=(), tol=None,
#          callback=None, options=None)
# fun: the cost function; theta must be its first parameter and must have
#      shape (n,), i.e. a one-dimensional array
# x0: the initial theta, also a one-dimensional array of shape (n,)
# method: defaults to BFGS, L-BFGS-B, or SLSQP depending on the problem;
#         TNC can also be chosen
# jac: the gradient function; its first parameter must be theta with shape (n,),
#      and the returned gradient must be a one-dimensional array as well
# options: controls e.g. the maximum number of iterations, passed as a dict,
#          for example options={'maxiter': 400}
theta = result.x

# Predict for a given example with scores 45 and 85
student = np.array([1, 45, 85])
prob = sigmoid(student.dot(theta))
print('For a student with scores 45 and 85, we predict an admission probability of: ', prob)
# 0.7762904731714242

# Predict on the training set to check the model's accuracy
def predict(theta, x):
    m = np.size(x, 0)  # fixed: use the parameter x, not the global X
    p = np.zeros((m,))
    pos = np.where(x.dot(theta) >= 0)
    neg = np.where(x.dot(theta) < 0)
    p[pos] = 1
    p[neg] = 0
    # No need to compute the sigmoid: just check whether np.exp(-z) exceeds 1,
    # i.e. the sign of z = x@theta; np.exp(-z) > 1 means z < 0, a negative result
    return p

p = predict(theta, X)
print('Train Accuracy: ', np.sum(p == Y)/np.size(Y, 0))
# 0.89
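Since the plotting part is omitted above, here is a minimal sketch of it, assuming the variables X (with the bias column), Y, theta, and np from the script above are still in scope; the "Exam score" axis labels follow the admission story printed by the script.

# Sketch: scatter the two classes and draw the fitted linear decision boundary.
import matplotlib.pyplot as plt

pos = Y == 1
neg = Y == 0
plt.scatter(X[pos, 1], X[pos, 2], marker='+', c='k', label='Admitted')
plt.scatter(X[neg, 1], X[neg, 2], marker='o', c='y', label='Not admitted')

# The boundary is theta0 + theta1*x1 + theta2*x2 = 0,
# i.e. x2 = -(theta0 + theta1*x1)/theta2, a straight line
x1 = np.array([X[:, 1].min() - 2, X[:, 1].max() + 2])
plt.plot(x1, -(theta[0] + theta[1]*x1)/theta[2], label='Decision boundary')
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')
plt.legend()
plt.show()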
When the plotted training set forms a ring that no straight line can separate, map the two raw features to polynomial terms and regularize the cost:
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as op

def sigmoid(z):
    return 1/(1 + np.exp(-z))

def mapFeature(x1, x2):
    degree = 6  # map the training set to polynomial terms up to degree 6
    col = 28    # 7+6+5+4+3+2+1
    # x1^0 * x2^[0-6]   7 terms
    # x1^1 * x2^[0-5]   6 terms
    # ... ...
    # x1^6 * x2^0       1 term
    res = np.ones((np.size(x1), col))
    cnt = 0
    for i in range(degree+1):
        for j in range(degree-i+1):
            res[:, cnt] = np.power(x1, i)*np.power(x2, j)
            cnt += 1
    return res

def costFunction(theta, x, y, lam):
    m = np.size(y)
    h = sigmoid(x@theta)
    if np.sum(1-h < 1e-10) != 0:
        return np.inf
    first = (-y@np.log(h) - (1-y)@np.log(1-h))/m
    second = lam/2/m * theta[1:]@theta[1:]  # theta_0 is not regularized
    return first + second

def gradFunction(theta, x, y, lam):
    m = np.size(y)
    h = sigmoid(x@theta)
    grad = x.T@(h - y)/m
    grad[1:] += lam/m * theta[1:]  # theta_0 is not regularized
    return grad

data = np.loadtxt('ex2data2.txt', delimiter=',')
X = data[:, 0:2]
Y = data[:, 2]
lamd = 1
X = mapFeature(X[:, 0], X[:, 1])  # the first mapped column is the all-ones bias term
m, n = np.shape(X)  # number of rows and columns
init_theta = np.zeros(n)
result = op.minimize(costFunction, x0=init_theta, args=(X, Y, lamd), method='BFGS', jac=gradFunction)
theta = result.x

# Predict on the training set to check the model's accuracy
def predict(theta, x):
    m = np.size(x, 0)  # fixed: use the parameter x, not the global X
    p = np.zeros((m,))
    pos = np.where(x.dot(theta) >= 0)
    neg = np.where(x.dot(theta) < 0)
    p[pos] = 1
    p[neg] = 0
    return p

p = predict(theta, X)
print('Train Accuracy: ', np.sum(p == Y)/np.size(Y, 0))
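Again the plotting part is omitted; below is a minimal sketch of the nonlinear boundary, assuming data, Y, theta, mapFeature, np, and plt from the script above are in scope. The boundary is the zero level set of the score theta applied to the mapped features, drawn with plt.contour over a grid; the "Microchip test" axis labels are an assumption about what the two columns of ex2data2.txt mean.

# Sketch: scatter the ring-shaped data and contour the nonlinear boundary.
pos = Y == 1
neg = Y == 0
plt.scatter(data[pos, 0], data[pos, 1], marker='+', c='k', label='y = 1')
plt.scatter(data[neg, 0], data[neg, 1], marker='o', c='y', label='y = 0')

# Evaluate theta @ mapFeature(u, v) on a grid; the decision boundary is where
# this score crosses zero, i.e. where sigmoid(score) = 0.5
u = np.linspace(data[:, 0].min() - 0.1, data[:, 0].max() + 0.1, 100)
v = np.linspace(data[:, 1].min() - 0.1, data[:, 1].max() + 0.1, 100)
z = np.zeros((u.size, v.size))
for i in range(u.size):
    for j in range(v.size):
        z[i, j] = (mapFeature(np.array([u[i]]), np.array([v[j]])) @ theta)[0]
plt.contour(u, v, z.T, levels=[0], colors='b')  # transpose: contour expects z[v, u]
plt.xlabel('Microchip test 1')  # assumed axis meaning
plt.ylabel('Microchip test 2')  # assumed axis meaning
plt.legend()
plt.show()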