Linear Regression - wnd exp1

Published on 2023-07-14


https://www.heywhale.com/mw/project/5da16a37037db3002d441810

https://zhuanlan.zhihu.com/p/151408238

Python code:

import numpy as np
# numpy handles large multi-dimensional matrix and array operations
import pandas as pd
# pandas provides data structures and data analysis tools
import matplotlib.pyplot as plt
# matplotlib plotting API

path = 'Coursera-ML-AndrewNg-Notes-master/code/ex1-linear regression/ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
data.insert(0, 'Ones', 1)  # insert a column of ones as the first column, so x0 = 1 (intercept term)
# header=None: the file has no header row
# data.head() reads the first n rows
#data.plot(kind='scatter', x='Population', y='Profit', figsize=(12,8))
# kind='scatter' draws a scatter plot
# plt.show()

# Initialize X and y; X is the feature matrix, and shape returns [rows, columns]
cols = data.shape[1]
X = data.iloc[:,:-1]  # X is everything except the last column (the feature columns x_i)
y = data.iloc[:,cols-1:cols]  # y is the last column

X = np.matrix(X.values)
y = np.matrix(y.values)
# convert X and y from DataFrame to matrix
theta = np.matrix(np.array([0,0]))
# theta has 2 columns, matching the number of features x_i


# Cost function to be minimized
def computeCost(X, y, theta):
    # .T transposes a matrix
    # [X1 X2] * [theta1; theta2] = theta1*X1 + theta2*X2
    inner = np.power(((X * theta.T) - y), 2)
    return np.sum(inner) / (2 * len(X))
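
# Sanity check (assumption: for the course's ex1data1.txt, the cost at the
# initial theta = [0, 0] should come out around 32.07):
print(computeCost(X, y, theta))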




# Gradient descent; iters is the number of iterations
def gradientDescent(X, y, theta, alpha, iters):
    temp = np.matrix(np.zeros(theta.shape))
    # temp has theta's shape (1, 2); theta is the variable being optimized, so temp holds its updates
    parameters = int(theta.ravel().shape[1])
    # ravel flattens the matrix to one row; this gives the number of theta parameters
    cost = np.zeros(iters)

    # compute all updates first, then assign (simultaneous update)
    for i in range(iters):

        # actual error = h(x) - y
        error = (X * theta.T) - y

        # j runs over the theta parameters
        for j in range(parameters):
            term = np.multiply(error, X[:,j])
            # np.multiply is element-wise multiplication; for linear regression
            # this term corresponds to the partial derivative
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))

        theta = temp
        # record the cost of every iteration's theta; keeping the history
        # makes it possible to check whether the cost values converge
        cost[i] = computeCost(X, y, theta)

    return theta, cost

alpha = 0.01
iters = 1500
g, cost = gradientDescent(X, y, theta, alpha, iters)
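
# Inspect the result (assumption: for ex1data1.txt with alpha = 0.01 and
# 1500 iterations, theta should converge to roughly [-3.63, 1.17]):
print(g)
print(computeCost(X, y, g))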


x = np.linspace(data.Population.min(), data.Population.max(), 100)
# x is an array of 100 evenly spaced values across the population range
f = g[0, 0] + (g[0, 1] * x)
# the fitted line h(x) = theta0 + theta1 * x

fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
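
Because gradientDescent records the cost at every iteration, plotting the cost array against the iteration number is a quick way to check convergence. A minimal sketch reusing the variables above (the figure size and title are arbitrary choices):

fig, ax = plt.subplots(figsize=(12,8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Iterations')
plt.show()

The curve should drop steeply at first and then flatten out; if it rises instead, alpha is too large.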

During debugging, inspecting the values of X and y makes the code much easier to follow, for example:
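
A minimal sketch (the shapes shown assume the course's ex1data1.txt, which has 97 training examples):

print(X.shape, y.shape, theta.shape)  # expected: (97, 2) (97, 1) (1, 2)
print(X[:3, :])  # a column of ones followed by the population values
print(y[:3, :])  # the corresponding profit values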

Functions covered: gradient descent and the cost function.

The main point is that in J(theta), theta is the variable; how this is implemented with Python functions and loops, and how the linear partial derivatives are computed, are worth careful study.
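
To make the partial derivative step explicit, the inner loop over the parameters can be collapsed into a single matrix operation. A hypothetical vectorized sketch (gradientDescentVectorized is not part of the original exercise code; it assumes the same np.matrix objects X, y, and theta defined above):

def gradientDescentVectorized(X, y, theta, alpha, iters):
    cost = np.zeros(iters)
    for i in range(iters):
        error = (X * theta.T) - y  # h(x) - y, shape (m, 1)
        # the gradient of J(theta) is (1/m) * X.T * error; every parameter
        # is updated simultaneously in one matrix operation
        theta = theta - (alpha / len(X)) * (X.T * error).T
        cost[i] = computeCost(X, y, theta)
    return theta, cost

It is called the same way as the looped version, g, cost = gradientDescentVectorized(X, y, theta, alpha, iters), and should produce the same theta.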


间桐桜のお菓子屋さん