内容
隐藏
https://www.heywhale.com/mw/project/5da16a37037db3002d441810
https://zhuanlan.zhihu.com/p/151408238
python code
import numpy as np
# numpy: large multi-dimensional array and matrix computation
import pandas as pd
# pandas: tabular data structures and data analysis
import matplotlib.pyplot as plt
# matplotlib.pyplot: plotting API
# Training set: comma-separated "population, profit" pairs with no header row,
# so the column names are supplied explicitly.
path = 'Coursera-ML-AndrewNg-Notes-master/code/ex1-linear regression/ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])

# Prepend a constant column of ones (x0 = 1) so that theta[0] acts as the
# intercept term of the hypothesis.
data.insert(0, 'Ones', 1)

# To eyeball the raw data as a scatter plot, uncomment:
# data.plot(kind='scatter', x='Population', y='Profit', figsize=(12, 8))
# plt.show()

# data.shape is (rows, columns): every column except the last is a feature,
# the last column is the target.
cols = data.shape[1]
X = np.matrix(data.iloc[:, :cols - 1].values)
y = np.matrix(data.iloc[:, cols - 1:cols].values)

# One parameter per feature column (two here), all starting at zero.
theta = np.matrix([0, 0])
# Cost function J(theta): the quantity that gradient descent minimizes.
def computeCost(X, y, theta):
    """Return the halved mean squared error of a linear hypothesis.

    Computes ``sum((X @ theta.T - y) ** 2) / (2 * m)`` where ``m = len(X)``.

    Args:
        X: Design matrix of shape (m, n), one row per training example.
            May be an ``np.matrix`` or a 2-D array-like.
        y: Targets, m values; reshaped internally to a column (m, 1).
        theta: Parameters, n values; reshaped internally to a row (1, n).

    Returns:
        The cost as a float scalar.
    """
    # Coerce to plain float arrays so the result does not depend on whether
    # the caller passed np.matrix (where `*` is a matrix product) or ndarray
    # (where `*` is element-wise).
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(1, -1)

    # [x0 x1] @ [theta0; theta1] = theta0*x0 + theta1*x1 for every row of X.
    residuals = X @ theta.T - y
    return np.sum(np.power(residuals, 2)) / (2 * len(X))
# Batch gradient descent; `iters` is the number of iterations to run.
def gradientDescent(X, y, theta, alpha, iters):
    """Fit linear-regression parameters by batch gradient descent.

    Each iteration applies the simultaneous update

        theta_j := theta_j - (alpha / m) * sum((X * theta.T - y) .* X[:, j])

    to every parameter j, then records the cost of the updated parameters.

    Args:
        X: ``np.matrix`` of shape (m, n), one row per training example
            (``*`` below relies on matrix-product semantics).
        y: ``np.matrix`` of shape (m, 1) holding the targets.
        theta: Initial parameters of shape (1, n). The caller's object is
            left untouched.
        alpha: Learning rate.
        iters: Number of iterations.

    Returns:
        A tuple ``(theta, cost)``: the fitted parameters as a float
        ``np.matrix`` of shape (1, n), and an ndarray of length ``iters``
        with the cost after each iteration (useful to check convergence).
    """
    m = len(X)
    # Work on a float copy: the initial theta may be an integer matrix, and
    # the caller's matrix must not be mutated or aliased by the updates.
    theta = np.matrix(theta, dtype=float)
    cost = np.zeros(iters)

    for i in range(iters):
        # Prediction error h(x) - y for every example, shape (m, 1).
        error = (X * theta.T) - y
        # error.T * X is the (1, n) row of sums over examples of
        # error * X[:, j], i.e. m times the partial derivative for each j.
        # Updating the whole row at once guarantees that every parameter,
        # including theta_0, receives its own update computed from the same
        # error vector.
        theta = theta - (alpha / m) * (error.T * X)
        cost[i] = computeCost(X, y, theta)

    return theta, cost
# Hyperparameters: learning rate and number of gradient-descent iterations.
alpha, iters = 0.01, 1500
g, cost = gradientDescent(X, y, theta, alpha, iters)

# Evaluate the fitted line at 100 evenly spaced points spanning the observed
# population range; g[0, 0] is the intercept and g[0, 1] the slope.
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)

# Draw the fitted line (red) over the training points.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Traning Data')
ax.legend(loc=2)  # location code 2 is the upper-left corner
ax.set(
    xlabel='Population',
    ylabel='Profit',
    title='Predicted Profit vs. Population Size',
)
plt.show()

调试过程中查看X,y的值会比较好理解
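函数
代价函数:$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$,其中 $h_\theta(x) = \theta_0 x_0 + \theta_1 x_1$,$x_0 = 1$
梯度下降更新:$\theta_j := \theta_j - \alpha\,\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$(所有 $\theta_j$ 同时更新)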
主要是这个J(theta)函数中theta是变量,在python函数中的实现和循环,以及线性偏导的实现需要注意学习和理解
Comments | NOTHING