Linear Regression
1. The Mathematics of Linear Regression
The cost function $J(\theta)$ measures the average squared error of the hypothesis $h_{\theta}(x)=\theta^{T}x$ over the $m$ training examples:
$$ \begin{aligned} J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\frac{1}{2}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)^{2} \end{aligned} $$
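Batch gradient descent minimizes $J(\theta)$ by repeatedly applying the update below, where $\alpha$ is the learning rate; the vectorized form on the right is exactly what the code computes:

$$ \theta_{j} := \theta_{j}-\frac{\alpha}{m}\sum_{i=1}^{m}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)x_{j}^{(i)}, \qquad \theta := \theta-\frac{\alpha}{m}X^{T}\left(X\theta-y\right) $$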
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 9 08:18:10 2018
@author: [email protected]
"""
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import animation as amat
def loadDataSet(fileName):
    """Load a data set from a tab-separated text file.

    Each line holds the feature values followed by the target value.

    Args:
        fileName: name of the file to read.

    Returns:
        A feature matrix x and a target vector y as NumPy arrays.
    """
    dataX = []; dataY = []
    fr = open(fileName)
    for line in fr.readlines():
        dataArr = []
        for item in line.strip().split('\t'):
            dataArr.append(float(item))
        dataX.append(dataArr[0:-1])   # all columns but the last are features
        dataY.append(dataArr[-1])     # the last column is the target
    fr.close()
    print(dataX)
    print(dataY)
    return np.array(dataX), np.array(dataY)
def dataNormalize(dataX, dataY):
    """Apply min-max normalization to the data.

    Args:
        dataX: feature matrix x, one sample row per example.
        dataY: target vector y.

    Returns:
        The normalized feature matrix x and target vector y.
    """
    m, n = np.shape(dataX)
    colMax = dataX.max(axis=0)
    colMin = dataX.min(axis=0)
    for i in range(m):
        for j in range(n):
            # scale each feature into [0, 1]: (x - min) / (max - min)
            dataX[i][j] = (dataX[i][j] - colMin[j]) / (colMax[j] - colMin[j])
    minY = dataY.min(); maxY = dataY.max()
    for i in range(len(dataY)):
        dataY[i] = (dataY[i] - minY) / (maxY - minY)
    print(dataX)
    print(dataY)
    return dataX, dataY
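# A vectorized alternative to dataNormalize (a sketch, not part of the original
# script): NumPy broadcasting performs the same min-max scaling without the
# explicit loops. The name dataNormalizeVec is introduced here for illustration.
def dataNormalizeVec(dataX, dataY):
    xMin, xMax = dataX.min(axis=0), dataX.max(axis=0)
    dataX = (dataX - xMin) / (xMax - xMin)
    dataY = (dataY - dataY.min()) / (dataY.max() - dataY.min())
    return dataX, dataY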
def computeCost(x, y, theta):
    """Compute the squared-error cost J(theta) for the current parameters."""
    m = y.shape[0]
    # equivalent form: J = (np.sum((x.dot(theta) - y)**2)) / (2*m)
    C = x.dot(theta) - y
    J = (C.T.dot(C)) / (2 * m)
    return J
def batchGradientDescent(dataX, dataY):
    """Fit theta by batch gradient descent over the full training set."""
    matX = np.mat(dataX)
    matY = np.mat(dataY).transpose()
    m, n = np.shape(matX)
    alpha = 0.001            # learning rate
    theta = np.ones((n, 1))  # initial parameters
    maxTimes = 500           # number of iterations
    for k in range(maxTimes):
        # simultaneous update: theta := theta - (alpha/m) * X^T (X*theta - y)
        theta = theta - (alpha / m) * (np.dot(matX.T, np.dot(matX, theta) - matY))
        cost = computeCost(matX, matY, theta)
        print(theta, cost)
    return theta
def stochasticGradientDescent():
    # TODO: not implemented in this script; see the sketch after it.
    return 3

def miniBatchGradientDescent():
    # TODO: not implemented in this script; see the sketch after it.
    return 4
if __name__ == '__main__':
    dataX, dataY = loadDataSet('house_prize.txt')
    dataX, dataY = dataNormalize(dataX, dataY)
    batchGradientDescent(dataX, dataY)
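The last two functions are left as stubs in the script above. The sketch below shows one way they could be filled in, assuming the array shapes returned by loadDataSet; the hyperparameter names alpha, epochs, and batchSize are introduced here for illustration. Stochastic gradient descent updates theta from one randomly chosen sample per step, while mini-batch gradient descent averages the gradient over small random batches.

import numpy as np

def stochasticGradientDescent(dataX, dataY, alpha=0.001, epochs=50):
    """Update theta from one randomly chosen sample per step."""
    m, n = np.shape(dataX)
    y = dataY.reshape(m, 1)
    theta = np.ones((n, 1))
    for k in range(epochs):
        for i in np.random.permutation(m):
            xi = dataX[i].reshape(1, n)            # one sample as a row vector
            error = xi.dot(theta)[0, 0] - y[i, 0]  # scalar residual for this sample
            theta = theta - alpha * error * xi.T   # single-sample gradient step
    return theta

def miniBatchGradientDescent(dataX, dataY, alpha=0.001, epochs=50, batchSize=16):
    """Average the gradient over small random batches before each step."""
    m, n = np.shape(dataX)
    y = dataY.reshape(m, 1)
    theta = np.ones((n, 1))
    for k in range(epochs):
        idx = np.random.permutation(m)
        for start in range(0, m, batchSize):
            batch = idx[start:start + batchSize]
            xb, yb = dataX[batch], y[batch]
            grad = xb.T.dot(xb.dot(theta) - yb) / len(batch)
            theta = theta - alpha * grad
    return theta

Per step, the stochastic update is the cheapest but noisiest; the batch size in the mini-batch variant trades gradient noise against per-step cost, with batch gradient descent as the limiting case where the batch is the whole training set.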