2018-07-30-多元回归问题

2018-07-30 | 阅读：次

基于numpy库concatenate是一个非常好用的数组操作函数。参考：https://blog.csdn.net/brucewong0516/article/details/79158758

Parameters参数

传入的参数必须是一个多个数组的元组或者列表
另外需要指定拼接的方向，默认是 axis = 0，也就是说对0轴的数组对象进行纵向的拼接（纵向的拼接沿着axis= 1方向）；注：一般axis = 0，就是对该轴向的数组进行操作，操作方向是另外一个轴，即axis=1。

In [23]: a = np.array([[1, 2], [3, 4]])

In [24]: b = np.array([[5, 6]])

In [25]: np.concatenate((a, b), axis=0)
Out[25]:
array([[1, 2],
       [3, 4],
       [5, 6]])

传入的数组必须具有相同的形状，这里的相同的形状可以满足在拼接方向axis轴上数组间的形状一致即可

如果对数组对象进行 axis= 1 轴的拼接，方向是横向0轴，a是一个22维数组，axis= 0轴为2，b是一个12维数组，axis= 0 是1，两者的形状不等，这时会报错

将b进行转置，得到b为2*1维数组：

In [28]: np.concatenate((a,b.T),axis = 1)
Out[28]:
array([[1, 2, 5],
       [3, 4, 6]])

numpy.power(x1, x2);参考：https://blog.csdn.net/lql0716/article/details/52910812
数组的元素分别求n次方。x2可以是数字，也可以是数组，但是x1和x2的列数要相同。

>>> x1 = range(6)
>>> x1
[0, 1, 2, 3, 4, 5]
>>> np.power(x1, 3)
array([  0,   1,   8,  27,  64, 125])
>>> x2 = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0]
>>> np.power(x1, x2)
array([  0.,   1.,   8.,  27.,  16.,   5.])
>>> x2 = np.array([[1, 2, 3, 3, 2, 1], [1, 2, 3, 3, 2, 1]])
>>> x2
array([[1, 2, 3, 3, 2, 1],
       [1, 2, 3, 3, 2, 1]])
>>> np.power(x1, x2)
array([[ 0,  1,  8, 27, 16,  5],
       [ 0,  1,  8, 27, 16,  5]])

例1：温度预测normal

# coding: utf-8
# linear_regression/test_temperature_normal.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    X, y = regression.loadDataSet('data/temperature.txt');

    m,n = X.shape
    X = np.concatenate((np.ones((m,1)), X), axis=1)
    """
        rate 学习率
        maxLoop 最大迭代次数
        epsilon 收敛精度
    """
    rate = 0.0001
    maxLoop = 1000
    epsilon =0.01

    result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y)

    theta, errors, thetas = result

    # 绘制拟合曲线
    fittingFig = plt.figure()
    title = 'bgd: rate=%.3f, maxLoop=%d, epsilon=%.3f \n time: %ds'%(rate,maxLoop,epsilon,timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:,0].flatten().A[0])

    xCopy = X.copy()
    #？
    xCopy.sort(0)
    yHat = xCopy*theta
    fittingLine, = ax.plot(xCopy[:,1], yHat, color='g')

    ax.set_xlabel('temperature')
    ax.set_ylabel('yield')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()

    # 绘制误差曲线
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.4f'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')

    plt.show()

例2：温度预测多元

# coding: utf-8
# linear_regression/test_temperature_polynomial.py
import regression
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    srcX, y = regression.loadDataSet('data/temperature.txt');
    #返回行和列
    m,n = srcX.shape
    #np.concatenate拼接；np.power()平方
    srcX = np.concatenate((srcX[:, 0], np.power(srcX[:, 0], 2)), axis=1)
    # 特征缩放
    # 标准化
    X = regression.standardize(srcX.copy())
    # 连接；np.ones((m,1) m行n列，元素值为1
    X = np.concatenate((np.ones((m,1)), X), axis=1)
    #学习率
    rate = 0.1
    #迭代次数
    maxLoop = 1000

    epsilon = 0.01

    result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # 打印特征点
    fittingFig = plt.figure()
    title = 'polynomial with bgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds'%(rate,maxLoop,epsilon,timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(srcX[:, 0].flatten().A[0], y[:, 0].flatten().A[0])

    print(theta)

    # 打印拟合曲线
    xx = np.linspace(50,100,50)
    xx2 = np.power(xx,2)
    yHat = []
    for i in range(50):
        normalizedSize = (xx[i]-xx.mean())/xx.std(0)
        normalizedSize2 = (xx2[i]-xx2.mean())/xx2.std(0)
        x = np.matrix([[1,normalizedSize, normalizedSize2]])
        yHat.append(regression.h(theta, x.T))
    fittingLine, = ax.plot(xx, yHat, color='g')

    ax.set_xlabel('temperature')
    ax.set_ylabel('yield')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Polynomial Regression'])
    plt.show()

    # 打印误差曲线
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')

    plt.show()

stone

Linxia Yao

2018-07-30-多元回归问题