赞
踩
Logistic Regression(基本原理分析+python代码实现)
def sigmoid(inX):
    """Return the logistic function 1 / (1 + e^(-inX)).

    ``inX`` is negated and handed to ``exp``, so it can be whatever ``exp``
    accepts; the result has the form that ``exp`` returns for that input.
    """
    decay = exp(-inX)
    return 1.0 / (1 + decay)
(2)、batch gradient descent algorithm,公式(1.3)
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Every iteration uses the whole data set: the predictions for all rows
    are computed at once and the weights move along ``X^T * (y - h)``.

    :param dataMatIn: 2-D data (m rows, n feature columns); converted with
        ``mat`` so that ``*`` below is matrix multiplication.
    :param classLabels: the m class labels; converted with ``mat`` and
        transposed into an m x 1 column.
    :param alpha: step size applied to every update (default 0.001).
    :param maxCycles: number of update iterations (default 500).
    :return: n x 1 weight column.
    """
    dataMatrix = mat(dataMatIn)
    labelMat = mat(classLabels).transpose()
    n = shape(dataMatrix)[1]
    weights = ones((n, 1))
    for _ in range(maxCycles):
        h = sigmoid(dataMatrix * weights)  # m x 1 column of predictions
        error = labelMat - h  # m x 1 column of residuals
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
def stocGradAscent(dataMatrix, classLabels, numIter=150):
    """Fit logistic-regression weights with stochastic gradient ascent.

    Each of the ``numIter`` passes visits every row of ``dataMatrix`` exactly
    once, in random order, and updates the weights after each single row.

    :param dataMatrix: 2-D array with m rows and n feature columns; a row is
        multiplied element-wise with the weight vector, so this must be an
        array rather than a list of lists.
    :param classLabels: the m class labels, indexable by row number
        (presumably 0/1 values -- confirm against the caller).
    :param numIter: number of passes over the data (default 150).
    :return: 1-D array of n weights.
    """
    m, n = shape(dataMatrix)
    weights = ones(n)
    for j in range(numIter):
        # Rows not yet used in this pass. Built with list() because entries
        # are deleted below, which a Python 3 range object does not allow.
        dataIndex = list(range(m))
        for i in range(m):
            # Step size shrinks as j and i grow but never drops below 0.01.
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = int(random.uniform(0, len(dataIndex)))
            # randIndex is a position in dataIndex, not a row number; map it
            # to the row so that no row is drawn twice within one pass.
            rowIndex = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[rowIndex] * weights))  # scalar
            error = classLabels[rowIndex] - h  # scalar
            weights = weights + alpha * error * dataMatrix[rowIndex]
            del dataIndex[randIndex]
    return weights

def classifyVector(inX, weights):
    """Classify one feature vector with the given weights.

    Returns 1.0 when the sigmoid of ``sum(inX * weights)`` is greater than
    0.5, and 0.0 otherwise.
    """
    score = sum(inX * weights)
    return 1.0 if sigmoid(score) > 0.5 else 0.0
def _loadColicFile(fileName, numFeatures=21):
    """Read a tab-separated file into (feature rows, raw label strings)."""
    featureRows = []
    rawLabels = []
    # The with-block closes the file even when a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            currLine = line.strip().split('\t')
            featureRows.append([float(currLine[i]) for i in range(numFeatures)])
            rawLabels.append(currLine[numFeatures])
    return featureRows, rawLabels


def colicTest():
    """Train on 'horseColicTraining.txt' and score 'horseColicTest.txt'.

    Each line of both files is tab-separated: 21 feature values followed by
    the class label in column 22. The weights are fitted with
    ``stocGradAscent`` (500 passes), every test row is classified with
    ``classifyVector``, and the fraction of misclassified rows is printed.

    :return: the error rate on the test file, as a float.
    :raises ZeroDivisionError: if the test file contains no rows.
    """
    # Both files are read before training, so a missing file fails early.
    trainingSet, rawTrainLabels = _loadColicFile('horseColicTraining.txt')
    testSet, rawTestLabels = _loadColicFile('horseColicTest.txt')

    trainingLabels = [float(label) for label in rawTrainLabels]
    # Weights learned from the training data.
    trainWeights = stocGradAscent(array(trainingSet), array(trainingLabels), 500)

    errorCount = 0
    for lineArr, rawLabel in zip(testSet, rawTestLabels):
        # int(float(...)) accepts labels written as "1" as well as "1.000000".
        if int(classifyVector(array(lineArr), trainWeights)) != int(float(rawLabel)):
            errorCount += 1
    errorRate = float(errorCount) / len(testSet)
    print("the error of this test is: %f" % errorRate)
    return errorRate
-
def multiTest(numTests=10):
    """Run ``colicTest`` repeatedly and print the average error rate.

    :param numTests: how many times to run ``colicTest`` (default 10).
    """
    errorSum = 0.0
    for _ in range(numTests):
        errorSum += colicTest()
    # A single parenthesised argument prints the same on Python 2 and 3.
    print("after %d iterations the average error rate is: %f" % (numTests, errorSum / float(numTests)))

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。