我正在尝试用 Go 实现梯度下降。我的目标是根据行驶里程来预测汽车的价格。这是我的数据集:
km,price
240000,3650
139800,3800
150500,4400
185530,4450
176000,5250
114800,5350
166800,5800
89000,5990
144500,5999
84000,6200
82029,6390
63060,6390
74000,6600
97500,6800
67000,6800
76025,6900
48235,6900
93000,6990
60949,7490
65674,7555
54000,7990
68500,7990
22899,7990
61789,8290
我尝试了各种方法,例如对数据集进行归一化、不进行归一化、保持 theta 不变、对 theta 进行反归一化……但始终无法得到正确的结果。我的数学推导一定在某个地方出了错,但我找不到是哪里。我期望得到的结果大约是 t0 = 8500,t1 = -0.02。我的实现如下:
package main
import (
"encoding/csv"
"fmt"
"log"
"math"
"os"
"strconv"
)
// Training configuration.
const (
// dataFile is the path of the CSV file opened by getData.
dataFile = "data.csv"
// iterations is the number of times main calls computeThetas.
iterations = 20000
// learningRate scales each averaged gradient term in computeThetas.
learningRate = 0.1
)
// dataSet is a list of rows; getData fills each row with the {km, price}
// pair returned by processEntry.
type dataSet [][]float64
// Column extremes recorded by (*dataSet).normalize: column 0 goes to
// minKm/maxKm and column 1 to minPrice/maxPrice.
var minKm,maxKm,minPrice,maxPrice float64
// getExtremes scans every row of d and reports the smallest and the largest
// value found in the given column, in that order.
//
// When d has no rows nothing is compared, so the initial sentinels are
// returned unchanged: (+Inf, -Inf).
func (d dataSet) getExtremes(column int) (float64, float64) {
	lowest, highest := math.Inf(1), math.Inf(-1)
	for i := range d {
		value := d[i][column]
		if value < lowest {
			lowest = value
		}
		if value > highest {
			highest = value
		}
	}
	return lowest, highest
}
// normalizeItem applies min-max scaling to item: it returns how far item
// lies above min as a fraction of the distance between min and max.
// The division is not guarded, so min == max yields NaN or ±Inf.
func normalizeItem(item, min, max float64) float64 {
	offset := item - min
	span := max - min
	return offset / span
}
// normalize rescales both columns of the data set in place with min-max
// scaling, so every km and price value ends up in [0, 1].
//
// As a side effect it stores the raw extremes of column 0 in the package
// variables minKm/maxKm and those of column 1 in minPrice/maxPrice; they are
// needed later to map the fitted coefficients back to the original units.
func (d *dataSet) normalize() {
	minKm, maxKm = d.getExtremes(0)
	minPrice, maxPrice = d.getExtremes(1)
	for _, row := range *d {
		row[0] = normalizeItem(row[0], minKm, maxKm)
		// The price column must be scaled with its own minimum as well as its
		// maximum; the original call omitted minPrice and did not compile.
		row[1] = normalizeItem(row[1], minPrice, maxPrice)
	}
}
// processEntry converts one CSV record into a numeric row.
//
// The record must hold exactly two fields (km, then price), each parseable
// as a 64-bit float. Any violation terminates the program via log.Fatalln.
// The returned slice holds the parsed values in field order.
func processEntry(entry []string) []float64 {
	if len(entry) != 2 {
		log.Fatalln("expected two fields")
	}
	row := make([]float64, 0, 2)
	for _, field := range entry {
		value, err := strconv.ParseFloat(field, 64)
		if err != nil {
			log.Fatalln(err)
		}
		row = append(row, value)
	}
	return row
}
// getData loads the training set from dataFile.
//
// The file is parsed as CSV; the first record is treated as a header and
// discarded, and every remaining record is converted with processEntry.
// Any I/O or parse failure, or a file without even a header record,
// terminates the program via log.Fatalln.
func getData() dataSet {
	file, err := os.Open(dataFile)
	if err != nil {
		log.Fatalln(err)
	}
	// The file is only read, so a close error carries no information worth
	// acting on; the defer just releases the descriptor on the normal path.
	defer file.Close()

	entries, err := csv.NewReader(file).ReadAll()
	if err != nil {
		log.Fatalln(err)
	}
	// Slicing off the header of an empty record list would panic.
	if len(entries) == 0 {
		log.Fatalln("expected a header row")
	}
	entries = entries[1:]

	data := make(dataSet, len(entries))
	for k, entry := range entries {
		data[k] = processEntry(entry)
	}
	return data
}
// outputResult writes the two coefficients to weights.csv as the lines
// "theta0,<value>" and "theta1,<value>", each value with six decimals.
//
// The file is created when missing and truncated when present, so the
// program also works on its first run. Any failure to open, write or close
// the file terminates the program via log.Fatalln.
func outputResult(theta0, theta1 float64) {
	file, err := os.OpenFile("weights.csv", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		log.Fatalln(err)
	}
	if _, err := fmt.Fprintf(file, "theta0,%.6f\ntheta1,%.6f\n", theta0, theta1); err != nil {
		// log.Fatalln exits without running defers, so close explicitly.
		file.Close()
		log.Fatalln(err)
	}
	// A failed close on a written file can mean the data never reached disk.
	if err := file.Close(); err != nil {
		log.Fatalln(err)
	}
}
// estimatePrice evaluates the linear model for one mileage value:
// theta0 is the intercept and theta1 the slope applied to mileage.
func estimatePrice(theta0, theta1, mileage float64) float64 {
	slopeTerm := theta1 * mileage
	return theta0 + slopeTerm
}
// computeThetas performs one batch gradient-descent step for the linear
// model estimatePrice(theta0, theta1, mileage) over all rows of d.
//
// Each row is read as {mileage, price}. The residual (prediction minus
// price) is averaged over the data set to update theta0, and the residual
// multiplied by mileage is averaged to update theta1; both averages are
// scaled by learningRate before being subtracted. The updated pair is
// returned as (theta0, theta1).
//
// An empty data set has no gradient, so the inputs are returned unchanged
// instead of dividing by zero.
func (d dataSet) computeThetas(theta0, theta1 float64) (float64, float64) {
	if len(d) == 0 {
		return theta0, theta1
	}
	dataSize := float64(len(d))
	t0sum, t1sum := 0.0, 0.0
	for _, it := range d {
		mileage, price := it[0], it[1]
		// The prediction must use both coefficients; leaving theta1 out
		// would keep the slope from ever influencing the residual.
		residual := estimatePrice(theta0, theta1, mileage) - price
		t0sum += residual
		t1sum += residual * mileage
	}
	return theta0 - (t0sum / dataSize * learningRate), theta1 - (t1sum / dataSize * learningRate)
}
func denormalize(theta,max float64) float64 {
return theta*(max-min) + min
}
func main() {
data := getData()
data.normalize()
theta0,theta1 := 0.0,0.0
for k := 0; k < iterations; k++ {
theta0,theta1 = data.computeThetas(theta0,theta1)
}
theta0 = denormalize(theta0,maxKm)
theta1 = denormalize(theta1,maxPrice)
outputResult(theta0,theta1)
}
要正确实现梯度下降,我应该修正哪些地方?