实施梯度下降

我正在尝试用Go实现梯度下降。我的目标是根据行驶里程(km)来预测汽车的价格(price)。这是我的数据集:

km,price
240000,3650
139800,3800
150500,4400
185530,4450
176000,5250
114800,5350
166800,5800
89000,5990
144500,5999
84000,6200
82029,6390
63060,6390
74000,6600
97500,6800
67000,6800
76025,6900
48235,6900
93000,6990
60949,7490
65674,7555
54000,7990
68500,7990
22899,7990
61789,8290

我尝试了各种方法,例如对数据集进行归一化、不做归一化、保持theta不变、对theta进行反归一化……但始终无法得到正确的结果。我的数学推导一定在某个地方出了错,但我不知道错在哪里。我期望得到的结果大约是 t0 = 8500,t1 = -0.02。我的实现如下:

package main

import (
    "encoding/csv"
    "fmt"
    "log"
    "math"
    "os"
    "strconv"
)

// Settings for the training run.
const (
    // dataFile is the CSV file opened by getData.
    dataFile     = "data.csv"
    // iterations is the number of computeThetas steps performed by main.
    iterations   = 20000
    // learningRate scales the averaged error sums in computeThetas.
    learningRate = 0.1
)

// dataSet is a list of rows; processEntry builds each row as {km, price}.
type dataSet [][]float64

// Column extremes written by normalize; main passes them to denormalize after
// training.
var minKm,maxKm,minPrice,maxPrice float64

// getExtremes scans the given column of every row and returns the smallest
// and the largest value found, in that order. For a data set without rows it
// returns (+Inf, -Inf).
func (d dataSet) getExtremes(column int) (float64, float64) {
    lowest, highest := math.Inf(1), math.Inf(-1)
    for i := range d {
        value := d[i][column]
        if value < lowest {
            lowest = value
        }
        if value > highest {
            highest = value
        }
    }
    return lowest, highest
}

// normalizeItem rescales item linearly so that min maps to 0 and max maps
// to 1 (min-max scaling).
func normalizeItem(item, min, max float64) float64 {
    offset := item - min
    span := max - min
    return offset / span
}

// normalize rescales both columns of the data set in place with min-max
// scaling, so that every km and price value ends up in [0, 1].
//
// The extremes of each column are stored in the package-level variables
// minKm/maxKm and minPrice/maxPrice so that parameters trained on the scaled
// data can later be converted back to the original units.
func (d *dataSet) normalize() {
    minKm, maxKm = d.getExtremes(0)
    minPrice, maxPrice = d.getExtremes(1)
    for _, row := range *d {
        // row shares its backing array with the data set, so these writes
        // modify the stored samples.
        row[0] = normalizeItem(row[0], minKm, maxKm)
        // The price column is scaled with both price bounds.
        row[1] = normalizeItem(row[1], minPrice, maxPrice)
    }
}

// processEntry converts one CSV record into a {km, price} sample.
//
// The record must contain exactly two fields, each parseable as a float64;
// otherwise the program is terminated through log.Fatalln.
func processEntry(entry []string) []float64 {
    if len(entry) != 2 {
        log.Fatalln("expected two fields")
    }
    sample := make([]float64, 0, 2)
    for _, field := range entry {
        value, err := strconv.ParseFloat(field, 64)
        if err != nil {
            log.Fatalln(err)
        }
        sample = append(sample, value)
    }
    return sample
}

// getData loads the training samples from dataFile.
//
// The file is parsed as CSV; its first record is treated as a header and
// skipped, and every remaining record is converted with processEntry. Any
// I/O or parse error, as well as a completely empty file, terminates the
// program through the log package.
func getData() dataSet {
    file, err := os.Open(dataFile)
    if err != nil {
        log.Fatalln(err)
    }
    // Release the file handle once everything has been read.
    defer file.Close()

    entries, err := csv.NewReader(file).ReadAll()
    if err != nil {
        log.Fatalln(err)
    }
    // Without this guard, slicing off the header below would panic with an
    // out-of-range error on an empty file.
    if len(entries) == 0 {
        log.Fatalf("%s is empty: expected a header row", dataFile)
    }
    entries = entries[1:]

    data := make(dataSet, len(entries))
    for k, entry := range entries {
        data[k] = processEntry(entry)
    }
    return data
}

// outputResult writes the two model parameters to weights.csv as
//
//     theta0,<value>
//     theta1,<value>
//
// with six decimal places each. The file is created if it does not exist and
// any previous content is discarded. Every failure (open, write, close)
// terminates the program through log.Fatalln.
func outputResult(theta0, theta1 float64) {
    // O_CREATE lets the first run succeed; O_TRUNC replaces the separate
    // Truncate/Seek calls.
    file, err := os.OpenFile("weights.csv", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
    if err != nil {
        log.Fatalln(err)
    }
    if _, err := fmt.Fprintf(file, "theta0,%.6f\ntheta1,%.6f\n", theta0, theta1); err != nil {
        file.Close()
        log.Fatalln(err)
    }
    // Close can report a delayed write error, so it is checked as well.
    if err := file.Close(); err != nil {
        log.Fatalln(err)
    }
}

// estimatePrice evaluates the linear model for one mileage value and returns
// theta0 + theta1*mileage.
func estimatePrice(theta0, theta1, mileage float64) float64 {
    slopeTerm := theta1 * mileage
    return theta0 + slopeTerm
}

// computeThetas performs one batch gradient-descent step for the linear model
// price = theta0 + theta1*mileage and returns the updated (theta0, theta1).
//
// For every sample the prediction error is computed with the current
// parameters; the mean error drives the theta0 update and the mean of
// error*mileage drives the theta1 update, both scaled by learningRate.
// An empty data set leaves the parameters unchanged.
func (d dataSet) computeThetas(theta0, theta1 float64) (float64, float64) {
    if len(d) == 0 {
        // Avoid 0/0, which would turn both parameters into NaN.
        return theta0, theta1
    }

    dataSize := float64(len(d))
    t0sum, t1sum := 0.0, 0.0
    for _, it := range d {
        mileage, price := it[0], it[1]
        // The prediction must use both parameters of the current model.
        err := estimatePrice(theta0, theta1, mileage) - price
        t0sum += err
        t1sum += err * mileage
    }

    return theta0 - (t0sum / dataSize * learningRate), theta1 - (t1sum / dataSize * learningRate)
}

func denormalize(theta,max float64) float64 {

    return theta*(max-min) + min
}

func main() {

    data := getData()
    data.normalize()
    theta0,theta1 := 0.0,0.0
    for k := 0; k < iterations; k++ {
        theta0,theta1 = data.computeThetas(theta0,theta1)
    }
    theta0 = denormalize(theta0,maxKm)
    theta1 = denormalize(theta1,maxPrice)
    outputResult(theta0,theta1)
}

要正确实施梯度下降,我应该解决什么?

lcflhc 回答:实施梯度下降

Linear Regression非常简单:

// linearRegression fits the line y = alpha + beta*x to the samples using the
// closed-form least-squares solution and returns (alpha, beta).
//
// Model: yi = alpha + beta*xi + ei
// beta is Cov(X,Y)/Var(X) and alpha is E[Y] - beta*E[X].
func linearRegression(x, y []float64) (float64, float64) {
    meanX, meanY := expected(x), expected(y)
    covXY := expectedXY(x, y) - meanX*meanY
    varX := expectedXY(x, x) - meanX*meanX
    slope := covXY / varX
    intercept := meanY - slope*meanX
    return intercept, slope
}

尝试here,输出:

8499.599649933218 -0.021448963591702314 396270.87871142407

代码:

package main

import (
    "encoding/csv"
    "fmt"
    "strconv"
    "strings"
)

// main reads the samples, fits the regression line and prints the intercept,
// the slope and the value -alpha/beta (the x at which the line crosses zero).
func main() {
    xs, ys := readXY(`data.csv`)
    intercept, slope := linearRegression(xs, ys)
    root := -intercept / slope
    fmt.Println(intercept, slope, root) // 8499.599649933218 -0.021448963591702314 396270.87871142407
}

// https://en.wikipedia.org/wiki/Ordinary_least_squares#Simple_linear_regression_model
// yi = alpha + beta*xi + ei
func linearRegression(x,beta
}

// expected returns E[X], the arithmetic mean of the values in x.
// For an empty slice the result is NaN (0/0).
func expected(x []float64) float64 {
    total := 0.0
    for i := 0; i < len(x); i++ {
        total += x[i]
    }
    return total / float64(len(x))
}

// expectedXY returns E[XY], the mean of the element-wise products x[i]*y[i].
// The sum runs over the indices of x and is divided by len(x), so y must be
// at least as long as x.
func expectedXY(x, y []float64) float64 {
    total := 0.0
    for i := range x {
        total += x[i] * y[i]
    }
    return total / float64(len(x))
}

// readXY parses the CSV samples and returns the first column as x and the
// second column as y. The first record is treated as a header and skipped.
//
// The filename argument is currently not used: the CSV text comes from the
// package-level data string. To read from disk instead, open filename with
// os.Open, defer its Close, and hand the file to csv.NewReader.
//
// Any CSV or number-parsing error results in a panic.
func readXY(filename string) ([]float64, []float64) {
    rows, err := csv.NewReader(strings.NewReader(data)).ReadAll()
    if err != nil {
        panic(err)
    }
    rows = rows[1:]

    xs := make([]float64, len(rows))
    ys := make([]float64, len(rows))
    for i := range rows {
        if xs[i], err = strconv.ParseFloat(rows[i][0], 64); err != nil {
            panic(err)
        }
        if ys[i], err = strconv.ParseFloat(rows[i][1], 64); err != nil {
            panic(err)
        }
    }
    return xs, ys
}

// data is the embedded CSV sample set parsed by readXY: a "km,price" header
// line followed by 24 rows of mileage and price.
var data = `km,price
240000,3650
139800,3800
150500,4400
185530,4450
176000,5250
114800,5350
166800,5800
89000,5990
144500,5999
84000,6200
82029,6390
63060,6390
74000,6600
97500,6800
67000,6800
76025,6900
48235,6900
93000,6990
60949,7490
65674,7555
54000,7990
68500,7990
22899,7990
61789,8290`

梯度下降(Gradient descent)基于以下观察:如果多变量函数 $F(x)$ 在点 $a$ 的邻域内有定义且可微,那么从 $a$ 出发,沿 $F$ 在 $a$ 处的负梯度方向 $-\nabla F(a)$ 移动时,$F(x)$ 下降最快,例如:

// F(x): the target function that the derivative and the search below operate on.
// alpha and beta must already be in scope; presumably they are the values
// returned by linearRegression — confirm against the surrounding code.
f := func(x float64) float64 {
    return alpha + beta*x // write your target function here
}

微分函数:

// h is the offset applied on each side of x when differentiating numerically.
h := 0.000001
// Derivative function ∇F(x), approximated with the central difference
// (f(x+h) - f(x-h)) / (2h).
df := func(x float64) float64 {
    return (f(x+h) - f(x-h)) / (2 * h) // write your target function derivative here
}

搜索:

// Gradient-descent search: repeatedly move from the current point against the
// derivative, x_next = x - gamma*df(x), until the step is no larger than
// precision or max iterations have been performed.
minimunAt := 1.0       // We start the search here
gamma := 0.01          // Step size multiplier
precision := 0.0000001 // Desired precision of result
max := 100000          // Maximum number of iterations
currentX := 0.0
step := 0.0
for i := 0; i < max; i++ {
    currentX = minimunAt
    minimunAt = currentX - gamma*df(currentX)
    step = minimunAt - currentX
    // Stop once the update no longer moves the point noticeably.
    if math.Abs(step) <= precision {
        break
    }
}

// Report the final point and the value of f there.
fmt.Printf("Minimum at %.8f value: %v\n",minimunAt,f(minimunAt))
本文链接:https://www.f2er.com/3111761.html

大家都在问