NaN by Matrix Factorization

我使用SGD算法实现了矩阵分解,但是在运行预测矩阵时经常得到NaN。当我在非常小的(6 x 7)矩阵上运行算法时,错误很少出现。当我移至MovieLens数据集时,每次运行算法,所有单元格中都会出现该错误。只有当我将优化步骤(迭代次数)设置为1时,部分单元格中的错误才会消失。

    private static Matrix matrixFactorizationLarge (Matrix realRatingMatrix,Matrix factor_1,Matrix factor_2)
    {
        int features = (int) factor_1.getcolumnCount();
        double learningRate = 0.02;
        double regularization = 0.02;
        int optimizationSteps = 10;
        Matrix predictedRatingMatrix = SparseMatrix.Factory.zeros(realRatingMatrix.getRowCount(),realRatingMatrix.getcolumnCount());

        for (int step = 0; step < optimizationSteps; step++)
        {   
            for (int row = 0; row < predictedRatingMatrix.getRowCount(); row++)
            {
                for (int col = 0; col < predictedRatingMatrix.getcolumnCount(); col++)
                {
                    if (realRatingMatrix.getasInt(row,col) > 0)
                    {
                        Matrix vector_1 = getRow(factor_1,row);
                        Matrix vector_2 = getcolumn(factor_2,col);
                        predictedRatingMatrix.setasDouble( ( Math.floor ( dotProduct(vector_1,vector_2) * 100 ) ) / 100,row,col);

                        for (int f = 0; f < features; f++)
                        {
                            factor_1.setasDouble( ( Math.floor ( ( factor_1.getasDouble(row,f) + ( learningRate * ( ( calculateDerivative(realRatingMatrix.getasDouble(row,col),predictedRatingMatrix.getasDouble(row,factor_2.getasDouble(f,col) ) ) - ( regularization * factor_1.getasDouble(row,f) ) ) ) ) * 100 ) / 100),f); 

                            factor_2.setasDouble( ( Math.floor ( ( factor_2.getasDouble(f,col) + ( learningRate * ( ( calculateDerivative(realRatingMatrix.getasDouble(row,factor_1.getasDouble(row,f) ) ) - ( regularization * factor_2.getasDouble(f,col) ) ) ) ) * 100 ) / 100),f,col); 
                        }
                    }
                }
            }
        }

        return predictedRatingMatrix;
    }

相关方法如下:


    /**
     * Dot product of two row vectors (1 x n matrices).
     * Assumes vector_B has at least as many columns as vector_A — TODO confirm at call sites.
     */
    private static double dotProduct (Matrix vector_A,Matrix vector_B)
    {
        double dotProduct = 0.0;

        // FIX: restored proper UJMP method casing (getColumnCount / getAsDouble).
        for (int index = 0; index < vector_A.getColumnCount(); index++)
        {
            dotProduct = dotProduct + (vector_A.getAsDouble(0, index) * vector_B.getAsDouble(0, index));
        }

        return dotProduct;
    }

    /**
     * Squared error between an observed rating and its predicted value.
     */
    private static double errorOfDotProduct (double original,double dotProduct)
    {
        double difference = original - dotProduct;
        return difference * difference;
    }

    /**
     * Gradient of the squared error (realValue - predictedValue)^2 with respect to
     * one factor entry, scaled by the partner factor entry {@code value}.
     */
    private static double calculateDerivative(double realValue,double predictedValue,double value)
    {
        double error = realValue - predictedValue;
        return 2 * error * value;
    }

    /**
     * Root-mean-square error between the observed and predicted ratings, computed
     * over observed (non-zero) cells only. Returns 0 when there are no ratings.
     */
    private static double calculateRMSE (Matrix realRatingMatrix,Matrix predictedRatingMatrix)
    {
        double summation = 0.0;
        int ratedCells = 0;

        for (int row = 0; row < realRatingMatrix.getRowCount(); row++)
        {
            for (int col = 0; col < realRatingMatrix.getColumnCount(); col++)
            {
                if (realRatingMatrix.getAsDouble(row, col) != 0)
                {
                    // BUG FIX: the original passed only one argument, so the
                    // predicted value never entered the error at all.
                    summation = summation + errorOfDotProduct(realRatingMatrix.getAsDouble(row, col), predictedRatingMatrix.getAsDouble(row, col));
                    ratedCells++;
                }
            }
        }

        // FIX: take the mean before the square root — otherwise this is just the
        // root of the summed error, not an RMSE.
        return ratedCells == 0 ? 0.0 : Math.sqrt(summation / ratedCells);
    }

    /**
     * Reads a MovieLens-style CSV (userId,movieId,rating,...) into a sparse
     * rating matrix. Both ids are 1-based in the file and shifted to 0-based
     * indices. Only the first 1000 movie columns are kept.
     */
    private static Matrix csvToMatrixLarge (File csvFile)
    {
        // 610 users x 17000 movies (MovieLens small); shrink for toy data sets.
        Matrix realRatingMatrix = SparseMatrix.Factory.zeros(610,17000);
//      Matrix realRatingMatrix = SparseMatrix.Factory.zeros(6,7);

        // try-with-resources: Scanner is closed even if parsing throws.
        try (Scanner inputStream = new Scanner(csvFile))
        {
            while (inputStream.hasNext())
            {
                String ln = inputStream.next();
                String[] values = ln.split(",");

                double rating = Double.parseDouble(values[2]);
                int row = Integer.parseInt(values[0]) - 1;  // userId is 1-based
                int col = Integer.parseInt(values[1]) - 1;  // movieId is 1-based

                if (col < 1000)
                {
                    // BUG FIX: the original dropped the row coordinate, so every
                    // rating was written into row 0, overwriting earlier values.
                    realRatingMatrix.setAsDouble(rating, row, col);
                }
            }
        }
        catch (FileNotFoundException e)
        {
            e.printStackTrace();
        }

        return realRatingMatrix;
    }

    /**
     * Creates a dense rows x features zero matrix; the caller fills it with
     * random values afterwards (see fillInmatrixLarge).
     */
    private static Matrix createFactorLarge (long rows,long features)
    {
        return DenseMatrix.Factory.zeros(rows, features);
    }

    /**
     * Fills every cell of {@code matrix} in place with a random value in
     * [0, 5.1), truncated to one decimal place.
     */
    private static void fillInmatrixLarge (Matrix matrix)
    {
        for (int row = 0; row < matrix.getRowCount(); row++)
        {
            for (int col = 0; col < matrix.getColumnCount(); col++)
            {
                double random = ThreadLocalRandom.current().nextDouble(5.1);
                // BUG FIX: the original omitted the row coordinate, so only
                // row 0 was ever filled and all other factor rows stayed 0.
                matrix.setAsDouble(Math.floor(random * 10) / 10, row, col);
            }
        }
    }

    private static Matrix getRow (Matrix matrix,int rowOfIntresst)
    {
        Matrix row = Matrix.Factory.zeros(1,matrix.getcolumnCount());

        for (int col = 0; col < matrix.getcolumnCount(); col++)
        {
            row.setasDouble(matrix.getasDouble(rowOfIntresst,col);
        }

        return row;
    }

    /**
     * Copies column {@code colOfInteresst} of {@code matrix} into a new
     * 1 x rowCount vector (stored as a row, matching dotProduct's expectation).
     */
    private static Matrix getcolumn (Matrix matrix,int colOfInteresst)
    {
        Matrix column = Matrix.Factory.zeros(1, matrix.getRowCount());

        for (int index = 0; index < matrix.getRowCount(); index++)
        {
            // column[0][index] = matrix[index][colOfInteresst]
            // BUG FIX: the original passed only one coordinate to setasDouble;
            // the destination needs both row (0) and column (index).
            column.setAsDouble(matrix.getAsDouble(index, colOfInteresst), 0, index);
        }

        return column;
    }

是什么导致错误,因为我在算法中未除以零?而我该如何解决呢?

P.S。我正在使用通用矩阵库软件包

y4328978 回答:NaN by Matrix Factorization

避免矩阵分解中的Not a Number-NaN-错误的关键是选择正确的学习率。重要的是要注意,正确的学习率始终取决于迭代次数。下面是一个说明问题的示例:

No. Of Iterations: 3
Learning Rate: 0.02
Regularization Rate: 0.02

在优化之前的迭代1中,我们以以下因素为例:
enter image description here

预测评分,第4行第2列:(4.96 * 1.26)+(4.9 * 2.25)= 17.27

优化因素后,我们将得到:

enter image description here

第4行和第2列经过优化,直到在迭代2中回到它们为止。

enter image description here

预测评分,第4行第2列:(-2.31 * 233089.24)+(-1.67 * -888.59)= -536952.2

第4行和第2列中的每个单元格均已优化。我将显示第1行第1列的优化步骤:

-2.31 + 0.02 [ ( 2 ( 4 + 536952.2 ) ( 233089.24 ) ) - ( 0.02 * -2.31 ) ] =
-2.31 + 0.02 [ ( 2 * 536956.2 * 233089.24 ) - ( 0.02 * -2.31 ) ] =
-2.31 + 0.02 [ ( 250317425142.57 ) - ( 0.04 ) ] =

我们可以看到,在这一步,我们得到了很大的导数。这里的关键是选择正确的学习率。学习速率决定了接近最小值的速率。如果我们将其设置得太大,则可能会跳过该最小值并转向无穷大,从而错过最小值。

enter image description here

-2.31 + 0.02 [ 250317425142.57 ] =
-2.31 + 5006348502.85 =
5006348500.54

随着优化的继续,我们将在下一次迭代中对该单元格获取 Infinity ,这将导致在添加数字时出现NaN错误。

通过选择较小的学习率,我们将避免错误并迅速达到最低点。

本文链接:https://www.f2er.com/2693636.html

大家都在问