为什么我在切换循环时会得到NaN

Why I am getting NaN when I toggle the loop?

本文关键字:NaN 循环 为什么      更新时间:2023-09-26

我试图在javascript中编写k-means函数。这是我的代码。

function kmeans(arrayToProcess,cluster_n){
    var pointDimension = arrayToProcess[0].length;
    var ClusterResult = new Array();
    var ClusterCenter = new Array();
    var oldClusterCenter = new Array();
    var changed=false;
    for(var i = 0;i<cluster_n;i++)
        ClusterCenter.push(arrayToProcess[randomInt(arrayToProcess.length-1)]);
    console.log(ClusterCenter);
    // do{
    for(var k=0;k<50;k++){//loop
        for(var i = 0; i<cluster_n; i++){
            ClusterResult[i] = new Array();
        }
        for(var i = 0; i<arrayToProcess.length; i++){
            //for every point element
            var oldDistance=-1;
            var newClusterNumber = 0;
            for(var j = 0; j<cluster_n; j++){
                //for every cluster
                var distance = Math.abs(computeDistanceBetween(arrayToProcess[i], ClusterCenter[j]));   
                if (oldDistance == -1){
                    oldDistance = distance;
                    newClusterNumber = j;
                }else if ( distance <= oldDistance ){
                    newClusterNumber = j;
                    oldDistance = distance;
                }
            }
            ClusterResult[newClusterNumber].push(arrayToProcess[i]);
        }
        oldClusterCenter = ClusterCenter;
        //compute new centroid
        for(var i = 0; i<cluster_n; i++){
            newCentroid = pinit(pointDimension);
            for(var j = 0; j<ClusterResult[i].length; j++){
                newCentroid = padd(ClusterResult[i][j], newCentroid);
            }
            ClusterCenter[i] = pdivide(newCentroid, ClusterResult[i].length);
        }
        changed=false;
        for(var i = 0; i<cluster_n; i++){
            if(!pequal(ClusterCenter[i],oldClusterCenter[i]))
                changed = true;
        }
    }//while (changed == true);
    return ClusterResult;
}

function computeDistanceBetween(a,b){
    var result = 0;
    for(var i = 0; i<a.length;i++) result += a[i] * b[i];
    return result;
}
function pinit(n){
    var result = new Array(n);
    for(var i=0;i<n;i++) result[i] = 0;
    return result;
}
function padd(a,b){
    var result = new Array(a.length);
    for(var i = 0; i<a.length;i++) result[i] = a[i] + b[i];
    return result;
}
function pdivide(a,d){
    var result = new Array(a.length);
    for(var i = 0; i<a.length;i++) result[i] = a[i] / d;
    return result;
}
function pequal(a,b){
    for(var i = 0; i<a.length;i++) 
        if(a[i] != b[i]) return false;
    return true;
}
function randomInt(max){
    return randomIntBetween(0,max);
}
function randomIntBetween(min,max){
    return Math.floor(Math.random() * (max - min + 1)) + min;
}

如果我停止for循环(k<0),控制台给出正确的答案。但是,如果我启动for循环(k<1),数组ClusterCenter将始终具有一些NaN项。NaN是如何出现的?

编辑:进一步解释:如果执行了第14行for循环,上面的ClusterCenter将给出一些NaN项,为什么?

输入

的例子

var testArray = new Array();
for(var i=0; i<100; i++) testArray.push([randomInt(-150,150),randomInt(-150,150)]);
kmeans(testArray,4);

上面的ClusterCenter会给出一些NaN项,为什么?

因为你在0乘0,这不是一个数字。对于ClusterResult中的每个空集群都会发生这种情况-它将创建ClusterCenter[i] = pdivide(pinit(pointDimension), 0);

如何处理空簇?我能想到的可能的策略是选择0/0 = 0,选择一个新的随机集群中心,或者将集群全部丢弃(cluster_n--)。

但是为什么会有这么多空簇呢?因为你的computeDistanceBetween函数有严重缺陷。每个(非0|0)点与自身之间的距离为。选择更合理的距离函数,如欧氏距离。它应该总是返回一个正数,使循环中的Math.abs变得超级流畅。


其他要点:

  • newCentroid错过了var语句并泄漏到全局作用域
  • 你的changed有缺陷。当设置oldClusterCenter = ClusterCenter时,两个变量都将保存相同的数组,然后该数组会发生变化。不仅pequal(ClusterCenter[i],oldClusterCenter[i])总是正确的,甚至ClusterCenter[i]===oldClusterCenter[i]也因为oldClusterCenter === ClusterCenter而正确。

    要解决这个问题,要么创建oldClusterCenter = ClusterCenter.slice(),要么在分配后引入ClusterCenter = new Array(cluster_n);

  • 计算最近群集的代码可以简化为

    var newClusterNumber = 0,
        oldDistance = computeDistanceBetween(arrayToProcess[i], ClusterCenter[0]));
    for (var j=1; j<cluster_n; j++) {
        var distance = computeDistanceBetween(arrayToProcess[i], ClusterCenter[j]);
        if (distance <= oldDistance) {
            newClusterNumber = j;
            oldDistance = distance;
        }
    }
    

    var onewClusterNumber, ldDistance=Infinity;
    for (var j=0; j<cluster_n; j++) {
        var distance = computeDistanceBetween(arrayToProcess[i], ClusterCenter[j]);
        if (distance <= oldDistance) {
            newClusterNumber = j;
            oldDistance = distance;
        }
    }