为什么我的神经网络对每个输入都给出相同的预测?

我正在尝试创建一个神经网络,它接受 294 个输入并预测哪些输入有可能成为输出。此外,我还想做回归,以找出实际值和预测值之间有多少差异,所以我在输出层添加了两个回归输出节点。在输出中添加回归之前,模型预测得足够好;但是在添加之后,无论我做什么,模型都开始给出相同的值。于是我决定检查权重,结果发现了这样的东西:

[[ 0.19589818  0.45867598 -0.1103735  -0.11739671  0.3524462   0.3615998
  -0.11838996]
 [-0.37149632  0.29049385  0.27328718  0.39140654 -0.22933161  0.07160628
   0.33962536]
 [ 0.21745765  0.19408011 -0.28868628 -0.0097748   0.06756687 -0.40600073
   0.0485481 ]
 [-0.4144268   0.4770614  -0.1586262   0.06003821  0.01309896  0.47136605
  -0.41377842]
 [-0.25865722 -0.3038118   0.2767954   0.33988214 -0.48508477  0.33661437
  -0.20484531]
 [ 0.4246924  -0.4958439   0.2031511   0.4845667   0.18330884 -0.1708759
   0.28903925]
 [-0.4602847  -0.02263796  0.27997506 -0.33072484 -0.44759667 -0.14221525
   0.2714281 ]
 [-0.3839649  -0.13256657 -0.03424132 -0.36362755 -0.4561025  -0.12396967
   0.15885079]
 [-0.273561   -0.09750211 -0.4644209   0.4556396  -0.3021226   0.26363683
  -0.43606043]
 [ 0.2392633  -0.1741817   0.48888505 -0.43252754  0.101964    0.02732563
  -0.28655064]
 [ 0.41151023 -0.16941857 -0.48709846  0.23205352 -0.22945309  0.2136854]
.
.
.
.
[-0.01252615 -0.19594312  0.26858175 -0.07100904  0.16546512  0.11748069
   0.36638904]]

以上是任何更新之前第 294 层的权重。经过若干次更新之后,权重变成了:

weights for layer294:[[[ 0.19589818  0.19589818  0.19589818 ...  0.19589818  0.19589818
    0.19589818]
  [ 0.45867598  0.45867598  0.45867598 ...  0.45867598  0.45867598
    0.45867598]
  [-0.1103735  -0.1103735  -0.1103735  ... -0.1103735  -0.1103735
   -0.1103735 ]
  ...
  [ 0.3524462   0.3524462   0.3524462  ...  0.3524462   0.3524462
    0.3524462 ]
  [ 0.3615998   0.3615998   0.3615998  ...  0.3615998   0.3615998
    0.3615998 ]
  [-0.11838996 -0.11838996 -0.11838996 ... -0.11838996 -0.11838996
   -0.11838996]]

 [[-0.37149632 -0.37149632 -0.37149632 ... -0.37149632 -0.37149632
   -0.37149632]
  [ 0.29049385  0.29049385  0.29049385 ...  0.29049385  0.29049385
    0.29049385]
  [ 0.27328718  0.27328718  0.27328718 ...  0.27328718  0.27328718
    0.27328718]
  ...
  [-0.22933161 -0.22933161 -0.22933161 ... -0.22933161 -0.22933161
   -0.22933161]
  [ 0.07160628  0.07160628  0.07160628 ...  0.07160628  0.07160628
    0.07160628]
  [ 0.33962536  0.33962536  0.33962536 ...  0.33962536  0.33962536
    0.33962536]]

 [[ 0.21745765  0.21745765  0.21745765 ...  0.21745765  0.21745765
    0.21745765]
  [ 0.19408011  0.19408011  0.19408011 ...  0.19408011  0.19408011
    0.19408011]
  [-0.28868628 -0.28868628 -0.28868628 ... -0.28868628 -0.28868628
   -0.28868628]
  ...
  [ 0.06756687  0.06756687  0.06756687 ...  0.06756687  0.06756687
    0.06756687]
  [-0.40600073 -0.40600073 -0.40600073 ... -0.40600073 -0.40600073
   -0.40600073]
  [ 0.0485481   0.0485481   0.0485481  ...  0.0485481   0.0485481
    0.0485481 ]]
.
.
.
.
.
.
[ 0.36638904  0.36638904  0.36638904 ...  0.36638904  0.36638904
    0.36638904]]]

权重的数值似乎没有改变,只是维度增加了。这是正常的吗? 以下是我构建模型的方式:

 import warnings
    import pandas as pd
    pd.options.mode.chained_assignment = None  # default='warn'
    
    
    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    import tensorflow as tf
    from tensorflow.keras import Input
    import tensorflow.keras.callbacks
    from keras.models import Sequential 
    from keras.layers.core import Dense
    from keras.optimizers import SGD,Adam
    from keras.models import Model
    from keras.layers import concatenate,activation
    from keras.layers.advanced_activations import ELU
    from sklearn.metrics import classification_report
    from sklearn.preprocessing import LabelBinarizer
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    import numpy as np
    from getWeights import GetWeights
    
    def build(layer_str):
        """Build a fully connected Keras model from a dash-separated layer spec.

        Args:
            layer_str: Layer widths joined by "-", e.g. "17-7-1". The first
                number is the input width, the last one is the output width,
                and every number in between becomes a Dense layer followed by
                an ELU activation.

        Returns:
            An uncompiled ``Model`` whose output is a sigmoid-activated Dense
            layer of the last listed width.

        Raises:
            ValueError: If an entry is not an integer, or if fewer than two
                widths are given (an input and an output width are required).
        """
        widths = [int(width) for width in layer_str.split("-")]
        if len(widths) < 2:
            raise ValueError(
                "layer_str must list at least an input and an output width, "
                "got %r" % (layer_str,)
            )

        inputs = Input(shape=(widths[0],))

        # Chain the hidden layers through the running tensor. Starting from
        # the input itself means a spec without hidden layers ("17-1") simply
        # connects the input to the output layer instead of failing.
        hidden = inputs
        for units in widths[1:-1]:
            hidden = ELU(alpha=1.0)(Dense(units)(hidden))

        classifier = Dense(widths[-1], activation="sigmoid")(hidden)

        return Model(inputs=inputs, outputs=classifier)
        
    def split(data,label,split_ratio):
        """Cut every sequence into a leading training part and a trailing test part.

        Args:
            data: Iterable of sliceable sequences (the model inputs).
            label: Iterable of sliceable sequences (the targets).
            split_ratio: Fraction of each sequence that goes to the test part.

        Returns:
            A ``(train_list, test_list)`` tuple. Each list holds the pieces of
            all ``data`` sequences first, followed by the pieces of all
            ``label`` sequences, in their original order.
        """
        train_list, test_list = [], []
        # Inputs first, labels last: callers rely on the labels sitting at the
        # end of both returned lists.
        for sequence in [*data, *label]:
            cut = round(len(sequence)*(1-split_ratio))
            train_list.append(sequence[:cut])
            test_list.append(sequence[cut:])
        return train_list, test_list
    def train_eval(data,model,lr=0.01,epochs_in=100,batch_size_in=16):
        """Compile and fit ``model``, then predict on the held-out part of ``data``.

        Args:
            data: List of arrays. The LAST THREE entries are the targets, in
                the order of the three losses compiled below; every entry
                before them is fed to the model as an input.
            model: Uncompiled Keras model with three outputs. The checkpoint
                callback monitors ``val_pred_accuracy``, so one output is
                expected to be named "pred".
            lr: Initial learning rate handed to the cosine decay schedule.
            epochs_in: Number of training epochs.
            batch_size_in: Batch size used for both fitting and predicting.

        Returns:
            The result of ``model.predict`` on the held-out inputs.
        """
        warnings.filterwarnings("ignore",category=FutureWarning)

        # split() keeps the leading 75 % of every array for training and the
        # trailing 25 % for validation.
        split_ratio=0.25
        # The targets already travel at the end of `data`, so no separate
        # label list is handed to split().
        train_list,test_list=split(data,[],split_ratio)

        # Peel the three target arrays off the end; what remains are the inputs.
        trainY=train_list[-3:]
        del train_list[-3:]
        testY=test_list[-3:]
        del test_list[-3:]

        # training the network
        print("[INFO]Trainig the network....")
        # NOTE(review): with the default alpha=0 the cosine schedule reaches a
        # learning rate of 0 after `decay_steps` optimizer steps, so weights
        # stop changing from then on -- confirm 1000 covers the intended run.
        decay_steps = 1000
        lr_decayed_fn = tf.keras.experimental.CosineDecay(lr,decay_steps)
        sgd=SGD(lr_decayed_fn,momentum=0.8)
        model.compile(loss=["categorical_crossentropy","mean_squared_error","mean_squared_error"],optimizer=sgd,metrics=["accuracy"])
        checkpoint_filepath = 'checkpoint1'
        model_checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,save_weights_only=True,monitor='val_pred_accuracy',mode='max',save_best_only=True)
        # Project-local callback from getWeights; presumably records layer
        # weights while training -- verify against that module.
        gw = GetWeights()
        # `callbacks` is documented as a flat list of callback instances.
        model.fit(train_list,trainY,validation_data=(test_list,testY),epochs=epochs_in,batch_size=batch_size_in,callbacks=[model_checkpoint_callback,gw])

        # evaluate the network
        print("[INFO]Evaluating the network....")
        predictions=model.predict(test_list,batch_size=batch_size_in)

        return predictions


    def Merge_model(layer,nbx,regress=False):
        """Join ``nbx`` identical sub-networks into one multi-input model.

        Args:
            layer: Layer spec string passed unchanged to ``build`` for every
                sub-network.
            nbx: Number of sub-networks; also the width of the shared Dense
                layer and of the softmax output.
            regress: When equal to ``True``, two extra single-unit outputs
                named "x" and "y" are attached next to the softmax output.

        Returns:
            A ``Model`` taking one input per sub-network. Its outputs are
            ``[pred]``, or ``[pred, x, y]`` when ``regress`` is enabled.
        """
        # One independent sub-network (own weights) per input.
        branches = [build(layer) for _ in range(nbx)]

        fused = concatenate([tf.convert_to_tensor(branch.output) for branch in branches])
        shared = Dense(nbx,activation="relu")(fused)

        # The softmax head always comes first in the output list.
        heads = [Dense(nbx,activation="softmax",name="pred")(shared)]
        # Equality test (not truthiness) kept on purpose so that only values
        # comparing equal to True switch the regression heads on.
        if regress==True:
            heads.append(Dense(1,activation='linear',name="x")(shared))
            heads.append(Dense(1,name="y")(shared))

        return Model([branch.input for branch in branches],heads)

我是这样实现的:

# Driver script: load the per-input feature arrays and the labels, build the
# merged model with regression heads, train it and keep the predictions.

#hyperparameters
nbx=294
# NOTE(review): 1e-8 is an extremely small step for SGD; the weights may
# barely move away from their initial values -- confirm this is intended.
lr=1e-8
epochs=100
batch_size=16

#input data
with open("dataframe.pkl","rb") as vector_file:
    vect_df=pickle.load(vector_file)
# One stacked array per sub-network; columns are looked up as "0" .. str(nbx-1).
input_list=[np.stack(vect_df[str(i)]) for i in range(nbx)]
data=input_list
label_path=glob.glob("test_image/*.pkl")
label=lb.read_label_file(label_path)

# Targets for the three model outputs, taken from positions 0, 1 and 2 of
# every label record.
label1=np.array([a[0] for a in label])
label2=np.array([a[1] for a in label])
label3=np.array([a[2] for a in label])
input_label=[label1,label2,label3]


# Merge_model requires the number of sub-networks as its second argument.
model=nn.Merge_model("17-7-1",nbx,regress=True)
plot_model(model,to_file='model.png',rankdir='LR')
# train_eval takes the model as its second argument and reads the three
# targets from the end of the data list.
prediction=nn.train_eval(data+input_label,model,lr,epochs,batch_size)

我的神经网络图: https://drive.google.com/file/d/1w_Obek1fzyrUBRfXilEBD4LH5urP0kal/view?usp=sharing

zclan2008 回答:为什么我的神经网络对每个输入都给出相同的预测?

暂时没有好的解决方案,如果你有好的解决方案,请发邮件至:iooj@foxmail.com
本文链接:https://www.f2er.com/6261.html

大家都在问