I am trying to build a neural network that takes 294 inputs and predicts which of those inputs are likely to become outputs. I also want to regress how far the actual values are from the predicted ones, so I added two regression output nodes to the output layer. Before I added the regression outputs the model predicted reasonably well, but after adding them the model keeps producing the same value no matter what I do. I then decided to inspect the weights, and I found something like this:
[[ 0.19589818 0.45867598 -0.1103735 -0.11739671 0.3524462 0.3615998
-0.11838996]
[-0.37149632 0.29049385 0.27328718 0.39140654 -0.22933161 0.07160628
0.33962536]
[ 0.21745765 0.19408011 -0.28868628 -0.0097748 0.06756687 -0.40600073
0.0485481 ]
[-0.4144268 0.4770614 -0.1586262 0.06003821 0.01309896 0.47136605
-0.41377842]
[-0.25865722 -0.3038118 0.2767954 0.33988214 -0.48508477 0.33661437
-0.20484531]
[ 0.4246924 -0.4958439 0.2031511 0.4845667 0.18330884 -0.1708759
0.28903925]
[-0.4602847 -0.02263796 0.27997506 -0.33072484 -0.44759667 -0.14221525
0.2714281 ]
[-0.3839649 -0.13256657 -0.03424132 -0.36362755 -0.4561025 -0.12396967
0.15885079]
[-0.273561 -0.09750211 -0.4644209 0.4556396 -0.3021226 0.26363683
-0.43606043]
[ 0.2392633 -0.1741817 0.48888505 -0.43252754 0.101964 0.02732563
-0.28655064]
[ 0.41151023 -0.16941857 -0.48709846 0.23205352 -0.22945309 0.2136854]
...
[-0.01252615 -0.19594312 0.26858175 -0.07100904 0.16546512 0.11748069
0.36638904]]
The above are the weights for layer 294 before any updates. Then, after a few updates, the weights look like this:
weights for layer294:[[[ 0.19589818 0.19589818 0.19589818 ... 0.19589818 0.19589818
0.19589818]
[ 0.45867598 0.45867598 0.45867598 ... 0.45867598 0.45867598
0.45867598]
[-0.1103735 -0.1103735 -0.1103735 ... -0.1103735 -0.1103735
-0.1103735 ]
...
[ 0.3524462 0.3524462 0.3524462 ... 0.3524462 0.3524462
0.3524462 ]
[ 0.3615998 0.3615998 0.3615998 ... 0.3615998 0.3615998
0.3615998 ]
[-0.11838996 -0.11838996 -0.11838996 ... -0.11838996 -0.11838996
-0.11838996]]
[[-0.37149632 -0.37149632 -0.37149632 ... -0.37149632 -0.37149632
-0.37149632]
[ 0.29049385 0.29049385 0.29049385 ... 0.29049385 0.29049385
0.29049385]
[ 0.27328718 0.27328718 0.27328718 ... 0.27328718 0.27328718
0.27328718]
...
[-0.22933161 -0.22933161 -0.22933161 ... -0.22933161 -0.22933161
-0.22933161]
[ 0.07160628 0.07160628 0.07160628 ... 0.07160628 0.07160628
0.07160628]
[ 0.33962536 0.33962536 0.33962536 ... 0.33962536 0.33962536
0.33962536]]
[[ 0.21745765 0.21745765 0.21745765 ... 0.21745765 0.21745765
0.21745765]
[ 0.19408011 0.19408011 0.19408011 ... 0.19408011 0.19408011
0.19408011]
[-0.28868628 -0.28868628 -0.28868628 ... -0.28868628 -0.28868628
-0.28868628]
...
[ 0.06756687 0.06756687 0.06756687 ... 0.06756687 0.06756687
0.06756687]
[-0.40600073 -0.40600073 -0.40600073 ... -0.40600073 -0.40600073
-0.40600073]
[ 0.0485481 0.0485481 0.0485481 ... 0.0485481 0.0485481
0.0485481 ]]
...
[ 0.36638904 0.36638904 0.36638904 ... 0.36638904 0.36638904
0.36638904]]]
It looks like the weights are not changing at all; instead they seem to have grown in dimension (each original value is now repeated across a whole row). Is that supposed to happen? This is how I build the model:
import warnings
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, ELU, concatenate
from tensorflow.keras.optimizers import SGD, Adam
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from getWeights import GetWeights
def build(layer_str):
    # take the layer structure string and convert it into a list
    layers = layer_str.split("-")
    # convert the strings in the list to integers
    layers = list(map(int, layers))
    # build the model: add the input layer first
    inputs = Input(shape=(layers[0],))
    H_inputs = inputs
    # add the hidden layers
    Hidden_list = []
    for (x, i) in enumerate(layers):
        if x > 0 and x != (len(layers) - 1):
            layer = Dense(i)(H_inputs)
            Hidden_list.append(ELU(alpha=1.0)(layer))
            H_inputs = Hidden_list[-1]
    # then add the final layer
    classifier = Dense(layers[-1], activation="sigmoid")(Hidden_list[-1])
    model = Model(inputs=inputs, outputs=classifier)
    return model
def split(data, label, split_ratio):
    train_list = []
    test_list = []
    for a in data:
        split_idx = round(len(a) * (1 - split_ratio))
        train_list.append(a[:split_idx])
        test_list.append(a[split_idx:])
    for l in label:
        split_idx = round(len(l) * (1 - split_ratio))
        train_list.append(l[:split_idx])
        test_list.append(l[split_idx:])
    return train_list, test_list
def train_eval(data, label, model, lr=0.01, epochs_in=100, batch_size_in=16):
    warnings.filterwarnings("ignore", category=FutureWarning)
    # split the data and labels into train and test sets; we usually hold out 25% of the data for testing
    initial_learning_rate = lr
    # for the merged model
    split_ratio = 0.25
    train_list, test_list = split(data, label, split_ratio)
    # extract the labels (the last three entries of each list)
    trainY = train_list[-3:]
    del train_list[-3:]
    testY = test_list[-3:]
    del test_list[-3:]
    # train the network
    print("[INFO] Training the network....")
    decay_steps = 1000
    lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(initial_learning_rate, decay_steps)
    sgd = SGD(learning_rate=lr_decayed_fn, momentum=0.8)
    model.compile(loss=["categorical_crossentropy", "mean_squared_error", "mean_squared_error"],
                  optimizer=sgd, metrics=["accuracy"])
    checkpoint_filepath = 'checkpoint1'
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath, save_weights_only=True,
        monitor='val_pred_accuracy', mode='max', save_best_only=True)
    gw = GetWeights()
    H = model.fit(train_list, trainY, validation_data=(test_list, testY),
                  epochs=epochs_in, batch_size=batch_size_in,
                  callbacks=[model_checkpoint_callback, gw])
    # evaluate the network
    print("[INFO] Evaluating the network....")
    predictions = model.predict(test_list, batch_size=batch_size_in)
    return predictions
def Merge_model(layer, nbx, regress=False):
    # build nbx identical sub-models and merge their outputs
    model_list = []
    for i in range(nbx):
        model = build(layer)
        model_list.append(model)
    merged_layers = concatenate([model_list[i].output for i in range(nbx)])
    x = Dense(nbx, activation="relu")(merged_layers)
    out = Dense(nbx, activation="softmax", name="pred")(x)
    if regress:
        adj1 = Dense(1, activation='linear', name="x")(x)
        adj2 = Dense(1, name="y")(x)
        merged_model = Model([model_list[i].input for i in range(nbx)], [out, adj1, adj2])
    else:
        merged_model = Model([model_list[i].input for i in range(nbx)], [out])
    return merged_model
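(GetWeights is a small custom callback of mine that records the weights after every epoch; it is not shown above, but it does roughly the following -- a sketch, not the exact code:)

# rough sketch of GetWeights (assumed behaviour: record every layer's weights each epoch)
import tensorflow as tf

class GetWeights(tf.keras.callbacks.Callback):
    def __init__(self):
        super().__init__()
        self.weight_history = []

    def on_epoch_end(self, epoch, logs=None):
        # layer.get_weights() returns e.g. [kernel, bias] for a Dense layer
        snapshot = [layer.get_weights() for layer in self.model.layers]
        self.weight_history.append(snapshot)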
And this is how I call it:
with open("dataframe.pkl","rb") as vector_file:
vect_df=pickle.load(vector_file)
input_list=[np.stack(vect_df[str(i)]) for i in range(294) ]
#hyperparameters
nbx=294
lr=1e-8
epochs=100
batch_size=16
#input data
data=input_list
label_path=glob.glob("test_image/*.pkl")
label=lb. read_label_file(label_path)
#if regressing uncomment the following
label1=np.array([a[0] for a in label])
label2=np.array([a[1] for a in label])
label3=np.array([a[2] for a in label])
input_label=[label1,label2,label3]
model=nn.Merge_model("17-7-1",regress=True)
plot_model(model,to_file='model.png',rankdir='LR')
prediction=nn.train_eval(data,input_label,lr,epochs,batch_size)
Here is a diagram of my network: https://drive.google.com/file/d/1w_Obek1fzyrUBRfXilEBD4LH5urP0kal/view?usp=sharing
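For what it's worth, this is the kind of quick check that should show whether a layer's kernel really changes shape during training (a sketch, assuming the merged model built above is in model):

# sketch: print each layer's weight shapes; for a Dense layer this is
# [(input_dim, units), (units,)]
for layer in model.layers:
    weights = layer.get_weights()
    if weights:
        print(layer.name, [w.shape for w in weights])
# if the shapes printed before and after training are identical, the apparent
# "growth" comes from how the weights are collected/printed, not from the layers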