背景:我正在CT成像恢复的每个迭代之间实现cnn架构。通过使用LEARN中提到的3个简单层cnn,我得到了不错的结果,但是我很好奇是否可以通过使用更复杂的cnn体系结构(例如UNet)来获得更好的结果。
到目前为止,对于更复杂的UNet架构,我还没有任何改进。部分原因是内存限制-与3个简单层的cnn相比,UNet生成的参数更多,因此我无法在每次迭代中都实现UNet。因此,我每3个正常迭代中添加一次UNet。总迭代为30,并且由于mod 3,在这些迭代中实现了10个UNet。
另一个原因可能只是因为UNet体系结构太深。我首先对UNet进行了修改,使其仅具有2条收缩和2条扩展路径,并且得到了与3个简单层cnn可比的结果(损失收敛到1-3)。由于没有任何过拟合的问题,因此我决定尝试更深的网络:3条收缩和3条扩展路径。但是,损失(真实图像和去噪图像之间的 L2 范数)收敛到30-40,这比2条收缩和2条扩展路径大得多。
我不知道为什么,所以我决定看一下 TensorBoard 的梯度直方图和变量图。我发现了这些:
我所有的梯度直方图如下所示。我能否将其解读为"梯度在所有训练步骤中取不同的值,但分布完全相同"?但是,为什么梯度在所有步骤中的分布会相同呢?于是我怀疑我的网络没有正常工作,但随后发现损失曲线确实在下降。
这是经过修改的UNet的代码,稍后将在LEARN(使用迭代方法进行CT成像重建)中实现
def cnn_layers(user_input, iteration, is_training):
    """Modified UNet (3 contracting / 3 expanding levels) used as the
    per-iteration regularizer inside the LEARN reconstruction loop.

    Args:
        user_input: image tensor of shape [batch, H, W]; the channel
            dimension is added internally.
            NOTE(review): H and W must be divisible by 8 for the three 2x2
            poolings/upsamplings to align with the skip connections — confirm.
        iteration: LEARN iteration index, used only to scope variable names
            so each unrolled iteration gets its own weights.
        is_training: bool flag/tensor controlling dropout.

    Returns:
        Denoised image tensor of shape [batch, H, W]: the input minus the
        CNN output (residual formulation).

    NOTE(review): several Conv2D/MaxPool2D argument lists were garbled in
    the original paste; kernel sizes, padding, activation and initializer
    below are reconstructed from the intact conv1_1 line and the
    commented-out layers — confirm against the original code.
    """
    def _conv(filters, ksize, name):
        # All convolutions share the same padding/activation/initializer
        # (pattern taken from the intact conv1_1 definition).
        return tf.keras.layers.Conv2D(
            filters, ksize, padding='same',
            activation=tf.keras.layers.ELU(),
            kernel_initializer=tf.keras.initializers.he_normal(),
            name=name)

    with tf.variable_scope('cnn_layer{}'.format(iteration)):
        user_input = tf.expand_dims(user_input, -1)  # add channel dimension

        # --- contracting path ---
        conv1_1 = _conv(64, 3, 'conv1_1')(user_input)
        conv1_2 = _conv(64, 3, 'conv1_2')(conv1_1)
        pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=(2, 2),
                                          name='pool1')(conv1_2)

        conv2_1 = _conv(128, 3, 'conv2_1')(pool1)
        conv2_2 = _conv(128, 3, 'conv2_2')(conv2_1)
        pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=(2, 2),
                                          name='pool2')(conv2_2)

        conv3_1 = _conv(256, 3, 'conv3_1')(pool2)
        conv3_2 = _conv(256, 3, 'conv3_2')(conv3_1)
        # Pass training= so dropout is disabled at inference time; the
        # original never used its is_training argument.
        drop3 = tf.keras.layers.Dropout(0.5, name='drop3')(
            conv3_2, training=is_training)
        pool3 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=(2, 2),
                                          name='pool3')(drop3)

        # --- bottleneck (the deeper 1024-filter level stays disabled, as in
        # the original, to fit memory) ---
        conv4_1 = _conv(512, 3, 'conv4_1')(pool3)
        conv4_2 = _conv(512, 3, 'conv4_2')(conv4_1)
        drop4 = tf.keras.layers.Dropout(0.5, name='drop4')(
            conv4_2, training=is_training)

        # --- expanding path with skip connections ---
        # Up-convolutions use kernel size 2, matching the commented-out up6.
        up7 = _conv(256, 2, 'up7')(
            tf.keras.layers.UpSampling2D(size=(2, 2))(drop4))
        merge7 = tf.keras.layers.concatenate([drop3, up7], axis=3)
        conv7_1 = _conv(256, 3, 'conv7_1')(merge7)
        conv7_2 = _conv(256, 3, 'conv7_2')(conv7_1)

        up8 = _conv(128, 2, 'up8')(
            tf.keras.layers.UpSampling2D(size=(2, 2))(conv7_2))
        merge8 = tf.keras.layers.concatenate([conv2_2, up8], axis=3)
        conv8_1 = _conv(128, 3, 'conv8_1')(merge8)
        conv8_2 = _conv(128, 3, 'conv8_2')(conv8_1)

        up9 = _conv(64, 2, 'up9')(
            tf.keras.layers.UpSampling2D(size=(2, 2))(conv8_2))
        merge9 = tf.keras.layers.concatenate([conv1_2, up9], axis=3)
        conv9_1 = _conv(64, 3, 'conv9_1')(merge9)
        conv9_2 = _conv(64, 3, 'conv9_2')(conv9_1)
        conv9_3 = _conv(2, 3, 'conv9_3')(conv9_2)

        # Final 1x1 convolution to a single-channel residual; linear
        # activation (the sigmoid variant was abandoned in a comment).
        conv10 = tf.keras.layers.Conv2D(
            1, 1, padding='same',
            kernel_initializer=tf.keras.initializers.he_normal(),
            name='conv10')(conv9_3)

        # Residual formulation: the network predicts the noise/artifact,
        # which is subtracted from the input image.
        user_input = tf.math.subtract(user_input, conv10, name="Reunion")
        return tf.squeeze(user_input, -1)  # drop the channel dimension
这里是LEARN迭代。请注意,方法cnn_layers每3次迭代执行一次。
# LEARN unrolled iteration with the UNet applied on every 3rd iteration.
def LEARN(sino, geometry, dx, numpix, numits, is_training):
    """Unrolled LEARN reconstruction for CT imaging.

    Each iteration takes one gradient step on the data-fidelity term
    ||A x - b||^2; every 3rd iteration the image is additionally passed
    through the CNN regularizer before the update:
        x^{k+1} = CNN(x^k) - lam_k * A^T (A x^k - b)

    Args:
        sino: measured sinogram tensor (target of the forward projection).
        geometry: projection geometry; parallel-beam when it is a
            GeometryParallel2D instance, fan-beam otherwise.
        dx: pixel-size scaling applied to both projection and backprojection.
        numpix: image dimensions; numpix[0] is used for both spatial dims.
            NOTE(review): if images can be non-square this should probably
            be [1, numpix[0], numpix[1]] — confirm.
        numits: number of unrolled iterations (30 in the experiments).
        is_training: forwarded to the CNN (controls dropout).

    Returns:
        The reconstructed image tensor after numits iterations.
    """
    current_x = tf.zeros([1, numpix[0], numpix[0]])
    for iteration in range(numits):
        # Run the CNN regularizer only when iteration % 3 == 0 (10 of 30
        # iterations, to fit the UNet's memory footprint); otherwise pass
        # the current image through unchanged.
        if iteration % 3 != 0:
            cnn_ret_val = current_x
        else:
            cnn_ret_val = cnn_layers(current_x, iteration, is_training)
        with tf.variable_scope('LEARN_layer{}'.format(iteration)):
            # Forward projection A x^k.
            if isinstance(geometry, GeometryParallel2D):
                fp = projection_2d.parallel_projection2d(current_x, geometry)
            else:
                fp = projection_2d.fan_projection2d(current_x, geometry)
            fp = tf.reshape(fp, sino.shape)
            # Data-fidelity residual A x^k - b.
            diff = tf.subtract(tf.multiply(fp, dx), sino, name="diff")
            # Backprojection A^T (A x^k - b).
            if isinstance(geometry, GeometryParallel2D):
                bp = backprojection_2d.parallel_backprojection2d(diff, geometry)
            else:
                bp = backprojection_2d.fan_backprojection2d(diff, geometry)
            bp = tf.reshape(bp, current_x.shape)
            bp = tf.multiply(bp, dx)
            # Learnable per-iteration step size (fresh variable per layer
            # because it is created inside the iteration's variable_scope).
            lam = tf.Variable(0.001, trainable=True, dtype=tf.float32,
                              name='stepsize')
            grad = tf.multiply(lam, bp)
            # x^{k+1} = CNN(x^k) - lam * A^T(A x^k - b)
            current_x = tf.math.subtract(cnn_ret_val, grad)
    return current_x
我使用Adam优化器,其初始学习率为0.0005。我尝试过较大的学习率(0.001)和较小的学习率(0.0001),前者会导致损失曲线上出现更多尖峰(spikes),后者则收敛非常缓慢。因此,我决定回到0.0005。
我还实现了梯度裁剪
# Build the training op with gradient clipping.
optimizer = tf.train.AdamOptimizer(self.lr, name='AdamOptimizer')
# Run any ops registered in UPDATE_OPS (e.g. batch-norm moving-average
# updates) before each training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    self.gvs = optimizer.compute_gradients(self.loss)
    # Skip variables with no gradient (grad is None for variables not
    # reachable from the loss) — tf.clip_by_value would raise on them.
    # NOTE(review): element-wise clipping to [-0.5, 0.5] distorts the
    # gradient direction; tf.clip_by_global_norm preserves it and is the
    # usual choice — consider switching.
    self.capped_gvs = [(tf.clip_by_value(grad, -.5, .5), var)
                       for grad, var in self.gvs if grad is not None]
    self.train_op = optimizer.apply_gradients(self.capped_gvs)
这是我在tf.summary.histogram上写渐变和变量的方法:
# TensorBoard summaries: per-variable gradient/weight histograms plus
# scalar and image diagnostics.
for gradient, variable in self.gvs:
    # Guard against None gradients (variables not reachable from the
    # loss); tf.summary.histogram cannot take None.
    if gradient is not None:
        tf.summary.histogram("gradients/" + variable.name, gradient)
    tf.summary.histogram("variables/" + variable.name, variable)
tf.summary.scalar('loss', self.loss)
tf.summary.scalar('lr', self.lr)
# Reshape the denoised output to NHWC so it can be logged as an image.
tempY = tf.reshape(self.Y, [batch_size, self.numpix[0], self.numpix[1], 1])
img = tf.summary.image('denoised image', tempY, max_outputs=1)
writer = tf.summary.FileWriter(self.logdir, self.sess.graph)
merged = tf.summary.merge_all()