我的网络分为两个步骤:一个是用于特征提取的预训练网络,另一个是使用连接时序分类(Connectionist Temporal Classification,CTC)的识别部分。先加载预训练的模型并取出特征层('conv5_3'),
然后通过 roi_pooling 裁剪出部分特征,并使用 CTC 进行识别。
代码如下。
************恢复(加载)预训练的模型***********************
# Build the input pipeline and the detection network on a dedicated graph,
# open a session on it and restore the pretrained checkpoint.
# NOTE(review): the paste lost its indentation; the nesting under
# `with graph.as_default():` and under the if/elif below has to be restored
# before this runs.
graph = tf.Graph()
with graph.as_default():
global_step = slim.get_or_create_global_step()
input_data,input_labels,input_boxes = input_train_data.input_fn()
# Fold the boxes into a 2-D tensor with twice as many rows as the incoming
# first dimension.
# presumably two boxes per input sample — verify against input_fn
input_boxes = tf.reshape(input_boxes,[input_boxes.shape[0]*2,-1])
# Column vector of row numbers divided by 2.
# NOTE(review): `/` on an integer tensor is true division under Python 3
# (0, 0.5, 1, ...) and floor division under Python 2 — confirm which one is
# intended here.
rows = tf.expand_dims(tf.range(input_boxes.shape[0]),1)/2
# Prepend that column (as float32) to every box row.
add_index = tf.concat([tf.cast(rows,tf.float32),input_boxes],-1)
# Keep only the rows whose columns from index 4 onward do not sum to zero.
# presumably this drops zero-padded boxes — TODO confirm
index = tf.not_equal(tf.reduce_sum(add_index[:,4:],axis=1),0)
input_boxes = tf.gather_nd(add_index,tf.where(index))
# Build the detection model with is_training=False and decode its score maps
# into masks.
net = pixel_link_symbol.PixelLinkNet(input_data,is_training = False)
masks = pixel_link.tf_decode_score_map_to_mask_in_batch(net.pixel_pos_scores,net.link_pos_scores)
# The Saver is created with no var_list, at a point where only the variables
# defined above exist.
# NOTE(review): per the tf.train.Saver docs a Saver without var_list covers
# the saveable objects present when it is constructed, so variables created
# later would need a second Saver to be checkpointed — confirm.
f_saver = tf.train.Saver(max_to_keep=1000,write_version=tf.train.SaverDef.V2,save_relative_paths=True)
sess_config = tf.ConfigProto(log_device_placement = False,allow_soft_placement = True)
# Negative fraction: let GPU memory grow on demand. Positive fraction: cap the
# per-process share. Zero: leave the session defaults untouched.
# NOTE(review): `flaGS` is presumably the FLAGS object — check the spelling.
if flaGS.gpu_memory_fraction < 0:
sess_config.gpu_options.allow_growth = True
elif flaGS.gpu_memory_fraction > 0:
sess_config.gpu_options.per_process_gpu_memory_fraction = flaGS.gpu_memory_fraction;
session = tf.Session(graph=graph,config=sess_config)
# Start the queue-runner threads under a coordinator.
coordinator = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=session,coord=coordinator)
# Load the pretrained weights from the configured checkpoint into the session.
tf.logging.info('Initialize from: ' + config.train.init_checkpoint)
f_saver.restore(session,config.train.init_checkpoint)
**********提取特征('conv5_3'),通过roi_pooling并使用CTC进行训练***************
# Recognition head: take the 'conv5_3' end point of the detection net, run it
# through the recognition net, ROI-pool it with the boxes, and build the CTC
# decode, error-rate, loss and optimizer ops.
# NOTE(review): the paste lost its indentation, so how far the body of
# tf.variable_scope("Recognition") extends cannot be read from this text.
# Fetch the feature layer
conv5_3 = net.end_points['conv5_3']
with tf.variable_scope("Recognition"):
# Pass the features through the recognition net
r_net = regnet.ConstructRecNet(conv5_3)
conv7_7 = r_net.end_points['pool7']
# ROI pooling; the boxes are cast to int32 first
input_boxes=tf.dtypes.cast(input_boxes,tf.int32)
# presumably yields a 1 x 28 strip per box — confirm against roi_pooling
prob = roi_pooling(conv7_7,input_boxes,pool_height=1,pool_width=28)
# Arrange the pooled features for CTC training
# NOTE(review): tf.transpose expects `perm` to be a permutation of all axes;
# (1,2) does not mention axis 0 — confirm the intended layout.
prob = tf.transpose(prob,(1,2)) # prepare for CTC
# Vector with tf.shape(prob)[1] entries, each equal to tf.shape(prob)[0]:
# axis 1 is treated as the batch and axis 0 as the sequence length.
data_length = tf.fill([tf.shape(prob)[1]],tf.shape(prob)[0]) # input seq length,batch size
# CTCUtils.compute_ctc_from_labels runs as a Python op and returns three int64
# tensors, used below as indices, values and dense_shape of a SparseTensor.
ctc = tf.py_func(CTCUtils.compute_ctc_from_labels,[input_labels],[tf.int64,tf.int64,tf.int64])
ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0],ctc[1],ctc[2]))
# First decoded path of a beam search with beam width 10.
predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(prob,data_length,merge_repeated=False,beam_width=10)[0][0])
# The next two results are not bound to Python names; the ops carry the names
# 'd_predictions' and 'error_rate'.
tf.sparse_tensor_to_dense(predictions,default_value=-1,name='d_predictions')
tf.reduce_mean(tf.edit_distance(predictions,ctc_labels,normalize=False),name='error_rate')
# Mean CTC loss over the batch.
loss = tf.reduce_mean(tf.nn.ctc_loss(inputs=prob,labels=ctc_labels,sequence_length=data_length,ctc_merge_repeated=True),name='loss')
# Step schedule: the base rate, then 0.1x from step 150000 and 0.01x from
# step 200000.
learning_rate = tf.train.piecewise_constant(global_step,[150000,200000],[config.train.learning_rate,0.1 * config.train.learning_rate,0.01 * config.train.learning_rate])
# Training op; optimizer type and gradient noise scale come from the config.
opt_loss = tf.contrib.layers.optimize_loss(loss,global_step,learning_rate,config.train.opt_type,config.train.grad_noise_scale,name='train_step')
********************初始化并训练*************************
# Initialise the variables the checkpoint restore did not cover, set up the
# optional summary writer, then freeze the graph.
#
# The original line built tf.global_variables_initializer() and discarded the
# op, so nothing in this block ever ran it. Running that op unchanged would
# not be a fix either: it re-initialises every global variable, including the
# ones f_saver.restore() has already loaded. Only variables that still hold
# no value are initialised here.
with graph.as_default():
    # Op names of the variables that have no value yet in this session.
    pending = session.run(tf.report_uninitialized_variables())
    pending_names = set(
        name.decode('utf-8') if isinstance(name, bytes) else name
        for name in pending
    )
    fresh_variables = [
        var for var in tf.global_variables() if var.op.name in pending_names
    ]
    # NOTE(review): 'init' is the default op name of the initializer the
    # original line created; keeping it lets code that fetches the op by
    # name still resolve it — confirm whether the training loop does so.
    init_op = tf.variables_initializer(fresh_variables, name='init')
# Must run before training starts; the restored variables are left untouched.
session.run(init_op)

writer = None
if config.train.need_to_save_log:
    writer = tf.summary.FileWriter(config.model_dir, session.graph)

# No op may be added to the graph after this call, so the initializer above
# has to be created first.
graph.finalize()
for i in range(train.steps):
**********
**********
我对以下几点感到困惑:(1)f_saver 是只能用于恢复预训练模型,还是仍然可以用来保存整个计算图(包括预训练部分和 CTC 识别部分)?(2)为了初始化 CTC 识别部分,我调用了 tf.global_variables_initializer()。这种用法正确吗?还是只需要初始化 "Recognition" 变量作用域(variable scope)中的变量?(3)