这是我正在使用的代码...
我在适合我的304行上安装了一个断点...
estimator.train(input_fn = train_input_fn,max_steps = num_train_steps)
有人看过吗?我确定我已经安装了TensorFlow和berT的正确版本。
完整的堆栈跟踪如下。...
Exception has occurred: ValueError
model_fn should return an EstimatorSpec.
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 1153,in _call_model_fn
raise ValueError('model_fn should return an EstimatorSpec.')
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 1191,in _train_model_default
features,labels,ModeKeys.TRAIN,self.config)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 1161,in _train_model
return self._train_model_default(input_fn,hooks,saving_listeners)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 370,in train
loss = self._train_model(input_fn,saving_listeners)
File "C:\Users\brownru\eclipse-workspace\tiaaNLPPython\org\tiaa\ai\penelope\bertNLP\sentiment\sentiment.py",line 304,in <module>
estimator.train(input_fn=train_input_fn,max_steps=num_train_steps)
File "C:\Program Files\Python36\Lib\runpy.py",line 85,in _run_code
exec(code,run_globals)
File "C:\Program Files\Python36\Lib\runpy.py",line 96,in _run_module_code
mod_name,mod_spec,pkg_name,script_name)
File "C:\Program Files\Python36\Lib\runpy.py",line 263,in run_path
pkg_name=pkg_name,script_name=fname)
ValueError: model_fn should return an EstimatorSpec.
此代码是我尝试从此处运行一些Google colab代码-
# Copyright 2019 Google Inc.
# Licensed under the Apache License,Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing,software
# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# install --proxy http://proxy.ops.tiaa-cref.org:8080 tensorFlow
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_estimator as tfe
from datetime import datetime
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
# Set the output directory for saving model file
# Optionally,set a GCP bucket location
OUTPUT_DIR = r'C:\Users\brownru\Documents\npsExplanationComplains\sentimentOutput'
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}
if USE_BUCKET:
OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET,OUTPUT_DIR)
#from google.colab import auto
#auth.authenticate_user()
if DO_DELETE:
try:
tf.gfile.DeleteRecursively(OUTPUT_DIR)
except:
# Doesn't matter if the directory didn't exist
pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))
'''
First,let's download the dataset,hosted by Stanford. The code below,which downloads,extracts,and imports the IMDB Large Movie Review Dataset,is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).
'''
from tensorflow import keras
import os
import re
# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
data = {}
data["sentence"] = []
data["sentiment"] = []
for file_path in os.listdir(directory):
with tf.gfile.GFile(os.path.join(directory,file_path),"r") as f:
data["sentence"].append(f.read())
data["sentiment"].append(re.match("\d+_(\d+)\.txt",file_path).group(1))
return pd.DataFrame.from_dict(data)
# Merge positive and negative examples,add a polarity column and shuffle.
def load_dataset(directory):
pos_df = load_directory_data(os.path.join(directory,"pos"))
neg_df = load_directory_data(os.path.join(directory,"neg"))
pos_df["polarity"] = 1
neg_df["polarity"] = 0
return pd.concat([pos_df,neg_df]).sample(frac=1).reset_index(drop=True)
# Download and process the dataset files.
def download_and_load_datasets():
#dataset = tf.keras.utils.get_file(fname="aclImdb.tar.gz",origin="http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/aclImdb_v1.tar.gz",extract=True)
trainPath = r'C:\Users\brownru\.keras\datasets\aclImdb\train'
testPath = r'C:\Users\brownru\.keras\datasets\aclImdb\test'
train_df = load_dataset(trainPath)
test_df = load_dataset(testPath)
return train_df,test_df
train,test = download_and_load_datasets()
#To keep training fast,we'll take a sample of 5000 train and test examples,respectively.
train = train.sample(5000)
test = test.sample(5000)
train.columns
#Index(['sentence','sentiment','polarity'],dtype='object')
#For us,our input data is the 'sentence' column and our label is the 'polarity' column (0,1 for negative and positive,respectively)
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels,i.e. True,False or 0,1 or 'dog','cat'
label_list = [0,1]
#Data Preprocessing We'll need to transform our data into a format berT understands. This involves two steps. First,we create InputExample's using the constructor provided in the berT library.
#text_a is the text we want to classify,which in this case,is the Request field in our Dataframe.
#text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task,so we can leave text_b blank.
#label is the label for our example,False
# Use the InputExample class from berT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None,# Globally unique ID for bookkeeping,unused in this example
text_a = x[DATA_COLUMN],text_b = None,label = x[LABEL_COLUMN]),axis = 1)
test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None,text_a = x[DATA_COLUMN],axis = 1)
# This is a path to an uncased (all lowercase) version of berT
berT_MODEL_HUB = "http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/1.tar.gz"
def create_tokenizer_from_hub_module():
with tf.Graph().as_default():
bert_module = hub.Module(berT_MODEL_HUB)
tokenization_info = bert_module(signature="tokenization_info",as_dict=True)
with tf.Session() as sess:
vocab_file,do_lower_case = sess.run([tokenization_info["vocab_file"],tokenization_info["do_lower_case"]])
return bert.tokenization.FullTokenizer(vocab_file=vocab_file,do_lower_case=do_lower_case)
tokenizer = create_tokenizer_from_hub_module()
tokenizer.tokenize("This here's an example of using the berT tokenizer")
# We'll set sequences to be at most 128 tokens long TEST.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that berT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples,label_list,MAX_SEQ_LENGTH,tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples,tokenizer)
#Creating a model
def create_model(is_predicting,input_ids,input_mask,segment_ids,num_labels):
#Creates a classification model.
bert_module = hub.Module(berT_MODEL_HUB,trainable=True)
bert_inputs = dict(input_ids=input_ids,input_mask=input_mask,segment_ids=segment_ids)
bert_outputs = bert_module(inputs=bert_inputs,signature="tokens",as_dict=True)
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
output_layer = bert_outputs["pooled_output"]
hidden_size = output_layer.shape[-1].value
# Create our own layer to tune for politeness data.
output_weights = tf.get_variable("output_weights",[num_labels,hidden_size],initializer=tf.truncated_normal_initializer(stddev=0.02))
output_bias = tf.get_variable("output_bias",[num_labels],initializer=tf.zeros_initializer())
with tf.variable_scope("loss"):
# Dropout helps prevent overfitting
output_layer = tf.nn.dropout(output_layer,keep_prob=0.9)
logits = tf.matmul(output_layer,output_weights,transpose_b=True)
logits = tf.nn.bias_add(logits,output_bias)
log_probs = tf.nn.log_softmax(logits,axis=-1)
# Convert labels into one-hot encoding
one_hot_labels = tf.one_hot(labels,depth=num_labels,dtype=tf.float32)
predicted_labels = tf.squeeze(tf.argmax(log_probs,axis=-1,output_type=tf.int32))
# If we're predicting,we want predicted labels and the probabilities.
if is_predicting:
return (predicted_labels,log_probs)
# If we're train/eval,compute loss between predicted and actual label
per_example_loss = tf.reduce_sum(one_hot_labels * log_probs,axis=-1)
loss = tf.reduce_mean(per_example_loss)
return (loss,predicted_labels,log_probs)
'''Next we'll wrap our model function in a model_fn_builder function that adapts our model to work for training,evaluation,and prediction.'''
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels,learning_rate,etc.
def model_fn_builder(num_labels,num_train_steps,num_warmup_steps):
#Returns `model_fn` closure for TPUEstimator."""
def model_fn(features,mode,params): # pylint: disable=unused-argument
#"""The `model_fn` for TPUEstimator."""
input_ids = features["input_ids"]
input_mask = features["input_mask"]
segment_ids = features["segment_ids"]
label_ids = features["label_ids"]
is_predicting = (mode == tfe.estimator.ModeKeys.PREDICT)
# TRAIN and EVAL
if not is_predicting:
(loss,log_probs) = create_model(is_predicting,label_ids,num_labels)
train_op = bert.optimization.create_optimizer(loss,num_warmup_steps,use_tpu=False)
# Calculate evaluation metrics.
def metric_fn(label_ids,predicted_labels):
accuracy = tf.metrics.accuracy(label_ids,predicted_labels)
f1_score = tf.contrib.metrics.f1_score(
label_ids,predicted_labels)
auc = tf.metrics.auc(
label_ids,predicted_labels)
recall = tf.metrics.recall(
label_ids,predicted_labels)
precision = tf.metrics.precision(
label_ids,predicted_labels)
true_pos = tf.metrics.true_positives(
label_ids,predicted_labels)
true_neg = tf.metrics.true_negatives(
label_ids,predicted_labels)
false_pos = tf.metrics.false_positives(
label_ids,predicted_labels)
false_neg = tf.metrics.false_negatives(
label_ids,predicted_labels)
return {
"eval_accuracy": accuracy,"f1_score": f1_score,"auc": auc,"precision": precision,"recall": recall,"true_positives": true_pos,"true_negatives": true_neg,"false_positives": false_pos,"false_negatives": false_neg
}
eval_metrics = metric_fn(label_ids,predicted_labels)
if mode == tfe.estimator.ModeKeys.TRAIN:
return tfe.estimator.EstimatorSpec(mode=mode,loss=loss,train_op=train_op)
else:
return tfe.estimator.EstimatorSpec(mode=mode,eval_metric_ops=eval_metrics)
else:
(predicted_labels,num_labels)
predictions = {'probabilities': log_probs,'labels': predicted_labels}
return tfe.estimator.EstimatorSpec(mode,predictions=predictions)
# Return the actual model function in the closure
return model_fn
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
# Specify outpit directory and number of checkpoint steps to save
run_config = tfe.estimator.Runconfig(
model_dir=OUTPUT_DIR,save_summary_steps=SAVE_SUMMARY_STEPS,save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)
model_fn = model_fn_builder(
num_labels=len(label_list),learning_rate=LEARNING_RATE,num_train_steps=num_train_steps,num_warmup_steps=num_warmup_steps)
estimator = tfe.estimator.Estimator(
model_fn=model_fn,config=run_config,params={"batch_size": BATCH_SIZE}
)
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
features=train_features,seq_length=MAX_SEQ_LENGTH,is_training=True,drop_remainder=False)
#Now we train our model! For me,using a colab notebook running on Google's GPUs,my training time was about 14 minutes.
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn,max_steps=num_train_steps)
print("Training took time ",datetime.now() - current_time)
#Now let's use our test data to see how well our model did:
test_input_fn = run_classifier.input_fn_builder(
features=test_features,is_training=False,drop_remainder=False)
estimator.evaluate(input_fn=test_input_fn,steps=None)
def getPrediction(in_sentences):
labels = ["Negative","Positive"]
input_examples = [run_classifier.InputExample(guid="",text_a = x,label = 0) for x in in_sentences] # here,"" is just a dummy label
input_features = run_classifier.convert_examples_to_features(input_examples,tokenizer)
predict_input_fn = run_classifier.input_fn_builder(features=input_features,drop_remainder=False)
predictions = estimator.predict(predict_input_fn)
return [(sentence,prediction['probabilities'],labels[prediction['labels']]) for sentence,prediction in zip(in_sentences,predictions)]
pred_sentences = [
"That movie was absolutely awful","The acting was a bit lacking","The film was creative and surprising","Absolutely fantastic!"
]
predictions = getPrediction(pred_sentences)
predictions