estimator.train抛出ValueError：model_fn应该返回EstimatorSpec

这是我正在使用的代码...

我在适合我的304行上安装了一个断点...

estimator.train（input_fn = train_input_fn，max_steps = num_train_steps）

有人看过吗？我确定我已经安装了TensorFlow和berT的正确版本。

完整的堆栈跟踪如下。...

    Exception has occurred: ValueError
    model_fn should return an EstimatorSpec.
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 1153,in _call_model_fn
 raise ValueError('model_fn should return an EstimatorSpec.')
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 1191,in _train_model_default
features,labels,ModeKeys.TRAIN,self.config)
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 1161,in _train_model
return self._train_model_default(input_fn,hooks,saving_listeners)
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py",line 370,in train
loss = self._train_model(input_fn,saving_listeners)
    File "C:\Users\brownru\eclipse-workspace\tiaaNLPPython\org\tiaa\ai\penelope\bertNLP\sentiment\sentiment.py",line 304,in <module>
estimator.train(input_fn=train_input_fn,max_steps=num_train_steps)
    File "C:\Program Files\Python36\Lib\runpy.py",line 85,in _run_code
exec(code,run_globals)
    File "C:\Program Files\Python36\Lib\runpy.py",line 96,in _run_module_code
mod_name,mod_spec,pkg_name,script_name)
    File "C:\Program Files\Python36\Lib\runpy.py",line 263,in run_path
pkg_name=pkg_name,script_name=fname)
    ValueError: model_fn should return an EstimatorSpec.

此代码是我尝试从此处运行一些Google colab代码-

https://colab.research.google.com/github/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb#scrollTo=t6Nukby2EB6-

# Copyright 2019 Google Inc.

# Licensed under the Apache License,Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,software
# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# install --proxy http://proxy.ops.tiaa-cref.org:8080 tensorFlow

import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_estimator as tfe
from datetime import datetime

import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization


# Set the output directory for saving model file
# Optionally,set a GCP bucket location

OUTPUT_DIR = r'C:\Users\brownru\Documents\npsExplanationComplains\sentimentOutput' 
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
    OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET,OUTPUT_DIR)
#from google.colab import auto
#auth.authenticate_user()

if DO_DELETE:
    try:
        tf.gfile.DeleteRecursively(OUTPUT_DIR)
    except:
            # Doesn't matter if the directory didn't exist
            pass
    tf.gfile.MakeDirs(OUTPUT_DIR)
    print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

'''
First,let's download the dataset,hosted by Stanford. The code below,which downloads,extracts,and imports the IMDB Large Movie Review Dataset,is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).
'''
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
    data = {}
    data["sentence"] = []
    data["sentiment"] = []
    for file_path in os.listdir(directory):
        with tf.gfile.GFile(os.path.join(directory,file_path),"r") as f:
            data["sentence"].append(f.read())
            data["sentiment"].append(re.match("\d+_(\d+)\.txt",file_path).group(1))
    return pd.DataFrame.from_dict(data)

# Merge positive and negative examples,add a polarity column and shuffle.
def load_dataset(directory):
    pos_df = load_directory_data(os.path.join(directory,"pos"))
    neg_df = load_directory_data(os.path.join(directory,"neg"))
    pos_df["polarity"] = 1
    neg_df["polarity"] = 0
    return pd.concat([pos_df,neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets():
    #dataset = tf.keras.utils.get_file(fname="aclImdb.tar.gz",origin="http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/aclImdb_v1.tar.gz",extract=True)
    trainPath = r'C:\Users\brownru\.keras\datasets\aclImdb\train'
    testPath = r'C:\Users\brownru\.keras\datasets\aclImdb\test'
    train_df = load_dataset(trainPath)
    test_df = load_dataset(testPath)

    return train_df,test_df

train,test = download_and_load_datasets()

#To keep training fast,we'll take a sample of 5000 train and test examples,respectively.

train = train.sample(5000)
test = test.sample(5000)

train.columns

#Index(['sentence','sentiment','polarity'],dtype='object')

#For us,our input data is the 'sentence' column and our label is the 'polarity' column (0,1 for negative and positive,respectively)

DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels,i.e. True,False or 0,1 or 'dog','cat'
label_list = [0,1]


#Data Preprocessing We'll need to transform our data into a format berT understands. This involves two steps. First,we create InputExample's using the constructor provided in the berT library.
#text_a is the text we want to classify,which in this case,is the Request field in our Dataframe. 
#text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task,so we can leave text_b blank.
#label is the label for our example,False

# Use the InputExample class from berT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None,# Globally unique ID for bookkeeping,unused in this example
                                                                text_a = x[DATA_COLUMN],text_b = None,label = x[LABEL_COLUMN]),axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None,text_a = x[DATA_COLUMN],axis = 1)



# This is a path to an uncased (all lowercase) version of berT
berT_MODEL_HUB = "http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/1.tar.gz"

def create_tokenizer_from_hub_module():
    with tf.Graph().as_default():
        bert_module = hub.Module(berT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info",as_dict=True)
        with tf.Session() as sess:
                vocab_file,do_lower_case = sess.run([tokenization_info["vocab_file"],tokenization_info["do_lower_case"]])      
    return bert.tokenization.FullTokenizer(vocab_file=vocab_file,do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

tokenizer.tokenize("This here's an example of using the berT tokenizer")

# We'll set sequences to be at most 128 tokens long TEST.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that berT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples,label_list,MAX_SEQ_LENGTH,tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples,tokenizer)

#Creating a model

def create_model(is_predicting,input_ids,input_mask,segment_ids,num_labels):
#Creates a classification model.
    bert_module = hub.Module(berT_MODEL_HUB,trainable=True)
    bert_inputs = dict(input_ids=input_ids,input_mask=input_mask,segment_ids=segment_ids)
    bert_outputs = bert_module(inputs=bert_inputs,signature="tokens",as_dict=True)

# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    hidden_size = output_layer.shape[-1].value

# Create our own layer to tune for politeness data.  
    output_weights = tf.get_variable("output_weights",[num_labels,hidden_size],initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias",[num_labels],initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer,keep_prob=0.9)

        logits = tf.matmul(output_layer,output_weights,transpose_b=True)
        logits = tf.nn.bias_add(logits,output_bias)
        log_probs = tf.nn.log_softmax(logits,axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels,depth=num_labels,dtype=tf.float32)
        predicted_labels = tf.squeeze(tf.argmax(log_probs,axis=-1,output_type=tf.int32))
        # If we're predicting,we want predicted labels and the probabilities.
        if is_predicting:
            return (predicted_labels,log_probs)

        # If we're train/eval,compute loss between predicted and actual label
    per_example_loss = tf.reduce_sum(one_hot_labels * log_probs,axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss,predicted_labels,log_probs)


'''Next we'll wrap our model function in a model_fn_builder function that adapts our model to work for training,evaluation,and prediction.'''

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels,learning_rate,etc.
def model_fn_builder(num_labels,num_train_steps,num_warmup_steps):
#Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features,mode,params):  # pylint: disable=unused-argument
    #"""The `model_fn` for TPUEstimator."""

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tfe.estimator.ModeKeys.PREDICT)

    # TRAIN and EVAL
        if not is_predicting:

            (loss,log_probs) = create_model(is_predicting,label_ids,num_labels)
            train_op = bert.optimization.create_optimizer(loss,num_warmup_steps,use_tpu=False)

# Calculate evaluation metrics. 
            def metric_fn(label_ids,predicted_labels):
                        accuracy = tf.metrics.accuracy(label_ids,predicted_labels)
                        f1_score = tf.contrib.metrics.f1_score(
                            label_ids,predicted_labels)
                        auc = tf.metrics.auc(
                            label_ids,predicted_labels)
                        recall = tf.metrics.recall(
                            label_ids,predicted_labels)
                        precision = tf.metrics.precision(
                            label_ids,predicted_labels) 
                        true_pos = tf.metrics.true_positives(
                            label_ids,predicted_labels)
                        true_neg = tf.metrics.true_negatives(
                            label_ids,predicted_labels)   
                        false_pos = tf.metrics.false_positives(
                            label_ids,predicted_labels)  
                        false_neg = tf.metrics.false_negatives(
                            label_ids,predicted_labels)
                        return {
                            "eval_accuracy": accuracy,"f1_score": f1_score,"auc": auc,"precision": precision,"recall": recall,"true_positives": true_pos,"true_negatives": true_neg,"false_positives": false_pos,"false_negatives": false_neg
                        }

                        eval_metrics = metric_fn(label_ids,predicted_labels)

                        if mode == tfe.estimator.ModeKeys.TRAIN:
                            return tfe.estimator.EstimatorSpec(mode=mode,loss=loss,train_op=train_op)
                        else:
                            return tfe.estimator.EstimatorSpec(mode=mode,eval_metric_ops=eval_metrics)
        else:
            (predicted_labels,num_labels)
            predictions = {'probabilities': log_probs,'labels': predicted_labels}
            return tfe.estimator.EstimatorSpec(mode,predictions=predictions)

# Return the actual model function in the closure
    return model_fn

# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
# Specify outpit directory and number of checkpoint steps to save
run_config = tfe.estimator.Runconfig(
    model_dir=OUTPUT_DIR,save_summary_steps=SAVE_SUMMARY_STEPS,save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

model_fn = model_fn_builder(
  num_labels=len(label_list),learning_rate=LEARNING_RATE,num_train_steps=num_train_steps,num_warmup_steps=num_warmup_steps)

estimator = tfe.estimator.Estimator(
  model_fn=model_fn,config=run_config,params={"batch_size": BATCH_SIZE}
  )

# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,seq_length=MAX_SEQ_LENGTH,is_training=True,drop_remainder=False)

#Now we train our model! For me,using a colab notebook running on Google's GPUs,my training time was about 14 minutes.
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn,max_steps=num_train_steps)
print("Training took time ",datetime.now() - current_time)

#Now let's use our test data to see how well our model did:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,is_training=False,drop_remainder=False)

estimator.evaluate(input_fn=test_input_fn,steps=None)

def getPrediction(in_sentences):
    labels = ["Negative","Positive"]
    input_examples = [run_classifier.InputExample(guid="",text_a = x,label = 0) for x in in_sentences] # here,"" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples,tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features,drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    return [(sentence,prediction['probabilities'],labels[prediction['labels']]) for sentence,prediction in zip(in_sentences,predictions)]




pred_sentences = [
  "That movie was absolutely awful","The acting was a bit lacking","The film was creative and surprising","Absolutely fantastic!"
]

predictions = getPrediction(pred_sentences)

predictions

estimator.train抛出ValueError：model_fn应该返回EstimatorSpec

visionhansome 回答：estimator.train抛出ValueError：model_fn应该返回EstimatorSpec

大家都在问