model_fn = model_fn_builder(
num_labels=len(label_list) + 1,
def file_based_input_fn_builder(input_file, seq_length, is_training, drop_remainder):
name_to_features = {
"input_ids": tf.FixedLenFeature([seq_length], tf.int64),
"input_mask": tf.FixedLenFeature([seq_length], tf.int64),
"segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
"label_ids": tf.FixedLenFeature([seq_length], tf.int64),
def _decode_record(record, name_to_features):
    """Parse one serialized record into a dict of tensors.

    Args:
        record: A serialized example, as yielded by the TFRecord dataset.
        name_to_features: Feature spec passed to ``tf.parse_single_example``.

    Returns:
        The parsed example dict, with every ``tf.int64`` tensor cast to
        ``tf.int32``; tensors of other dtypes are left unchanged.
    """
    example = tf.parse_single_example(record, name_to_features)
    # Snapshot the keys so entries can be reassigned while looping.
    for name in list(example.keys()):
        t = example[name]
        if t.dtype == tf.int64:
            # tf.to_int32 is deprecated (TF 1.13 emits a warning asking for
            # tf.cast instead); tf.cast(t, tf.int32) is the equivalent call.
            t = tf.cast(t, tf.int32)
        example[name] = t
    return example
def input_fn(params):
# Builds the tf.data pipeline over `input_file` for the Estimator.
# NOTE(review): the next line overwrites whatever batch size the caller put
# in `params` with a hard-coded 32. The run logs in this notebook report
# "Batch size = 64" while the feature shapes are (32, 128), so the logged
# batch size (and any step count derived from it) may not match what
# actually runs -- confirm whether this override is intentional.
params["batch_size"] = 32
batch_size = params["batch_size"]
d = tf.data.TFRecordDataset(input_file)
if is_training:
# repeat() with no count repeats the dataset indefinitely.
d = d.repeat()
# Shuffle buffer holds 300 records.
d = d.shuffle(buffer_size=300)
# map_and_batch applies _decode_record to each record. The remaining
# arguments of this call are not visible in this extract.
# NOTE(review): tf.contrib.data.map_and_batch is deprecated; the TF warning
# recommends tf.data.experimental.map_and_batch(...).
d = d.apply(tf.contrib.data.map_and_batch(
lambda record: _decode_record(record, name_to_features),
return d
return input_fn
train_input_fn = file_based_input_fn_builder(
num_train_size = len(train_examples)
tf.logging.info("***** Running training *****")
tf.logging.info(" Num examples = %d", num_train_size)
tf.logging.info(" Batch size = %d", batch_size)
tf.logging.info(" Num steps = %d", num_train_steps)
estimator = tf.estimator.Estimator(
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
INFO:tensorflow:***** Running training *****
INFO:tensorflow: Num examples = 20864
INFO:tensorflow: Batch size = 64
INFO:tensorflow: Num steps = 1630
INFO:tensorflow:Using config: {'_model_dir': './ner/output', '_tf_random_seed': None, '_save_summary_steps': 1000, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f68463962b0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From <ipython-input-8-ff8f40149364>:37: map_and_batch (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.experimental.map_and_batch(...)`.
WARNING:tensorflow:From <ipython-input-8-ff8f40149364>:23: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:*** Features ***
INFO:tensorflow: name = input_ids, shape = (32, 128)
INFO:tensorflow: name = input_mask, shape = (32, 128)
INFO:tensorflow: name = label_ids, shape = (32, 128)
INFO:tensorflow: name = segment_ids, shape = (32, 128)
WARNING:tensorflow:From /home/ma-user/work/ma_share/nlp_ner_0/ner/bert/modeling.py:358: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /home/ma-user/work/ma_share/nlp_ner_0/ner/bert/modeling.py:671: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.
shape of input_ids (32, 128)
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/contrib/crf/python/ops/crf.py:213: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/training/learning_rate_decay_v2.py:321: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into ./ner/output/model.ckpt.
INFO:tensorflow:loss = 138.63965, step = 0
INFO:tensorflow:global_steps = 0, loss = 138.63965
INFO:tensorflow:global_step/sec: 2.12094
INFO:tensorflow:loss = 36.574963, step = 100 (47.149 sec)
INFO:tensorflow:global_steps = 100, loss = 36.574963 (47.149 sec)
INFO:tensorflow:global_step/sec: 2.82263
INFO:tensorflow:loss = 30.633581, step = 200 (35.428 sec)
INFO:tensorflow:global_steps = 200, loss = 30.633581 (35.428 sec)
INFO:tensorflow:global_step/sec: 2.8278
INFO:tensorflow:loss = 39.07569, step = 300 (35.366 sec)
INFO:tensorflow:global_steps = 300, loss = 39.07569 (35.366 sec)
INFO:tensorflow:global_step/sec: 2.81413
INFO:tensorflow:loss = 41.826813, step = 400 (35.533 sec)
INFO:tensorflow:global_steps = 400, loss = 41.826813 (35.533 sec)
INFO:tensorflow:global_step/sec: 2.82309
INFO:tensorflow:loss = 35.313362, step = 500 (35.422 sec)
INFO:tensorflow:global_steps = 500, loss = 35.313362 (35.422 sec)
INFO:tensorflow:global_step/sec: 2.82153
INFO:tensorflow:loss = 33.8309, step = 600 (35.442 sec)
INFO:tensorflow:global_steps = 600, loss = 33.8309 (35.442 sec)
INFO:tensorflow:global_step/sec: 2.82503
INFO:tensorflow:loss = 32.518703, step = 700 (35.398 sec)
INFO:tensorflow:global_steps = 700, loss = 32.518703 (35.399 sec)
INFO:tensorflow:global_step/sec: 2.81644
INFO:tensorflow:loss = 35.52353, step = 800 (35.505 sec)
INFO:tensorflow:global_steps = 800, loss = 35.52353 (35.505 sec)
INFO:tensorflow:global_step/sec: 2.82622
INFO:tensorflow:loss = 37.82669, step = 900 (35.383 sec)
INFO:tensorflow:global_steps = 900, loss = 37.82669 (35.383 sec)
INFO:tensorflow:Saving checkpoints for 1000 into ./ner/output/model.ckpt.
INFO:tensorflow:global_step/sec: 2.20189
INFO:tensorflow:loss = 30.113663, step = 1000 (45.416 sec)
INFO:tensorflow:global_steps = 1000, loss = 30.113663 (45.415 sec)
INFO:tensorflow:global_step/sec: 2.82651
INFO:tensorflow:loss = 35.227222, step = 1100 (35.381 sec)
INFO:tensorflow:global_steps = 1100, loss = 35.227222 (35.381 sec)
INFO:tensorflow:global_step/sec: 2.82383
INFO:tensorflow:loss = 32.62957, step = 1200 (35.411 sec)
INFO:tensorflow:global_steps = 1200, loss = 32.62957 (35.411 sec)
INFO:tensorflow:global_step/sec: 2.82625
INFO:tensorflow:loss = 33.095165, step = 1300 (35.383 sec)
INFO:tensorflow:global_steps = 1300, loss = 33.095165 (35.383 sec)
INFO:tensorflow:global_step/sec: 2.82021
INFO:tensorflow:loss = 31.56667, step = 1400 (35.458 sec)
INFO:tensorflow:global_steps = 1400, loss = 31.56667 (35.458 sec)
INFO:tensorflow:global_step/sec: 2.82301
INFO:tensorflow:loss = 30.289349, step = 1500 (35.425 sec)
INFO:tensorflow:global_steps = 1500, loss = 30.289349 (35.425 sec)
INFO:tensorflow:global_step/sec: 2.82841
INFO:tensorflow:loss = 32.09925, step = 1600 (35.354 sec)
INFO:tensorflow:global_steps = 1600, loss = 32.09925 (35.354 sec)
INFO:tensorflow:Saving checkpoints for 1630 into ./ner/output/model.ckpt.
INFO:tensorflow:Loss for final step: 34.578945.
<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7f684fdad6d8>
eval_examples = processor.get_dev_examples(data_dir)
eval_file = os.path.join(output_dir, "eval.tf_record")
eval_examples, label_list, max_seq_length, tokenizer, eval_file)
data_config['eval.tf_record_path'] = eval_file
data_config['num_eval_size'] = len(eval_examples)
num_eval_size = data_config.get('num_eval_size', 0)
tf.logging.info("***** Running evaluation *****")
tf.logging.info(" Num examples = %d", num_eval_size)
tf.logging.info(" Batch size = %d", batch_size)
eval_steps = None
eval_drop_remainder = False
eval_input_fn = file_based_input_fn_builder(
# Run evaluation with the trained estimator; `result` is a dict of metrics.
# NOTE(review): eval_steps is None here, which per the Estimator docs means
# evaluation runs until the input_fn is exhausted -- confirm.
result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
output_eval_file = os.path.join(output_dir, "eval_results.txt")
# Write every metric, sorted by name, to both the log and eval_results.txt.
with codecs.open(output_eval_file, "w", encoding='utf-8') as writer:
tf.logging.info("***** Eval results *****")
for key in sorted(result.keys()):
tf.logging.info(" %s = %s", key, str(result[key]))
writer.write("%s = %s\n" % (key, str(result[key])))
# Persist the data config only if no config file exists yet at that path.
if not os.path.exists(data_config_path):
with codecs.open(data_config_path, 'a', encoding='utf-8') as fd:
json.dump(data_config, fd)
INFO:tensorflow:Writing example 0 of 4631
INFO:tensorflow:***** Running evaluation *****
INFO:tensorflow: Num examples = 4631
INFO:tensorflow: Batch size = 64
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:*** Features ***
INFO:tensorflow: name = input_ids, shape = (?, 128)
INFO:tensorflow: name = input_mask, shape = (?, 128)
INFO:tensorflow: name = label_ids, shape = (?, 128)
INFO:tensorflow: name = segment_ids, shape = (?, 128)
shape of input_ids (?, 128)
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/ops/metrics_impl.py:363: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-03-11T07:52:30Z
INFO:tensorflow:Graph was finalized.
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./ner/output/model.ckpt-1630
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2021-03-11-07:52:57
INFO:tensorflow:Saving dict for global step 1630: eval_loss = 0.040791675, global_step = 1630, loss = 27.17968
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1630: ./ner/output/model.ckpt-1630
INFO:tensorflow:***** Eval results *****
INFO:tensorflow: eval_loss = 0.040791675
INFO:tensorflow: global_step = 1630
INFO:tensorflow: loss = 27.17968
token_path = os.path.join(output_dir, "token_test.txt")
if os.path.exists(token_path):
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# this is only safe as long as label2id.pkl in output_dir is a trusted file.
with codecs.open(os.path.join(output_dir, 'label2id.pkl'), 'rb') as rf:
label2id = pickle.load(rf)
# Invert the mapping so predicted ids can be turned back into label strings.
id2label = {value: key for key, value in label2id.items()}
predict_examples = processor.get_test_examples(data_dir)
predict_file = os.path.join(output_dir, "predict.tf_record")
filed_based_convert_examples_to_features(predict_examples, label_list,
max_seq_length, tokenizer,
predict_file, mode="test")
tf.logging.info("***** Running prediction*****")
tf.logging.info(" Num examples = %d", len(predict_examples))
tf.logging.info(" Batch size = %d", batch_size)
predict_drop_remainder = False
predict_input_fn = file_based_input_fn_builder(
predicted_result = estimator.evaluate(input_fn=predict_input_fn)
output_eval_file = os.path.join(output_dir, "predicted_results.txt")
with codecs.open(output_eval_file, "w", encoding='utf-8') as writer:
tf.logging.info("***** Predict results *****")
for key in sorted(predicted_result.keys()):
tf.logging.info(" %s = %s", key, str(predicted_result[key]))
writer.write("%s = %s\n" % (key, str(predicted_result[key])))
result = estimator.predict(input_fn=predict_input_fn)
output_predict_file = os.path.join(output_dir, "label_test.txt")
def result_to_pair(writer):
# Walks the test examples in lockstep with the predictions and writes
# "token label predicted-label" rows to `writer`.
# NOTE(review): several lines appear to be missing from this extract (the
# `if` statements below have no bodies and the `except` has no matching
# `try`), so the exact control flow cannot be confirmed from here.
for predict_line, prediction in zip(predict_examples, result):
idx = 0
line = ''
# Tokens and labels are stored as space-separated strings on the example.
line_token = str(predict_line.text).split(' ')
label_token = str(predict_line.label).split(' ')
if len(line_token) != len(label_token):
# NOTE(review): `id` shadows the Python builtin of the same name.
for id in prediction:
if id == 0:
curr_labels = id2label[id]
if curr_labels in ['[CLS]', '[SEP]']:
# One output row: token, the example's label, the predicted label.
line += line_token[idx] + ' ' + label_token[idx] + ' ' + curr_labels + '\n'
# NOTE(review): catching Exception and resetting `line` silently discards
# the rows built so far for this example -- consider logging `e`.
except Exception as e:
line = ''
idx += 1
writer.write(line + '\n')
from ner.src.conlleval import return_report
with codecs.open(output_predict_file, 'w', encoding='utf-8') as writer:
eval_result = return_report(output_predict_file)
for line in eval_result:
INFO:tensorflow:Writing example 0 of 68
INFO:tensorflow:***** Running prediction*****
INFO:tensorflow: Num examples = 68
INFO:tensorflow: Batch size = 64
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:*** Features ***
INFO:tensorflow: name = input_ids, shape = (?, 128)
INFO:tensorflow: name = input_mask, shape = (?, 128)
INFO:tensorflow: name = label_ids, shape = (?, 128)
INFO:tensorflow: name = segment_ids, shape = (?, 128)
shape of input_ids (?, 128)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-03-11T07:53:01Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./ner/output/model.ckpt-1630
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2021-03-11-07:53:04
INFO:tensorflow:Saving dict for global step 1630: eval_loss = 0.007008272, global_step = 1630, loss = 25.96924
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1630: ./ner/output/model.ckpt-1630
INFO:tensorflow:***** Predict results *****
INFO:tensorflow: eval_loss = 0.007008272
INFO:tensorflow: global_step = 1630
INFO:tensorflow: loss = 25.96924
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:*** Features ***
INFO:tensorflow: name = input_ids, shape = (?, 128)
INFO:tensorflow: name = input_mask, shape = (?, 128)
INFO:tensorflow: name = label_ids, shape = (?, 128)
INFO:tensorflow: name = segment_ids, shape = (?, 128)
shape of input_ids (?, 128)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./ner/output/model.ckpt-1630
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
processed 2270 tokens with 78 phrases; found: 76 phrases; correct: 67.
accuracy: 99.47%; precision: 88.16%; recall: 85.90%; FB1: 87.01
LOC: precision: 100.00%; recall: 100.00%; FB1: 100.00 45
ORG: precision: 77.78%; recall: 87.50%; FB1: 82.35 9
PER: precision: 68.18%; recall: 60.00%; FB1: 63.83 22
11.命名实体识别效果测试 —— 交互式测试方式
- 找到本页面顶部的菜单栏,点击 Kernel -> Restart;
- 执行下面的 “%run ner/src/terminal_predict.py” 命令;
- 手动输入一个句子,按回车进行预测;
- 如果想结束测试,则输入“再见”。
%run ner/src/terminal_predict.py
/home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint8 = np.dtype([("qint8", np.int8, 1)])
/home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint16 = np.dtype([("qint16", np.int16, 1)])
/home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint32 = np.dtype([("qint32", np.int32, 1)])
/home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
np_resource = np.dtype([("resource", np.ubyte, 1)])
checkpoint path:./ner/output/checkpoint
going to restore checkpoint
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /home/ma-user/work/ma_share/nlp_ner_0/ner/bert/modeling.py:671: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/contrib/crf/python/ops/crf.py:567: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py:626: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
WARNING:tensorflow:From /home/ma-user/anaconda3/envs/TensorFlow-1.13.1/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./ner/output/model.ckpt-1630
{1: 'O', 2: 'B-PER', 3: 'I-PER', 4: 'B-ORG', 5: 'I-ORG', 6: 'B-LOC', 7: 'I-LOC', 8: 'X', 9: '[CLS]', 10: '[SEP]'}
[['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'B-PER', 'I-PER', 'I-PER', 'O', 'O', 'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'O', 'I-LOC', 'B-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O']]
LOC, 人民大会堂, 楼, 金色, 中
PER, 李克强
ORG, 第十三届全国人大, 国务院
time used: 0.831476 sec
