BiLSTM+CRF for sequence labeling with TensorFlow RNNs (deep learning)
Bidirectional LSTM + CRF for the sequence labeling problem
Source code
Since around the end of last year I have been working on an NLP task, a sequence labeling problem. Sequence labeling is a classic NLP problem, so I started by trying an HMM... no, scratch that, I used a CRF as the baseline, with CRF++ as the implementation, by the way.
I won't rehash CRF theory here; it is everywhere. In passing: a CRF beats an HMM both in theory and in practice. Still, the CRF was not promising on my task: precision was around 0.6 and recall was absurdly low, so F1 looked grim. My mentor said the features were insufficient; a senior labmate said the task itself is simply hard, so a low F1 is to be expected.
With the CRF baseline done, I set out to run BiLSTM+CRF on the sequence labeling task, but with too many projects in flight I had no spare energy to build it on the planned schedule. Bit by bit, following the steps of the experts and referring to existing code, I eventually got the BiLSTM+CRF implementation working. As it turned out, the results were still not great... maybe the task really is that nasty, or maybe the model needs strengthening.
Let me compare CRF with the LSTM cell. First, RNNs: an RNN is a better fit for sequence problems than a CNN, because part of the input to the hidden layer at the current time step is the hidden output of the previous step. Through this recurrent feedback connection it can see what came before, capturing the preceding context of the sequence and bringing it into the current computation, and it has nonlinear fitting power on top; neither is something a CRF can match. The LSTM cell then largely fixes the RNN's vanishing gradient problem; in effect it tells the gatekeeper: "friend, the information that doesn't matter much, just forget it, so it doesn't hog today's resources." The bidirectional LSTM is stronger still: it sees not only the past but also takes the future of the sequence into account, so the context is exploited in full. A CRF, by contrast, cannot take in long-range context the way an LSTM does; it mostly considers a linear weighted combination of local features over the sentence (scanning it with feature templates). Its special strength is that it computes a joint probability, optimizing the whole tag sequence rather than splicing together the best tag at each moment. So putting a BiLSTM and a CRF together makes a rather good combination, and it is currently the popular practice in academia.
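To make that "joint score over the whole sequence" point concrete, here is a minimal numpy sketch (toy names of my own, not code from the repo below) of how a BiLSTM+CRF scores one candidate tag path: the BiLSTM supplies a per-step emission score for each tag, the CRF adds a tag-to-tag transition score, and both are summed over the whole sentence. Training then pushes the gold path's score up relative to the logsumexp of all paths' scores.

import numpy as np

def sequence_score(emissions,transitions,tags):
    # emissions: (seq_len,num_tags) per-step tag scores from the BiLSTM
    # transitions: (num_tags,num_tags) CRF transition matrix
    # tags: one candidate tag path of length seq_len
    score = emissions[0,tags[0]]
    for t in range(1,len(tags)):
        score += transitions[tags[t - 1],tags[t]] + emissions[t,tags[t]]
    return score

emissions = np.array([[1.0,0.2],[0.3,2.0],[0.5,1.5]])  # 3 steps,2 tags
transitions = np.array([[0.1,0.8],[0.4,0.1]])
print sequence_score(emissions,transitions,[0,1,1])  # 1.0 + 0.8 + 2.0 + 0.1 + 1.5 = 5.4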
A few improvement ideas against the current working version (a rough sketch of the first one follows this list):
1. +CNN: use convolutions to extract the letter-level details of English words.
2. +char representation: same spirit as the above, extracting finer-grained details.
3. More joint models to go.
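For idea 1, here is a rough sketch of the kind of char-level CNN I have in mind, written against the same TF 0.x API as the code below; all names are illustrative, this is not code from the repo. It convolves over a word's character embeddings and max-pools over positions, giving a fixed-size letter-level feature that could be concatenated with the word embedding before the BiLSTM:

import tensorflow as tf

def char_cnn_feature(char_emb,filter_width=3,num_filters=50):
    # char_emb: [batch,word_len,char_dim] character embeddings of each word
    char_dim = char_emb.get_shape()[-1].value
    x = tf.expand_dims(char_emb,-1)  # -> [batch,word_len,char_dim,1]
    W = tf.get_variable("char_filter",[filter_width,char_dim,1,num_filters])
    b = tf.get_variable("char_bias",[num_filters])
    conv = tf.nn.conv2d(x,W,strides=[1,1,1,1],padding="VALID")
    h = tf.nn.relu(tf.nn.bias_add(conv,b))
    # max-pool over character positions -> one fixed-size vector per word
    pooled = tf.reduce_max(h,reduction_indices=1)
    return tf.reshape(pooled,[-1,num_filters])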
Fine, I've rambled enough. Code time:
For the complete code and the preprocessed data, head to GitHub: scofiled's github/bilstm+crf
requirements:
ubuntu 14
python 2.7
tensorflow 0.8
numpy
pandas 0.15
BILSTM_CRF.py
import math
import helper
import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn,rnn_cell
class BILSTM_CRF(object):
    def __init__(self,num_chars,num_classes,num_steps=200,num_epochs=100,embedding_matrix=None,is_training=True,is_crf=True,weight=False):
        # parameters
        self.max_f1 = 0
        self.learning_rate = 0.002
        self.dropout_rate = 0.5
        self.batch_size = 128
        self.num_layers = 1
        self.emb_dim = 100
        self.hidden_dim = 100
        self.num_epochs = num_epochs
        self.num_steps = num_steps
        self.num_chars = num_chars
        self.num_classes = num_classes
        # placeholders for x,y and weight
        self.inputs = tf.placeholder(tf.int32,[None,self.num_steps])
        self.targets = tf.placeholder(tf.int32,[None,self.num_steps])
        self.targets_weight = tf.placeholder(tf.float32,[None,self.num_steps])
        self.targets_transition = tf.placeholder(tf.int32,[None])
        # char embedding
        if embedding_matrix is not None:
            self.embedding = tf.Variable(embedding_matrix,trainable=False,name="emb",dtype=tf.float32)
        else:
            self.embedding = tf.get_variable("emb",[self.num_chars,self.emb_dim])
        self.inputs_emb = tf.nn.embedding_lookup(self.embedding,self.inputs)
        # turn [batch,num_steps,emb_dim] into a num_steps-long list of [batch,emb_dim]
        self.inputs_emb = tf.transpose(self.inputs_emb,[1,0,2])
        self.inputs_emb = tf.reshape(self.inputs_emb,[-1,self.emb_dim])
        self.inputs_emb = tf.split(0,self.num_steps,self.inputs_emb)
        # lstm cell
        lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)
        lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)
        # dropout
        if is_training:
            lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_fw,output_keep_prob=(1 - self.dropout_rate))
            lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_bw,output_keep_prob=(1 - self.dropout_rate))
        lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] * self.num_layers)
        lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] * self.num_layers)
        # get the length of each sample
        self.length = tf.reduce_sum(tf.sign(self.inputs),reduction_indices=1)
        self.length = tf.cast(self.length,tf.int32)
        # forward and backward
        self.outputs,_,_ = rnn.bidirectional_rnn(
            lstm_cell_fw,lstm_cell_bw,self.inputs_emb,dtype=tf.float32,sequence_length=self.length
        )
        # softmax
        self.outputs = tf.reshape(tf.concat(1,self.outputs),[-1,self.hidden_dim * 2])
        self.softmax_w = tf.get_variable("softmax_w",[self.hidden_dim * 2,self.num_classes])
        self.softmax_b = tf.get_variable("softmax_b",[self.num_classes])
        self.logits = tf.matmul(self.outputs,self.softmax_w) + self.softmax_b
        if not is_crf:
            pass
        else:
            self.tags_scores = tf.reshape(self.logits,[self.batch_size,self.num_steps,self.num_classes])
            self.transitions = tf.get_variable("transitions",[self.num_classes + 1,self.num_classes + 1])
            dummy_val = -1000
            class_pad = tf.Variable(dummy_val * np.ones((self.batch_size,self.num_steps,1)),dtype=tf.float32)
            self.observations = tf.concat(2,[self.tags_scores,class_pad])
            # begin_vec allows only the dummy begin tag (id num_classes) at the start,
            # end_vec allows only the pad tag (id 0) at the end
            begin_vec = tf.Variable(np.array([[dummy_val] * self.num_classes + [0] for _ in range(self.batch_size)]),dtype=tf.float32)
            end_vec = tf.Variable(np.array([[0] + [dummy_val] * self.num_classes for _ in range(self.batch_size)]),dtype=tf.float32)
            begin_vec = tf.reshape(begin_vec,[self.batch_size,1,self.num_classes + 1])
            end_vec = tf.reshape(end_vec,[self.batch_size,1,self.num_classes + 1])
            self.observations = tf.concat(1,[begin_vec,self.observations,end_vec])
            self.mask = tf.cast(tf.reshape(tf.sign(self.targets),[self.batch_size * self.num_steps]),tf.float32)
            # point score
            self.point_score = tf.gather(tf.reshape(self.tags_scores,[-1]),tf.range(0,self.batch_size * self.num_steps) * self.num_classes + tf.reshape(self.targets,[self.batch_size * self.num_steps]))
            self.point_score *= self.mask
            # transition score
            self.trans_score = tf.gather(tf.reshape(self.transitions,[-1]),self.targets_transition)
            # real score
            self.target_path_score = tf.reduce_sum(self.point_score) + tf.reduce_sum(self.trans_score)
            # all path score
            self.total_path_score,self.max_scores,self.max_scores_pre = self.forward(self.observations,self.transitions,self.length)
            # loss: negative log-likelihood of the gold paths
            self.loss = - (self.target_path_score - self.total_path_score)
        # summary
        self.train_summary = tf.scalar_summary("loss",self.loss)
        self.val_summary = tf.scalar_summary("loss",self.loss)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
    def logsumexp(self,x,axis=None):
        # numerically stable log(sum(exp(x))) along the given axis
        x_max = tf.reduce_max(x,reduction_indices=axis,keep_dims=True)
        x_max_ = tf.reduce_max(x,reduction_indices=axis)
        return x_max_ + tf.log(tf.reduce_sum(tf.exp(x - x_max),reduction_indices=axis))
    def forward(self,observations,transitions,length,is_viterbi=True,return_best_seq=True):
        # note: 6 below is hard-coded as self.num_classes + 1 for this 5-class task
        length = tf.reshape(length,[self.batch_size])
        # tile the transition matrix for every sample in the batch
        transitions = tf.reshape(tf.concat(0,[transitions] * self.batch_size),[self.batch_size,6,6])
        observations = tf.reshape(observations,[self.batch_size,self.num_steps + 2,6,1])
        observations = tf.transpose(observations,[1,0,2,3])
        previous = observations[0,:,:,:]
        max_scores = []
        max_scores_pre = []
        alphas = [previous]
        for t in range(1,self.num_steps + 2):
            previous = tf.reshape(previous,[self.batch_size,6,1])
            current = tf.reshape(observations[t,:,:,:],[self.batch_size,1,6])
            # alpha_t[i,j]: score of being in tag i at t-1 and moving to tag j at t
            alpha_t = previous + current + transitions
            if is_viterbi:
                max_scores.append(tf.reduce_max(alpha_t,reduction_indices=1))
                max_scores_pre.append(tf.argmax(alpha_t,dimension=1))
            alpha_t = tf.reshape(self.logsumexp(alpha_t,axis=1),[self.batch_size,6,1])
            alphas.append(alpha_t)
            previous = alpha_t
        alphas = tf.reshape(tf.concat(0,alphas),[self.num_steps + 2,self.batch_size,6,1])
        alphas = tf.transpose(alphas,[1,0,2,3])
        alphas = tf.reshape(alphas,[self.batch_size * (self.num_steps + 2),6,1])
        # pick each sample's alpha at its true length
        last_alphas = tf.gather(alphas,tf.range(0,self.batch_size) * (self.num_steps + 2) + length)
        last_alphas = tf.reshape(last_alphas,[self.batch_size,6,1])
        max_scores = tf.reshape(tf.concat(0,max_scores),(self.num_steps + 1,self.batch_size,6))
        max_scores_pre = tf.reshape(tf.concat(0,max_scores_pre),(self.num_steps + 1,self.batch_size,6))
        max_scores = tf.transpose(max_scores,[1,0,2])
        max_scores_pre = tf.transpose(max_scores_pre,[1,0,2])
        return tf.reduce_sum(self.logsumexp(last_alphas,axis=1)),max_scores,max_scores_pre
    def train(self,sess,save_file,X_train,y_train,X_val,y_val):
        saver = tf.train.Saver()
        char2id,id2char = helper.loadMap("char2id")
        label2id,id2label = helper.loadMap("label2id")
        merged = tf.merge_all_summaries()
        summary_writer_train = tf.train.SummaryWriter('loss_log/train_loss',sess.graph)
        summary_writer_val = tf.train.SummaryWriter('loss_log/val_loss',sess.graph)
        num_iterations = int(math.ceil(1.0 * len(X_train) / self.batch_size))
        cnt = 0
        for epoch in range(self.num_epochs):
            # shuffle train in each epoch
            sh_index = np.arange(len(X_train))
            np.random.shuffle(sh_index)
            X_train = X_train[sh_index]
            y_train = y_train[sh_index]
            print "current epoch: %d" % (epoch)
            for iteration in range(num_iterations):
                # train
                X_train_batch,y_train_batch = helper.nextBatch(X_train,y_train,start_index=iteration * self.batch_size,batch_size=self.batch_size)
                y_train_weight_batch = 1 + np.array((y_train_batch == label2id['B']) | (y_train_batch == label2id['E']),float)
                transition_batch = helper.getTransition(y_train_batch)
                _,loss_train,max_scores,max_scores_pre,length,train_summary =\
                    sess.run([
                        self.optimizer,
                        self.loss,
                        self.max_scores,
                        self.max_scores_pre,
                        self.length,
                        self.train_summary
                    ],
                    feed_dict={
                        self.targets_transition:transition_batch,
                        self.inputs:X_train_batch,
                        self.targets:y_train_batch,
                        self.targets_weight:y_train_weight_batch
                    })
                predicts_train = self.viterbi(max_scores,max_scores_pre,length,predict_size=self.batch_size)
                if iteration % 10 == 0:
                    cnt += 1
                    precision_train,recall_train,f1_train = self.evaluate(X_train_batch,y_train_batch,predicts_train,id2char,id2label)
                    summary_writer_train.add_summary(train_summary,cnt)
                    print "iteration: %5d,train loss: %5d,train precision: %.5f,train recall: %.5f,train f1: %.5f" % (iteration,loss_train,precision_train,recall_train,f1_train)
                # validation
                if iteration % 100 == 0:
                    X_val_batch,y_val_batch = helper.nextRandomBatch(X_val,y_val,batch_size=self.batch_size)
                    y_val_weight_batch = 1 + np.array((y_val_batch == label2id['B']) | (y_val_batch == label2id['E']),float)
                    transition_batch = helper.getTransition(y_val_batch)
                    loss_val,max_scores,max_scores_pre,length,val_summary =\
                        sess.run([
                            self.loss,
                            self.max_scores,
                            self.max_scores_pre,
                            self.length,
                            self.val_summary
                        ],
                        feed_dict={
                            self.targets_transition:transition_batch,
                            self.inputs:X_val_batch,
                            self.targets:y_val_batch,
                            self.targets_weight:y_val_weight_batch
                        })
                    predicts_val = self.viterbi(max_scores,max_scores_pre,length,predict_size=self.batch_size)
                    precision_val,recall_val,f1_val = self.evaluate(X_val_batch,y_val_batch,predicts_val,id2char,id2label)
                    summary_writer_val.add_summary(val_summary,cnt)
                    print "iteration: %5d,valid loss: %5d,valid precision: %.5f,valid recall: %.5f,valid f1: %.5f" % (iteration,loss_val,precision_val,recall_val,f1_val)
                    if f1_val > self.max_f1:
                        self.max_f1 = f1_val
                        save_path = saver.save(sess,save_file)
                        print "saved the best model with f1: %.5f" % (self.max_f1)
    def test(self,sess,X_test,X_test_str,output_path):
        char2id,id2char = helper.loadMap("char2id")
        label2id,id2label = helper.loadMap("label2id")
        num_iterations = int(math.ceil(1.0 * len(X_test) / self.batch_size))
        print "number of iteration: " + str(num_iterations)
        with open(output_path,"wb") as outfile:
            for i in range(num_iterations):
                print "iteration: " + str(i + 1)
                results = []
                X_test_batch = X_test[i * self.batch_size : (i + 1) * self.batch_size]
                X_test_str_batch = X_test_str[i * self.batch_size : (i + 1) * self.batch_size]
                if i == num_iterations - 1 and len(X_test_batch) < self.batch_size:
                    # pad the last batch up to batch_size with dummy samples
                    X_test_batch = list(X_test_batch)
                    X_test_str_batch = list(X_test_str_batch)
                    last_size = len(X_test_batch)
                    X_test_batch += [[0 for _ in range(self.num_steps)] for _ in range(self.batch_size - last_size)]
                    X_test_str_batch += [['x' for _ in range(self.num_steps)] for _ in range(self.batch_size - last_size)]
                    X_test_batch = np.array(X_test_batch)
                    X_test_str_batch = np.array(X_test_str_batch)
                    results = self.predictBatch(sess,X_test_batch,X_test_str_batch,id2label)
                    results = results[:last_size]
                else:
                    X_test_batch = np.array(X_test_batch)
                    results = self.predictBatch(sess,X_test_batch,X_test_str_batch,id2label)
                for j in range(len(results)):
                    doc = ''.join(X_test_str_batch[j])
                    outfile.write(doc + "<@>" + " ".join(results[j]).encode("utf-8") + "\n")
    def viterbi(self,max_scores,max_scores_pre,length,predict_size=128):
        best_paths = []
        for m in range(predict_size):
            path = []
            # start from the best tag at the last real step,then follow the backpointers
            last_max_node = np.argmax(max_scores[m][length[m]])
            # last_max_node = 0
            for t in range(1,length[m] + 1)[::-1]:
                last_max_node = max_scores_pre[m][t][last_max_node]
                path.append(last_max_node)
            path = path[::-1]
            best_paths.append(path)
        return best_paths
    def predictBatch(self,sess,X,X_str,id2label):
        results = []
        length,max_scores,max_scores_pre = sess.run([self.length,self.max_scores,self.max_scores_pre],feed_dict={self.inputs:X})
        predicts = self.viterbi(max_scores,max_scores_pre,length,self.batch_size)
        for i in range(len(predicts)):
            x = ''.join(X_str[i]).decode("utf-8")
            # drop the pad tag (0) and the dummy begin tag (5)
            y_pred = ''.join([id2label[val] for val in predicts[i] if val != 5 and val != 0])
            entitys = helper.extractEntity(x,y_pred)
            results.append(entitys)
        return results
    def evaluate(self,X,y_true,y_pred,id2char,id2label):
        precision = -1.0
        recall = -1.0
        f1 = -1.0
        hit_num = 0
        pred_num = 0
        true_num = 0
        for i in range(len(y_true)):
            x = ''.join([str(id2char[val].encode("utf-8")) for val in X[i]])
            y = ''.join([str(id2label[val].encode("utf-8")) for val in y_true[i]])
            y_hat = ''.join([id2label[val] for val in y_pred[i] if val != 5])
            true_labels = helper.extractEntity(x,y)
            pred_labels = helper.extractEntity(x,y_hat)
            hit_num += len(set(true_labels) & set(pred_labels))
            pred_num += len(set(pred_labels))
            true_num += len(set(true_labels))
        if pred_num != 0:
            precision = 1.0 * hit_num / pred_num
        if true_num != 0:
            recall = 1.0 * hit_num / true_num
        if precision > 0 and recall > 0:
            f1 = 2.0 * (precision * recall) / (precision + recall)
        return precision,recall,f1
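Before moving on to helper.py, one aside on the loss above: self.loss is a negative log-likelihood, where target_path_score is the score of the gold path and total_path_score is the logsumexp over all paths, which forward() computes with the alpha recursion. Here is a minimal numpy sketch of that recursion on toy sizes (my own names; it skips the begin/end padding the TF version adds), useful as a sanity check of the idea:

import numpy as np

def log_sum_exp(x,axis):
    m = x.max(axis=axis,keepdims=True)
    return np.squeeze(m + np.log(np.exp(x - m).sum(axis=axis,keepdims=True)),axis=axis)

def total_path_score(emissions,transitions):
    # emissions: (seq_len,num_tags); transitions: (num_tags,num_tags)
    alpha = emissions[0]  # alpha[j]: logsumexp of all length-1 paths ending in tag j
    for t in range(1,len(emissions)):
        # previous alphas + transition + emission,then logsumexp over the previous tag
        alpha = log_sum_exp(alpha[:,None] + transitions + emissions[t][None,:],axis=0)
    return log_sum_exp(alpha,axis=0)  # logsumexp over the final tag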
helper.py
#encoding:utf-8
import re
import os
import csv
import time
import pickle
import numpy as np
import pandas as pd
def getEmbedding(infile_path="embedding"):
    char2id,id2char = loadMap("char2id")
    row_index = 0
    emb_matrix = None
    with open(infile_path,"rb") as infile:
        for row in infile:
            row = row.strip()
            row_index += 1
            if row_index == 1:
                # header line: vocabulary size and embedding dimension
                num_chars = int(row.split()[0])
                emb_dim = int(row.split()[1])
                emb_matrix = np.zeros((len(char2id.keys()),emb_dim))
                continue
            items = row.split()
            char = items[0]
            emb_vec = [float(val) for val in items[1:]]
            if char in char2id:
                emb_matrix[char2id[char]] = emb_vec
    return emb_matrix
def nextBatch(X,y,start_index,batch_size=128):
    last_index = start_index + batch_size
    X_batch = list(X[start_index:min(last_index,len(X))])
    y_batch = list(y[start_index:min(last_index,len(X))])
    if last_index > len(X):
        left_size = last_index - (len(X))
        for i in range(left_size):
            index = np.random.randint(len(X))
            X_batch.append(X[index])
            y_batch.append(y[index])
    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    return X_batch,y_batch
def nextRandomBatch(X,y,batch_size=128):
    X_batch = []
    y_batch = []
    for i in range(batch_size):
        index = np.random.randint(len(X))
        X_batch.append(X[index])
        y_batch.append(y[index])
    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    return X_batch,y_batch
# use "0" to padding the sentence
def padding(sample,seq_max_len):
for i in range(len(sample)):
if len(sample[i]) < seq_max_len:
sample[i] += [0 for _ in range(seq_max_len - len(sample[i]))]
return sample
def prepare(chars,labels,seq_max_len,is_padding=True):
    X = []
    y = []
    tmp_x = []
    tmp_y = []
    for record in zip(chars,labels):
        c = record[0]
        l = record[1]
        # empty line marks a sentence boundary
        if c == -1:
            if len(tmp_x) <= seq_max_len:
                X.append(tmp_x)
                y.append(tmp_y)
            tmp_x = []
            tmp_y = []
        else:
            tmp_x.append(c)
            tmp_y.append(l)
    if is_padding:
        X = np.array(padding(X,seq_max_len))
    else:
        X = np.array(X)
    y = np.array(padding(y,seq_max_len))
    return X,y
def extractEntity(sentence,labels):
    entitys = []
    re_entity = re.compile(r'BM*E')
    m = re_entity.search(labels)
    while m:
        entity_labels = m.group()
        start_index = labels.find(entity_labels)
        entity = sentence[start_index:start_index + len(entity_labels)]
        labels = list(labels)
        # replace the matched "BM*E" with "OO*O" so the search can move on
        labels[start_index: start_index + len(entity_labels)] = ['O' for i in range(len(entity_labels))]
        entitys.append(entity)
        labels = ''.join(labels)
        m = re_entity.search(labels)
    return entitys
def loadMap(token2id_filepath):
    if not os.path.isfile(token2id_filepath):
        print "file does not exist,building map"
        buildMap()
    token2id = {}
    id2token = {}
    with open(token2id_filepath) as infile:
        for row in infile:
            row = row.rstrip().decode("utf-8")
            token = row.split('\t')[0]
            token_id = int(row.split('\t')[1])
            token2id[token] = token_id
            id2token[token_id] = token
    return token2id,id2token
def saveMap(id2char,id2label):
    with open("char2id","wb") as outfile:
        for idx in id2char:
            outfile.write(id2char[idx] + "\t" + str(idx) + "\r\n")
    with open("label2id","wb") as outfile:
        for idx in id2label:
            outfile.write(id2label[idx] + "\t" + str(idx) + "\r\n")
    print "saved map between token and id"
def buildMap(train_path="train.in"):
    df_train = pd.read_csv(train_path,delimiter='\t',quoting=csv.QUOTE_NONE,skip_blank_lines=False,header=None,names=["char","label"])
    chars = list(set(df_train["char"][df_train["char"].notnull()]))
    labels = list(set(df_train["label"][df_train["label"].notnull()]))
    char2id = dict(zip(chars,range(1,len(chars) + 1)))
    label2id = dict(zip(labels,range(1,len(labels) + 1)))
    id2char = dict(zip(range(1,len(chars) + 1),chars))
    id2label = dict(zip(range(1,len(labels) + 1),labels))
    id2char[0] = "<PAD>"
    id2label[0] = "<PAD>"
    char2id["<PAD>"] = 0
    label2id["<PAD>"] = 0
    id2char[len(chars) + 1] = "<NEW>"
    char2id["<NEW>"] = len(chars) + 1
    saveMap(id2char,id2label)
    return char2id,id2char,label2id,id2label
def getTrain(train_path,val_path,train_val_ratio=0.99,use_custom_val=False,seq_max_len=200):
    char2id,id2char,label2id,id2label = buildMap(train_path)
    df_train = pd.read_csv(train_path,delimiter='\t',quoting=csv.QUOTE_NONE,skip_blank_lines=False,header=None,names=["char","label"])
    # map the char and label into id
    df_train["char_id"] = df_train.char.map(lambda x : -1 if str(x) == str(np.nan) else char2id[x])
    df_train["label_id"] = df_train.label.map(lambda x : -1 if str(x) == str(np.nan) else label2id[x])
    # convert the data into matrices
    X,y = prepare(df_train["char_id"],df_train["label_id"],seq_max_len)
    # shuffle the samples
    num_samples = len(X)
    indexs = np.arange(num_samples)
    np.random.shuffle(indexs)
    X = X[indexs]
    y = y[indexs]
    if val_path != None:
        X_train = X
        y_train = y
        X_val,y_val = getTest(val_path,is_validation=True,seq_max_len=seq_max_len)
    else:
        # split the data into train and validation set
        X_train = X[:int(num_samples * train_val_ratio)]
        y_train = y[:int(num_samples * train_val_ratio)]
        X_val = X[int(num_samples * train_val_ratio):]
        y_val = y[int(num_samples * train_val_ratio):]
    print "train size: %d,validation size: %d" % (len(X_train),len(y_val))
    return X_train,y_train,X_val,y_val
def getTest(test_path="test.in",is_validation=False,seq_max_len=200):
    char2id,id2char = loadMap("char2id")
    label2id,id2label = loadMap("label2id")
    df_test = pd.read_csv(test_path,delimiter='\t',quoting=csv.QUOTE_NONE,skip_blank_lines=False,header=None,names=["char","label"])
    def mapFunc(x,char2id):
        if str(x) == str(np.nan):
            return -1
        elif x.decode("utf-8") not in char2id:
            return char2id["<NEW>"]
        else:
            return char2id[x.decode("utf-8")]
    df_test["char_id"] = df_test.char.map(lambda x:mapFunc(x,char2id))
    df_test["label_id"] = df_test.label.map(lambda x : -1 if str(x) == str(np.nan) else label2id[x])
    if is_validation:
        X_test,y_test = prepare(df_test["char_id"],df_test["label_id"],seq_max_len)
        return X_test,y_test
    else:
        df_test["char"] = df_test.char.map(lambda x : -1 if str(x) == str(np.nan) else x)
        X_test,_ = prepare(df_test["char_id"],df_test["char_id"],seq_max_len)
        X_test_str,_ = prepare(df_test["char"],df_test["char"],seq_max_len,is_padding=False)
        print "test size: %d" % (len(X_test))
        return X_test,X_test_str
def getTransition(y_train_batch):
    transition_batch = []
    for m in range(len(y_train_batch)):
        # prepend the dummy begin tag (id 5) and append the end/pad tag (id 0)
        y = [5] + list(y_train_batch[m]) + [0]
        for t in range(len(y)):
            if t + 1 == len(y):
                continue
            i = y[t]
            j = y[t + 1]
            if i == 0:
                break
            # flatten the pair (i,j) into an index of the 6x6 transition matrix
            transition_batch.append(i * 6 + j)
    transition_batch = np.array(transition_batch)
    return transition_batch
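One more aside before train.py, on what the two helpers above actually produce. The tag set here appears to be B/M/E/O plus <PAD> (id 0), and tag id 5 is the extra dummy begin/end class of the CRF layer (the begin_vec/end_vec rows), which is why the transition matrix is 6x6. Assuming that mapping (the concrete B/M/E/O ids depend on buildMap's ordering), illustratively:

print extractEntity("ABCDEFG","OBMEOBE")  # ['BCD','FG']
# getTransition prepends the begin tag 5 and appends the end/pad tag 0,then
# flattens each consecutive pair (i,j) into the index i * 6 + j of the 6x6 matrix:
print getTransition(np.array([[1,2,3,4,0,0]]))  # [31  8 15 22 24]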
train.py
import time
import helper
import argparse
import numpy as np
import pandas as pd
import tensorflow as tf
from BILSTM_CRF import BILSTM_CRF
# python train.py train.in model -v validation.in -c char_emb -e 10 -g 2
parser = argparse.ArgumentParser()
parser.add_argument("train_path",help="the path of the train file")
parser.add_argument("save_path",help="the path of the saved model")
parser.add_argument("-v","--val_path",help="the path of the validation file",default=None)
parser.add_argument("-e","--epoch",help="the number of epoch",default=100,type=int)
parser.add_argument("-c","--char_emb",help="the char embedding file",default=None)
parser.add_argument("-g","--gpu",help="the id of gpu,the default is 0",default=0,type=int)
args = parser.parse_args()
train_path = args.train_path
save_path = args.save_path
val_path = args.val_path
num_epochs = args.epoch
emb_path = args.char_emb
gpu_config = "/cpu:0"
#gpu_config = "/gpu:"+str(args.gpu)
num_steps = 200 # must be consistent with the test script
start_time = time.time()
print "preparing train and validation data"
X_train,y_train,X_val,y_val = helper.getTrain(train_path=train_path,val_path=val_path,seq_max_len=num_steps)
char2id,id2char = helper.loadMap("char2id")
label2id,id2label = helper.loadMap("label2id")
num_chars = len(id2char.keys())
num_classes = len(id2label.keys())
if emb_path != None:
    embedding_matrix = helper.getEmbedding(emb_path)
else:
    embedding_matrix = None
print "building model"
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    with tf.device(gpu_config):
        initializer = tf.random_uniform_initializer(-0.1,0.1)
        with tf.variable_scope("model",reuse=None,initializer=initializer):
            model = BILSTM_CRF(num_chars=num_chars,num_classes=num_classes,num_steps=num_steps,num_epochs=num_epochs,embedding_matrix=embedding_matrix,is_training=True)
        print "training model"
        tf.initialize_all_variables().run()
        model.train(sess,save_path,X_train,y_train,X_val,y_val)
print "final best f1 is: %f" % (model.max_f1)
end_time = time.time()
print "time used %f(hour)" % ((end_time - start_time) / 3600)
test.py
import time
import helper
import argparse
import numpy as np
import pandas as pd
import tensorflow as tf
from BILSTM_CRF import BILSTM_CRF
# python test.py model test.in test.out -c char_emb -g 2
parser = argparse.ArgumentParser()
parser.add_argument("model_path",help="the path of model file")
parser.add_argument("test_path",help="the path of test file")
parser.add_argument("output_path",help="the path of output file")
parser.add_argument("-c",type=int)
args = parser.parse_args()
model_path = args.model_path
test_path = args.test_path
output_path = args.output_path
gpu_config = "/cpu:0"
emb_path = args.char_emb
num_steps = 200 # must be consistent with the train script
start_time = time.time()
print "preparing test data"
X_test,X_test_str = helper.getTest(test_path=test_path,seq_max_len=num_steps)
print "loading model parameter"
saver = tf.train.Saver()
saver.restore(sess,model_path)
print "testing"
model.test(sess,output_path)
end_time = time.time()
print "time used %f(hour)" % ((end_time - start_time) / 3600)
For the preprocessed data, see GitHub: scofiled's github/bilstm+crf