创新互联www.cdcxhl.cn八线动态BGP香港云服务器提供商,新人活动买多久送多久,划算不套路!
成都创新互联坚持“要么做到,要么别承诺”的工作理念,服务领域包括:网站设计制作、网站设计、企业官网、英文网站、手机端网站、网站推广等服务,满足客户于互联网时代的宝坻网站设计、移动媒体设计的需求,帮助企业找到有效的互联网解决方案。努力成为您成熟可靠的网络建设合作伙伴!

这篇文章主要介绍如何使用 Keras 实现 BiLSTM+CNN+CRF 文字标记(NER,命名实体识别)。文中的示例代码介绍得非常详细,具有一定的参考价值,感兴趣的小伙伴们一定要看完!
我就废话不多说了,大家还是直接看代码吧~
"""Train a BiLSTM + CNN + CRF sequence tagger (NER) with Keras.

The script builds the model, writes its architecture to ``model.json``,
and trains it while checkpointing the best weights.

NOTE: ``max_len``, ``char_value_dict``, ``class_label_count``, ``x_train``
and ``y_train`` are referenced below but are not defined in this excerpt;
they must be prepared before this code runs.
"""
import sys

import keras
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, Callback
from keras.layers import *
from keras.models import Model
from keras.optimizers import SGD, RMSprop, Adagrad, Adam
from keras.models import *
from keras.metrics import *
from keras import backend as K
from keras.regularizers import *
from keras.metrics import categorical_accuracy
# Imported explicitly (after the star imports) so the Keras 2 names used
# below cannot be shadowed by any of the wildcard imports above.
from keras.layers import SpatialDropout1D, concatenate
from keras_contrib.layers import CRF
from visual_callbacks import AccLossPlotter

# Plots accuracy/loss curves during training and saves the figure next to
# the script (sys.path[0]).
plotter = AccLossPlotter(graphs=['acc', 'loss'], save_graph=True,
                         save_graph_path=sys.path[0])


class LossHistory(Callback):
    """Callback that records the training loss reported after every batch.

    After training, ``losses`` holds one entry per processed batch (the
    value of ``logs['loss']``, or ``None`` if Keras did not report one).
    """

    def on_train_begin(self, logs=None):
        # Start from an empty list on every fit() call.
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        # ``logs`` defaults to None instead of a shared mutable dict.
        logs = logs or {}
        self.losses.append(logs.get('loss'))


# ---------------------------------------------------------------------------
# Model definition
# ---------------------------------------------------------------------------
word_input = Input(shape=(max_len,), dtype='int32', name='word_input')
word_emb = Embedding(len(char_value_dict) + 2, output_dim=64,
                     input_length=max_len, name='word_emb')(word_input)
# Keras 2 drops the legacy ``dropout=0.2`` argument of Embedding (with a
# warning), so the intended dropout was never applied. SpatialDropout1D
# right after the embedding is the replacement Keras recommends.
word_emb_d = SpatialDropout1D(0.2)(word_emb)

# Recurrent branch: bidirectional LSTM over the embedded sequence.
bilstm = Bidirectional(
    LSTM(32, dropout=0.1, recurrent_dropout=0.1, return_sequences=True)
)(word_emb_d)
bilstm_d = Dropout(0.1)(bilstm)

# Convolutional branch: pad by half the window on both sides so that the
# 'valid' convolution with a (2 * half_window_size + 1)-wide kernel yields
# the same number of time steps as its input, which the concatenation with
# the BiLSTM output below relies on.
half_window_size = 2
paddinglayer = ZeroPadding1D(padding=half_window_size)(word_emb_d)
conv = Conv1D(filters=50, kernel_size=(2 * half_window_size + 1),
              padding='valid')(paddinglayer)
conv_d = Dropout(0.1)(conv)
dense_conv = TimeDistributed(Dense(50))(conv_d)

# Join both branches along the feature axis and project to label scores.
rnn_cnn_merge = concatenate([bilstm_d, dense_conv], axis=2)
dense = TimeDistributed(Dense(class_label_count))(rnn_cnn_merge)

# sparse_target=False: presumably y_train is one-hot encoded per time
# step -- TODO confirm against the data preparation code.
crf = CRF(class_label_count, sparse_target=False)
crf_output = crf(dense)

# Compile the model with the CRF layer's own loss and accuracy.
model = Model(inputs=[word_input], outputs=[crf_output])
model.compile(loss=crf.loss_function, optimizer='adam',
              metrics=[crf.accuracy])
model.summary()

# Serialize the model architecture to JSON.
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
# Keep only the weights of the best epoch: with save_best_only=True the
# file is rewritten whenever the monitored validation metric improves.
checkpointer = ModelCheckpoint(filepath="bilstm_1102_k205_tf130.w",
                               verbose=0, save_best_only=True,
                               save_weights_only=True)

# Kept under its own name so the per-batch losses stay reachable after
# ``history`` is bound to the object returned by fit().
loss_history = LossHistory()
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=500,
    callbacks=[checkpointer, loss_history, plotter],
    verbose=1,
    validation_split=0.1,
)