赞
踩
- tensorflow=2.10.1
- keras=2.10.0
conll2003数据集下载与预处理_茫茫人海一粒沙的博客-CSDN博客
- import tensorflow as tf
- from keras.models import Model
- from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed
- import keras as keras
- from keras.callbacks import EarlyStopping, ModelCheckpoint
- from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
- import numpy as np
-
def load_dataset():
    """Load the train/validation/test splits from ``dataset/dataset.npz``.

    The archive is opened in a ``with`` block so that its file handle is
    released as soon as the six arrays have been read, instead of staying
    open for the lifetime of the process.

    Returns:
        tuple: ``(train_X, train_y, valid_X, valid_y, test_X, test_y)``, the
        arrays stored under those keys in the archive.

    Raises:
        FileNotFoundError: If ``dataset/dataset.npz`` does not exist.
        KeyError: If one of the six expected keys is missing from the archive.
    """
    keys = ('train_X', 'train_y', 'valid_X', 'valid_y', 'test_X', 'test_y')
    with np.load('dataset/dataset.npz') as dataset:
        # tuple() consumes the generator while the archive is still open.
        return tuple(dataset[key] for key in keys)

# Sequence length in tokens: passed as the Embedding layer's input_length in
# create_model() and as the padding length in predict().
max_len = 64
-
def create_model():
    """Build the uncompiled Embedding -> LSTM -> per-timestep softmax tagger.

    The vocabulary size and the number of output classes are taken from the
    lengths of the JSON dictionaries ``dataset/word2idx.json`` and
    ``dataset/idx2Label.json``.

    Returns:
        A Keras ``Model`` mapping a batch of token-id sequences to one
        softmax distribution over tags per timestep.
    """
    vocab = load_dict('dataset/word2idx.json')
    tags = load_dict('dataset/idx2Label.json')

    # One embedding row more than there are vocabulary entries — presumably
    # reserved for the padding id 0; confirm against the preprocessing step.
    vocab_size = len(vocab) + 1
    tag_count = len(tags)

    tokens_in = Input(shape=(None,))
    hidden = Embedding(input_dim=vocab_size, output_dim=60, input_length=max_len)(tokens_in)
    # return_sequences=True keeps one output vector per timestep.
    hidden = LSTM(units=50, return_sequences=True, dropout=0.5)(hidden)
    tag_probs = TimeDistributed(Dense(tag_count, activation="softmax"))(hidden)

    return Model(tokens_in, tag_probs)

def train(model, train_X, train_y, valid_X, valid_y):
    """Compile ``model`` and fit it, saving the best checkpoint to disk.

    Args:
        model: Uncompiled Keras model to train.
        train_X: Training inputs.
        train_y: Training targets (the loss is categorical cross-entropy).
        valid_X: Inputs used as Keras ``validation_data``.
        valid_y: Targets used as Keras ``validation_data``.
    """
    # Path and file name the best model is saved under.
    checkpoint_file = './dataset/ner_model.h5'

    callbacks = [
        # Early stopping: give up after 3 epochs without a better val_accuracy.
        EarlyStopping(monitor='val_accuracy', patience=3, mode='max', verbose=1),
        # Checkpointing: only overwrite the file when val_accuracy improves.
        ModelCheckpoint(checkpoint_file, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1),
    ]

    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

    # Print the shape of each array as a quick sanity check before fitting.
    for data in (train_X, train_y, valid_X, valid_y):
        print(np.array(data).shape)

    model.fit(
        train_X,
        train_y,
        batch_size=32,
        epochs=20,
        validation_data=(valid_X, valid_y),
        callbacks=callbacks,
    )

def test(test_X, test_y):
    """Evaluate the checkpoint saved at ``./dataset/ner_model.h5`` and print its accuracy.

    Args:
        test_X: Evaluation inputs.
        test_y: Evaluation targets.
    """
    saved_model = keras.models.load_model('./dataset/ner_model.h5')
    # Evaluate the model; entry 1 of the result is the first compiled metric
    # (accuracy for a model compiled by train()).
    accuracy = saved_model.evaluate(test_X, test_y, verbose=0)[1]
    print("Test accuracy:", accuracy)
def save_dict(dict, file_path):
    """Write ``dict`` to ``file_path`` as JSON, replacing any existing file.

    Args:
        dict: JSON-serializable object to store.
        file_path: Destination path; opened in text write mode.
    """
    import json

    with open(file_path, 'w') as handle:
        json.dump(dict, handle)
-
-
def load_dict(path_file):
    """Read a JSON file and return the decoded object.

    Args:
        path_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content (a ``dict`` for files written by
        ``save_dict`` from a dictionary).

    Raises:
        FileNotFoundError: If ``path_file`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    import json

    with open(path_file, 'r') as f:
        return json.load(f)

if __name__ == '__main__':
    train_X, train_y, valid_X, valid_y, test_X, test_y = load_dataset()
    model = create_model()
    # Early stopping and checkpoint selection must use the validation split,
    # not the test split: otherwise the "best" model is chosen on the very
    # data test() reports on, and the test accuracy is optimistically biased.
    train(model, train_X, train_y, valid_X, valid_y)
    test(test_X, test_y)
    # predict()
执行结果:训练在第13个Epoch触发早停,训练集上的准确率约为99.9%,测试集上的准确率约为99.4%。
- Epoch 11: val_accuracy did not improve from 0.99427
- 577/577 [==============================] - 28s 49ms/step - loss: 0.0043 - accuracy: 0.9987 - val_loss: 0.0269 - val_accuracy: 0.9942
- Epoch 12/20
- 576/577 [============================>.] - ETA: 0s - loss: 0.0038 - accuracy: 0.9988
- Epoch 12: val_accuracy did not improve from 0.99427
- 577/577 [==============================] - 28s 49ms/step - loss: 0.0038 - accuracy: 0.9988 - val_loss: 0.0270 - val_accuracy: 0.9942
- Epoch 13/20
- 577/577 [==============================] - ETA: 0s - loss: 0.0034 - accuracy: 0.9990
- Epoch 13: val_accuracy did not improve from 0.99427
- 577/577 [==============================] - 28s 49ms/step - loss: 0.0034 - accuracy: 0.9990 - val_loss: 0.0280 - val_accuracy: 0.9939
- Epoch 13: early stopping
- Test accuracy: 0.9942699670791626
def predict():
    """Tag a handful of example sentences with the saved model and print the result.

    Sentences are split on whitespace, lower-cased and mapped to ids via
    ``dataset/word2idx.json``; words missing from the vocabulary map to id 1.
    Sequences are post-padded with 0 to ``max_len``. One label is printed for
    every position of the padded sequence, padding included.

    NOTE(review): label lookup assumes the key order of
    ``dataset/idx2Label.json`` matches the model's output class indices —
    confirm against the preprocessing step.
    """
    # Example sentences to predict
    test_sentences = [
        "John Wilson works at Apple",
        "Harry works at Citi",
        "I have a meeting with Peter Blackburn tomorrow .",
        "George Smith has writen many books",
        "BRUSSELS",
        "Peter Blackburn",
        'EU rejects German call to boycott British lamb .',
        'The European Commission said on Thursday it disagreed with German advice to consumers to shun British'
    ]

    word2idx = load_dict('dataset/word2idx.json')
    tag2idx = load_dict('dataset/idx2Label.json')
    model = keras.models.load_model('./dataset/ner_model.h5')

    # Convert test sentences to numerical sequences
    test_sequences = [[word2idx.get(word.lower(), 1) for word in sentence.split()] for sentence in test_sentences]
    print('test_sequences:', test_sequences)
    test_sequences = tf.keras.preprocessing.sequence.pad_sequences(maxlen=max_len, sequences=test_sequences, padding='post', value=0)

    # Make predictions; convert the argmax tensor to plain Python ints once
    # rather than iterating over tensor elements.
    predictions = model.predict(test_sequences)
    predicted_tags = tf.argmax(predictions, axis=-1).numpy().tolist()

    # Convert predicted tags back to labels. The position -> label list is
    # built once instead of being rebuilt for every token.
    idx_to_label = list(tag2idx)
    predicted_labels = [[idx_to_label[tag] for tag in tags] for tags in predicted_tags]

    # Print the predicted labels
    for sentence, labels in zip(test_sentences, predicted_labels):
        print(f"Sentence: {sentence}")
        print(f"Predicted Labels: {labels}\n")

执行结果
- test_sequences: [[19297, 14241, 635, 6061, 5862], [17574, 635, 6061, 1], [23991, 4403, 5466, 26487, 14180, 22793, 7357, 10690, 13690], [986, 22945, 318, 1, 16871, 16227], [20014], [22793, 7357], [10799, 8816, 5569, 17034, 15182, 26639, 3124, 12927, 13690], [2858, 26758, 3140, 3214, 21958, 17324, 21359, 26677, 14180, 5569, 5119, 15182, 24318, 15182, 728, 3124]]
- 1/1 [==============================] - 0s 450ms/step
- Sentence: John Wilson works at Apple
- Predicted Labels: ['B-PER', 'I-PER', 'O', 'O', 'B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
- Sentence: Harry works at Citi
- Predicted Labels: ['B-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
- Sentence: I have a meeting with Peter Blackburn tomorrow .
- Predicted Labels: ['O', 'O', 'O', 'O', 'O', 'B-PER', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
- Sentence: George Smith has writen many books
- Predicted Labels: ['B-PER', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
- Sentence: BRUSSELS
- Predicted Labels: ['B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
- Sentence: Peter Blackburn
- Predicted Labels: ['B-PER', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
- Sentence: EU rejects German call to boycott British lamb .
- Predicted Labels: ['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
- Sentence: The European Commission said on Thursday it disagreed with German advice to consumers to shun British
- Predicted Labels: ['O', 'B-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'B-MISC', 'O', 'O', 'O', 'O', 'O', 'B-MISC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
-
-
- Process finished with exit code 0

- import tensorflow as tf
- from keras.models import Model
- from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed
- import keras as keras
- from keras.callbacks import EarlyStopping, ModelCheckpoint
- from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
- import numpy as np
-
def load_dataset():
    """Load the train/validation/test splits from ``dataset/dataset.npz``.

    The archive is opened in a ``with`` block so that its file handle is
    released as soon as the six arrays have been read, instead of staying
    open for the lifetime of the process.

    Returns:
        tuple: ``(train_X, train_y, valid_X, valid_y, test_X, test_y)``, the
        arrays stored under those keys in the archive.

    Raises:
        FileNotFoundError: If ``dataset/dataset.npz`` does not exist.
        KeyError: If one of the six expected keys is missing from the archive.
    """
    keys = ('train_X', 'train_y', 'valid_X', 'valid_y', 'test_X', 'test_y')
    with np.load('dataset/dataset.npz') as dataset:
        # tuple() consumes the generator while the archive is still open.
        return tuple(dataset[key] for key in keys)
-
# Sequence length in tokens: passed as the Embedding layer's input_length in
# create_model() and as the padding length in predict().
max_len = 64
-
def create_model():
    """Build the uncompiled Embedding -> LSTM -> per-timestep softmax tagger.

    The vocabulary size and the number of output classes are taken from the
    lengths of the JSON dictionaries ``dataset/word2idx.json`` and
    ``dataset/idx2Label.json``.

    Returns:
        A Keras ``Model`` mapping a batch of token-id sequences to one
        softmax distribution over tags per timestep.
    """
    vocab = load_dict('dataset/word2idx.json')
    tags = load_dict('dataset/idx2Label.json')

    # One embedding row more than there are vocabulary entries — presumably
    # reserved for the padding id 0; confirm against the preprocessing step.
    vocab_size = len(vocab) + 1
    tag_count = len(tags)

    tokens_in = Input(shape=(None,))
    hidden = Embedding(input_dim=vocab_size, output_dim=60, input_length=max_len)(tokens_in)
    # return_sequences=True keeps one output vector per timestep.
    hidden = LSTM(units=50, return_sequences=True, dropout=0.5)(hidden)
    tag_probs = TimeDistributed(Dense(tag_count, activation="softmax"))(hidden)

    return Model(tokens_in, tag_probs)
-
-
def train(model, train_X, train_y, valid_X, valid_y):
    """Compile ``model`` and fit it, saving the best checkpoint to disk.

    Args:
        model: Uncompiled Keras model to train.
        train_X: Training inputs.
        train_y: Training targets (the loss is categorical cross-entropy).
        valid_X: Inputs used as Keras ``validation_data``.
        valid_y: Targets used as Keras ``validation_data``.
    """
    # Path and file name the best model is saved under.
    checkpoint_file = './dataset/ner_model.h5'

    callbacks = [
        # Early stopping: give up after 3 epochs without a better val_accuracy.
        EarlyStopping(monitor='val_accuracy', patience=3, mode='max', verbose=1),
        # Checkpointing: only overwrite the file when val_accuracy improves.
        ModelCheckpoint(checkpoint_file, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1),
    ]

    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

    # Print the shape of each array as a quick sanity check before fitting.
    for data in (train_X, train_y, valid_X, valid_y):
        print(np.array(data).shape)

    model.fit(
        train_X,
        train_y,
        batch_size=32,
        epochs=20,
        validation_data=(valid_X, valid_y),
        callbacks=callbacks,
    )
def save_dict(dict, file_path):
    """Write ``dict`` to ``file_path`` as JSON, replacing any existing file.

    Args:
        dict: JSON-serializable object to store.
        file_path: Destination path; opened in text write mode.
    """
    import json

    with open(file_path, 'w') as handle:
        json.dump(dict, handle)
-
-
def load_dict(path_file):
    """Read a JSON file and return the decoded object.

    Args:
        path_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content (a ``dict`` for files written by
        ``save_dict`` from a dictionary).

    Raises:
        FileNotFoundError: If ``path_file`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    import json

    with open(path_file, 'r') as f:
        return json.load(f)
-
def test(test_X, test_y):
    """Evaluate the checkpoint saved at ``./dataset/ner_model.h5`` and print its accuracy.

    Args:
        test_X: Evaluation inputs.
        test_y: Evaluation targets.
    """
    saved_model = keras.models.load_model('./dataset/ner_model.h5')
    # Evaluate the model; entry 1 of the result is the first compiled metric
    # (accuracy for a model compiled by train()).
    accuracy = saved_model.evaluate(test_X, test_y, verbose=0)[1]
    print("Test accuracy:", accuracy)
-
-
def predict():
    """Tag a handful of example sentences with the saved model and print the result.

    Sentences are split on whitespace, lower-cased and mapped to ids via
    ``dataset/word2idx.json``. Sequences are post-padded with 0 to
    ``max_len``, and exactly one label is printed per input token: padded
    positions are dropped by position, not by predicted class.

    NOTE(review): words missing from the vocabulary map to id 0, which is
    also the padding value used below — confirm that is the intended
    unknown-word id for this vocabulary.
    NOTE(review): label lookup assumes the key order of
    ``dataset/idx2Label.json`` matches the model's output class indices —
    confirm against the preprocessing step.
    """
    # Example sentences to predict
    test_sentences = [
        "John Wilson works at Apple .",
        "I have a meeting with Peter Blackburn tomorrow.",
        "BRUSSELS",
        "Peter Blackburn",
        'EU rejects German call to boycott British lamb.',
        'The European Commission said on Thursday it disagreed with German advice to consumers to shun British'
    ]

    word2idx = load_dict('dataset/word2idx.json')
    tag2idx = load_dict('dataset/idx2Label.json')
    model = keras.models.load_model('./dataset/ner_model.h5')

    # Convert test sentences to numerical sequences
    tokenized = [sentence.split() for sentence in test_sentences]
    test_sequences = [[word2idx.get(word.lower(), 0) for word in tokens] for tokens in tokenized]
    print('test_sequences:', test_sequences)
    test_sequences = tf.keras.preprocessing.sequence.pad_sequences(maxlen=max_len, sequences=test_sequences, padding='post', value=0)

    # Make predictions; convert the argmax tensor to plain Python ints once.
    predictions = model.predict(test_sequences)
    predicted_tags = tf.argmax(predictions, axis=-1).numpy().tolist()

    # Convert predicted tags back to labels. The position -> label list is
    # built once instead of being rebuilt for every token.
    idx_to_label = list(tag2idx)
    predicted_labels = []
    for tokens, tags in zip(tokenized, predicted_tags):
        # Keep the first len(tokens) positions: padding is appended at the
        # end, so these are the real tokens. Filtering on `tag != 0` would
        # also discard genuine predictions of class 0 and shift every later
        # label onto the wrong word. Sentences longer than max_len are
        # truncated by pad_sequences (from the front by default), so at most
        # max_len labels are available.
        predicted_labels.append([idx_to_label[tag] for tag in tags[:len(tokens)]])

    # Print the predicted labels
    for sentence, labels in zip(test_sentences, predicted_labels):
        print(f"Sentence: {sentence}")
        print(f"Predicted Labels: {labels}\n")
-
-
if __name__ == '__main__':
    train_X, train_y, valid_X, valid_y, test_X, test_y = load_dataset()
    model = create_model()
    # Early stopping and checkpoint selection must use the validation split,
    # not the test split: otherwise the "best" model is chosen on the very
    # data test() reports on, and the test accuracy is optimistically biased.
    train(model, train_X, train_y, valid_X, valid_y)
    test(test_X, test_y)
    # predict()

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。