1.网络结构

2.数据源
数据集的训练集和测试集分别包含25000个已标注过的电影评论。其中,负面评论的得分小于等于4,正面评论的得分大于等于7,满分10分。
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import numpy as np
import sys
import math
CLASS_DIM = 2 #情感分类的类别数
EMB_DIM = 128 #词向量的维度
HID_DIM = 512 #隐藏层的维度
STACKED_NUM = 3 #LSTM双向栈的层数
BATCH_SIZE = 128 #batch的大小
word_dict = paddle.dataset.imdb.word_dict()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
word_dict是词和index的映射,paddle.dataset.imdb.train(word_dict)中获得的数据集格式为[([index1,index2……,indexn],a)]a是0或1
use_cuda = False place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place)data = fluid.data(name="word", shape=[None,1], dtype="int64", lod_level=1)
创建程序,并定义数据源头
3.构建神经网络
emb = fluid.layers.embedding(input=data, size=[len(word_dict), 128], is_sparse=True)
fc1 = fluid.layers.fc(input=emb, size=HID_DIM)
lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=HID_DIM)
inputs = [fc1, lstm1]
for i in range(2, STACKED_NUM + 1):
fc = fluid.layers.fc(input=inputs, size=HID_DIM)
lstm, cell = fluid.layers.dynamic_lstm(input=fc, size=HID_DIM, is_reverse=(i % 2) == 0)
inputs = [fc, lstm]
fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
prediction = fluid.layers.fc(input=[fc_last, lstm_last], size=CLASS_DIM, act='softmax')
4.构建损失函数和优化器
label = fluid.data(name="label", shape=[None, 1], dtype="int64") cost = fluid.layers.cross_entropy(input=prediction, label=label) avg_cost = fluid.layers.mean(cost) accuracy = fluid.layers.accuracy(input=prediction, label=label) sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) sgd_optimizer.minimize(avg_cost)
5.初始化项目定义feeder
exe.run(fluid.default_startup_program()) feed_var_list_loop = [main_program.global_block().var(var_name) for var_name in ['word', 'label']] feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place) test_program = fluid.default_main_program().clone(for_test=True)
6.训练数据
_end = False
for epoch_id in range(pass_num):
if _end:
break
for step_id, data in enumerate(train_reader()):
metrics = exe.run(main_program, feed=feeder.feed(data),fetch_list=[avg_cost, accuracy])
if step_id == 30:
if params_dirname is not None:
fluid.io.save_inference_model('o.model', ["words"], prediction, exe)
_end = True
break
来源:https://www.cnblogs.com/yangyang12138/p/12590710.html