使用TensorFlow理解用于情感分析的LSTM模型

本文介绍了使用TensorFlow理解用于情感分析的LSTM模型的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我正在尝试使用Tensorflow学习用于情感分析的LSTM模型，我已经通读了LSTM模型的相关介绍.

以下代码(create_sentiment_featuresets.py)从 5000个肯定句和5000个否定句子生成词典.

import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import random
from collections import Counter
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

def create_lexicon(pos, neg):
    """Build the vocabulary (lexicon) from the positive and negative sample files.

    Every line of both files is tokenized, each token is lemmatized, and the
    lemmas whose total count is strictly between 50 and 1000 are kept.

    Args:
        pos: Path to the UTF-8 text file of positive sentences.
        neg: Path to the UTF-8 text file of negative sentences.

    Returns:
        list: The retained lemmas.
    """
    import io  # local import: io.open decodes UTF-8 on both Python 2 and 3

    tokens = []
    for path in (pos, neg):
        # Decoding happens in io.open; calling .decode() on each line fails on
        # Python 3, where text-mode lines are already str.
        with io.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # NOTE: tokens are not lower-cased here, while sample_handling
                # lower-cases before lookup, so capitalized entries never match.
                tokens += word_tokenize(line)

    w_counts = Counter(lemmatizer.lemmatize(token) for token in tokens)
    # Drop very common words (>= 1000) and rare words (<= 50).
    l2 = [w for w in w_counts if 1000 > w_counts[w] > 50]
    # Report the size of the lexicon that is actually returned.
    print("Lexicon length create_lexicon: ", len(l2))
    return l2

def sample_handling(sample, lexicon, classification):
    """Convert every line of a sample file into a bag-of-words feature vector.

    Args:
        sample: Path to a UTF-8 text file; each line is treated as one sample.
        lexicon: List of words; position i of each feature vector counts the
            occurrences of lexicon[i] in the line.
        classification: Label attached to every line of this file
            (the callers in this file pass [1, 0] or [0, 1]).

    Returns:
        list: One [features, classification] pair per line, where features is
        a list of length len(lexicon).
    """
    import io  # local import: io.open decodes UTF-8 on both Python 2 and 3

    print("Lexicon length Sample handling: ", len(lexicon))

    # Map each word to its first position once, instead of scanning the list
    # with `in` and `.index()` for every token.
    word_to_index = {}
    for position, word in enumerate(lexicon):
        word_to_index.setdefault(word, position)

    featureset = []
    # Decoding happens in io.open; calling .decode() on each line fails on
    # Python 3, where text-mode lines are already str.
    with io.open(sample, 'r', encoding='utf-8') as f:
        for line in f:
            features = np.zeros(len(lexicon))
            for token in word_tokenize(line.lower()):
                index_value = word_to_index.get(lemmatizer.lemmatize(token).lower())
                if index_value is not None:
                    features[index_value] += 1
            featureset.append([list(features), classification])
    print("Feature SET------")
    print(len(featureset))
    return featureset

def create_feature_sets_and_labels(pos, neg, test_size = 0.1):
    """Build shuffled train/test feature vectors and labels from two sample files.

    Stores the lexicon in the module-level ``m_lexicon`` so that
    ``get_lexicon()`` can return it afterwards.

    Args:
        pos: Path to the positive-sentence file (labelled [1, 0]).
        neg: Path to the negative-sentence file (labelled [0, 1]).
        test_size: Fraction of the samples held out for testing.

    Returns:
        tuple: (train_x, train_y, test_x, test_y), each a list with one entry
        per sample (feature vectors for x, labels for y).
    """
    global m_lexicon
    m_lexicon = create_lexicon(pos, neg)
    samples = sample_handling(pos, m_lexicon, [1,0])
    samples += sample_handling(neg, m_lexicon, [0,1])
    random.shuffle(samples)

    # Split with an explicit index rather than negative slices: when the test
    # size rounds down to 0, `[:-0]` would leave the training set empty and
    # `[-0:]` would put every sample in the test set.
    split_at = len(samples) - int(test_size * len(samples))
    train, test = samples[:split_at], samples[split_at:]

    # Unpack the pairs directly. Wrapping the ragged [features, label] pairs in
    # np.array() relies on object-array creation that recent NumPy rejects.
    train_x = [features for features, _ in train]
    train_y = [label for _, label in train]
    test_x = [features for features, _ in test]
    test_y = [label for _, label in test]
    return train_x, train_y, test_x, test_y

def get_lexicon():
    """Return the lexicon held in the module-level ``m_lexicon`` variable.

    ``m_lexicon`` is assigned by ``create_feature_sets_and_labels``; calling
    this function before that assignment raises NameError.
    """
    # Reading a module-level name needs no ``global`` declaration.
    return m_lexicon

以下代码(sentiment_analysis.py)用于使用简单神经网络模型进行情感分析，并且工作正常

from create_sentiment_featuresets import create_feature_sets_and_labels
from create_sentiment_featuresets import get_lexicon
import tensorflow as tf
import numpy as np
# extras for testing
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
#- end extras

# Build the train/test split from the two sample files.
train_x, train_y, test_x, test_y = create_feature_sets_and_labels('pos.txt', 'neg.txt')


# pt A-------------

# Number of units in each of the three hidden layers.
n_nodes_hl1 = 1500
n_nodes_hl2 = 1500
n_nodes_hl3 = 1500

# Width of the output layer, mini-batch size, and number of training epochs.
n_classes = 2
batch_size = 100
hm_epochs = 10

# Placeholders for the input features and the labels; no shape is declared.
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

# Randomly initialized weights and biases for each layer. The first weight
# matrix takes len(train_x[0]) inputs; each later one takes the previous
# layer's width.
# NOTE(review): the 'f_fum' entries are not read by any code shown here.
hidden_1_layer = {'f_fum': n_nodes_hl1,
                'weight': tf.Variable(tf.random_normal([len(train_x[0]), n_nodes_hl1])),
                'bias': tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'f_fum': n_nodes_hl2,
                'weight': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                'bias': tf.Variable(tf.random_normal([n_nodes_hl2]))}
hidden_3_layer = {'f_fum': n_nodes_hl3,
                'weight': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                'bias': tf.Variable(tf.random_normal([n_nodes_hl3]))}
output_layer = {'f_fum': None,
                'weight': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                'bias': tf.Variable(tf.random_normal([n_classes]))}


def nueral_network_model(data):
    """Build the feed-forward graph: three ReLU hidden layers, then a linear output.

    Args:
        data: Input tensor that is multiplied by the first hidden layer's weights.

    Returns:
        The output-layer tensor (no activation applied).
    """
    activation = data
    # Same affine-then-ReLU step for each hidden layer, in order.
    for hidden in (hidden_1_layer, hidden_2_layer, hidden_3_layer):
        pre_activation = tf.add(tf.matmul(activation, hidden['weight']), hidden['bias'])
        activation = tf.nn.relu(pre_activation)
    return tf.matmul(activation, output_layer['weight']) + output_layer['bias']

# pt B--------------

def train_neural_network(x):
    """Train the feed-forward model, print test accuracy, and classify one demo sentence.

    Reads the module-level ``y`` placeholder, the train/test lists and the
    hyper-parameters ``hm_epochs`` and ``batch_size``.

    Args:
        x: Input placeholder the model graph is built on and batches are fed to.
    """
    prediction = nueral_network_model(x)
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits= prediction, labels= y))
    optimizer = tf.train.AdamOptimizer(learning_rate= 0.001).minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            # Slicing past the end is safe, so the last (shorter) batch is included.
            for start in range(0, len(train_x), batch_size):
                end = start + batch_size
                batch_x = np.array(train_x[start: end])
                batch_y = np.array(train_y[start: end])
                _, c = sess.run([optimizer, cost], feed_dict= {x: batch_x, y: batch_y})
                epoch_loss += c
            print('Epoch', epoch+ 1, 'completed out of ', hm_epochs, 'loss:', epoch_loss)

        correct= tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x:test_x, y:test_y}))

        # testing --------------
        m_lexicon= get_lexicon()
        print('Lexicon length: ',len(m_lexicon))
        input_data= "David likes to go out with Kary"
        current_words= word_tokenize(input_data.lower())
        current_words = [lemmatizer.lemmatize(i) for i in current_words]
        # Bag-of-words vector built the same way as the training features.
        features = np.zeros(len(m_lexicon))
        for word in current_words:
            if word.lower() in m_lexicon:
                index_value = m_lexicon.index(word.lower())
                features[index_value] +=1

        features = features.reshape(1,-1)
        print('features length: ',len(features))
        # Run the forward pass once and take the argmax in NumPy, rather than
        # evaluating the network twice and adding a new tf.argmax op to the graph.
        logits = sess.run(prediction, feed_dict={x:features})
        result = np.argmax(logits, 1)
        print(logits)
        # Index 0 matches the [1, 0] label given to positive samples.
        if result[0] == 0:
            print('Positive: ', input_data)
        elif result[0] == 1:
            print('Negative: ', input_data)

# Script entry point: build the graph on placeholder x, train, and run the demo prediction.
train_neural_network(x)

我正在尝试将上面的(sentiment_analysis.py)修改为LSTM模型，此前我阅读了"在TensorFlow和Python中使用LSTM单元的RNN"示例，该示例是针对mnist图像数据集的LSTM:

经过多次反复试错，我终于得到了下面这段可以运行的代码(sentiment_demo_lstm.py):

import tensorflow as tf
from tensorflow.contrib import rnn
from create_sentiment_featuresets import create_feature_sets_and_labels
from create_sentiment_featuresets import get_lexicon

import numpy as np

# extras for testing
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
#- end extras

# Build the train/test split from the two sample files.
train_x, train_y, test_x, test_y = create_feature_sets_and_labels('pos.txt', 'neg.txt')

# NOTE(review): n_steps is not referenced by any code shown here.
n_steps= 100
# Length of one feature vector; recurrent_neural_network splits its input
# into this many pieces.
input_vec_size= len(train_x[0])
hm_epochs = 8
n_classes = 2
batch_size = 128
# Number of units in the LSTM cell (and width of the projection in front of it).
n_hidden = 128

# Input placeholder: variable first dimension, then input_vec_size entries of size 1.
x = tf.placeholder('float', [None, input_vec_size, 1])
# Label placeholder; no shape is declared.
y = tf.placeholder('float')

def recurrent_neural_network(x):
    """Build the LSTM classifier graph and return the output tensor for the last step.

    Shape comments below assume x is the module-level placeholder of shape
    [None, input_vec_size, 1], with "batch" standing for the first dimension.
    """
    # Output layer: maps the LSTM output (n_hidden) to n_classes.
    layer = {'weights': tf.Variable(tf.random_normal([n_hidden, n_classes])),   # hidden_layer, n_classes
            'biases': tf.Variable(tf.random_normal([n_classes]))}

    # Input projection: maps each size-1 input to n_hidden values.
    h_layer = {'weights': tf.Variable(tf.random_normal([1, n_hidden])), # 1, hidden_layer
            'biases': tf.Variable(tf.random_normal([n_hidden], mean = 1.0))}

    # [batch, input_vec_size, 1] -> [input_vec_size, batch, 1]
    x = tf.transpose(x, [1,0,2])
    # -> [input_vec_size * batch, 1]
    x = tf.reshape(x, [-1, 1])
    # -> [input_vec_size * batch, n_hidden]
    x= tf.nn.relu(tf.matmul(x, h_layer['weights']) + h_layer['biases'])

    # Split along axis 0 into a list of input_vec_size tensors, one per step,
    # each [batch, n_hidden].
    x = tf.split(x, input_vec_size, 0)

    lstm_cell = rnn.BasicLSTMCell(n_hidden, state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype= tf.float32)
    # Only the output of the last step feeds the output layer.
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    return output

def train_neural_network(x):
    """Train the LSTM model, print test accuracy, and classify one demo sentence.

    Reads the module-level ``y`` placeholder, the train/test lists and the
    hyper-parameters ``hm_epochs``, ``batch_size`` and ``input_vec_size``.
    """
    prediction = recurrent_neural_network(x)
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits= prediction, labels= y))
    optimizer = tf.train.AdamOptimizer(learning_rate= 0.001).minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            epoch_loss = 0
            i = 0
            # Only full batches are fed: the strict `<` stops before any batch
            # that would reach the end of train_x, so the trailing samples are
            # never trained on.
            while (i+ batch_size) < len(train_x):
                start = i
                end = i+ batch_size
                batch_x = np.array(train_x[start: end])
                batch_y = np.array(train_y[start: end])
                # Add the trailing size-1 dimension the placeholder expects.
                # This reshape requires exactly batch_size rows.
                batch_x = batch_x.reshape(batch_size ,input_vec_size, 1)
                _, c = sess.run([optimizer, cost], feed_dict= {x: batch_x, y: batch_y})
                epoch_loss += c
                i+= batch_size
            print('--------Epoch', epoch+ 1, 'completed out of ', hm_epochs, 'loss:', epoch_loss)

        correct= tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

        # The whole test set is reshaped and fed in a single evaluation.
        print('Accuracy:', accuracy.eval({x:np.array(test_x).reshape(-1, input_vec_size, 1), y:test_y}))

        # testing --------------
        m_lexicon= get_lexicon()
        print('Lexicon length: ',len(m_lexicon))
        input_data= "Mary does not like pizza"  #"he seems to to be healthy today"  #"David likes to go out with Kary"

        # Build a bag-of-words vector for the demo sentence over m_lexicon.
        current_words= word_tokenize(input_data.lower())
        current_words = [lemmatizer.lemmatize(i) for i in current_words]
        features = np.zeros(len(m_lexicon))
        for word in current_words:
            if word.lower() in m_lexicon:
                index_value = m_lexicon.index(word.lower())
                features[index_value] +=1
        features = np.array(list(features)).reshape(-1, input_vec_size, 1)
        print('features length: ',len(features))

        # NOTE(review): prediction is evaluated twice (here and in the print
        # below), and this line creates a new tf.argmax op.
        result = sess.run(tf.argmax(prediction.eval(feed_dict={x:features}), 1))
        print('RESULT: ', result)
        print(prediction.eval(feed_dict={x:features}))
        # Index 0 matches the [1, 0] label given to positive samples.
        if result[0] == 0:
            print('Positive: ', input_data)
        elif result[0] == 1:
            print('Negative: ', input_data)

# Script entry point: build the graph on placeholder x, train, and run the demo prediction.
train_neural_network(x)

的输出

print(train_x[0])
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

print(train_y[0])
[0, 1]

len(train_x)= 9596，len(train_x[0]) = 423表示train_x是9596x423的列表?

虽然我现在有一个正在运行的代码，但我仍然有很多疑问.

在 sentiment_demo_lstm 中，我无法理解以下部分

x = tf.transpose(x, [1,0,2])
x = tf.reshape(x, [-1, 1])
x = tf.split(x, input_vec_size, 0)

我打印了以下形状:

x = tf.placeholder('float', [None, input_vec_size, 1]) ==> TensorShape([Dimension(None), Dimension(423), Dimension(1)]))
x = tf.transpose(x, [1,0,2]) ==> TensorShape([Dimension(423), Dimension(None), Dimension(1)]))
x = tf.reshape(x, [-1, 1]) ==> TensorShape([Dimension(None), Dimension(1)]))
x = tf.split(x, input_vec_size, 0) ==> ?

在这里，我将隐藏层数设为128，是否需要与输入数相同，即len(train_x)= 9596

1中的值

x = tf.placeholder('float', [None, input_vec_size, 1])

和

x = tf.reshape(x, [-1, 1])

是因为train_x[0]是423x1吗?

以下是为了匹配占位符

batch_x = np.array(train_x[start: end]) ==> (128, 423)
batch_x = batch_x.reshape(batch_size ,input_vec_size, 1) ==> (128, 423, 1)

x = tf.placeholder('float', [None, input_vec_size, 1]) 尺寸，对吗?

如果我修改了代码:

while (i+ batch_size) < len(train_x):

为

while i < len(train_x):

我收到以下错误:

Traceback (most recent call last):
  File "sentiment_demo_lstm.py", line 131, in <module>
    train_neural_network(x)
  File "sentiment_demo_lstm.py", line 86, in train_neural_network
    batch_x = batch_x.reshape(batch_size ,input_vec_size, 1)
ValueError: cannot reshape array of size 52452 into shape (128,423,1)

=>训练时我不能包括最后的124条记录/功能集吗?

解决方案

这是一个涉及面很广的问题.让我尝试用简单的语言来说明，略去所有复杂的内部细节:

具有3个步骤的简单Unrolled LSTM模型如下所示.每个LSTM单元采用一个输入向量和上一个LSTM单元的隐藏输出向量，并为下一个LSTM单元生成一个输出向量和隐藏输出.

以下是同一型号的简明表示.

LSTM模型是从序列到序列的模型，即，当一个序列必须用另一个序列标记时，例如在句子中每个单词的POS标记或NER标记时，它们都用于解决问题.

您似乎正在将其用于分类问题.使用LSTM模型进行分类的方法有两种

1)获取所有状态的输出(在我们的示例中为O1，O2和O3)，并应用softmax层，其softmax层的输出大小等于类数(在您的情况下为2)

2)获取最后一个状态(O3)的输出，并对其施加一个softmax层. (这就是您在代码中所做的. outputs[-1]返回outputs中的最后一个元素，即最后一个时间步的输出)

因此，我们根据softmax输出的误差进行反向传播(随时间反向传播，Backpropagation Through Time，BPTT).

来到使用Tensorflow的实现，让我们看看LSTM模型的输入和输出是什么.

每个LSTM单元都有一个输入，但是我们有3个这样的LSTM单元，因此输入(X占位符)应为大小(inputsize *时间步长).但是我们不是针对单个输入计算误差并进行BPTT，而是针对一批输入-输出组合来进行.因此，LSTM的输入为(批大小*输入大小*时间步长).

用隐藏状态的大小定义LSTM单元. LSTM单元的输出和隐藏输出矢量的大小将与隐藏状态的大小相同(请检查LSTM内部计算以了解原因！).然后，我们使用这些LSTM单元的列表定义LSTM模型，其中列表的大小将等于模型展开的数量.因此，我们定义了要完成的展开次数以及每次展开期间的输入大小.

我跳过了很多事情，例如如何处理可变长度序列，序列到序列的错误计算，LSTM如何计算输出和隐藏输出等.

在您的实现中，您将在输入每个LSTM单元之前应用relu层.我不明白您为什么这样做，但是我想您正在这样做是为了将您的输入大小映射到LSTM输入大小.

提出您的问题:

x是大小为[None，input_vec_size，1]的占位符(张量/矩阵/ndarray).也就是说，它可以采用可变的行数，但是每行具有input_vec_size列，并且每个元素都是矢量，其大小为1.通常，在行中将占位符定义为"None"，以便我们可以更改输入的批处理大小.

让我们说input_vec_size = 3

您正在传递大小为[128 * 3 * 1]的ndarray

x = tf.transpose(x，[1,0,2])-> [3 * 128 * 1]

x = tf.reshape(x，[-1，1])-> [384 * 1]

h_layer ['weights']-> [1，128]

x = tf.nn.relu(tf.matmul(x，h_layer ['weights'])+ h_layer ['biases'])-> [384 * 128]

不，输入大小和隐藏大小是不同的. LSTM对输入和上一个隐藏输出执行一组操作，并给出一个输出和下一个隐藏输出，这两者的大小均为隐藏大小.
x = tf.placeholder('float'，[None，input_vec_size，1])

它定义了一个行数可变的张量(ndarray)，每行具有input_vec_size列，并且每个值都是只含单个值的向量.

x = tf.reshape(x，[-1，1])->将输入x整形为大小固定为1列和任意行数的矩阵.

batch_x = batch_x.reshape(batch_size，input_vec_size，1)

如果batch_x中的值数量 != batch_size * input_vec_size * 1，batch_x.reshape将失败.对于最后一批可能是这种情况，因为len(train_x)可能不是batch_size的倍数，导致最后一批未完全填充.

您可以通过使用以下代码来避免此问题:

batch_x = batch_x.reshape(-1 ,input_vec_size, 1)

但是我仍然不确定为什么在输入层前面使用Relu.

您正在对最后一个单元格的输出进行逻辑回归，这很好.

您可以看一下我的玩具示例，它是一个使用双向LSTM进行分类的分类器，用于对序列是增加还是减少或混合进行分类.

在Tensorflow中使用LSTM的玩具sequence_classifier

I am trying to learn LSTM model for sentiment analysis using Tensorflow, I have gone through the LSTM model.

Following code (create_sentiment_featuresets.py) generates the lexicon from 5000 positive sentences and 5000 negative sentences.

import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import random
from collections import Counter
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

def create_lexicon(pos, neg):
    """Build the vocabulary (lexicon) from the positive and negative sample files.

    Every line of both files is tokenized, each token is lemmatized, and the
    lemmas whose total count is strictly between 50 and 1000 are kept.

    Args:
        pos: Path to the UTF-8 text file of positive sentences.
        neg: Path to the UTF-8 text file of negative sentences.

    Returns:
        list: The retained lemmas.
    """
    import io  # local import: io.open decodes UTF-8 on both Python 2 and 3

    tokens = []
    for path in (pos, neg):
        # Decoding happens in io.open; calling .decode() on each line fails on
        # Python 3, where text-mode lines are already str.
        with io.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # NOTE: tokens are not lower-cased here, while sample_handling
                # lower-cases before lookup, so capitalized entries never match.
                tokens += word_tokenize(line)

    w_counts = Counter(lemmatizer.lemmatize(token) for token in tokens)
    # Drop very common words (>= 1000) and rare words (<= 50).
    l2 = [w for w in w_counts if 1000 > w_counts[w] > 50]
    # Report the size of the lexicon that is actually returned.
    print("Lexicon length create_lexicon: ", len(l2))
    return l2

def sample_handling(sample, lexicon, classification):
    """Convert every line of a sample file into a bag-of-words feature vector.

    Args:
        sample: Path to a UTF-8 text file; each line is treated as one sample.
        lexicon: List of words; position i of each feature vector counts the
            occurrences of lexicon[i] in the line.
        classification: Label attached to every line of this file
            (the callers in this file pass [1, 0] or [0, 1]).

    Returns:
        list: One [features, classification] pair per line, where features is
        a list of length len(lexicon).
    """
    import io  # local import: io.open decodes UTF-8 on both Python 2 and 3

    print("Lexicon length Sample handling: ", len(lexicon))

    # Map each word to its first position once, instead of scanning the list
    # with `in` and `.index()` for every token.
    word_to_index = {}
    for position, word in enumerate(lexicon):
        word_to_index.setdefault(word, position)

    featureset = []
    # Decoding happens in io.open; calling .decode() on each line fails on
    # Python 3, where text-mode lines are already str.
    with io.open(sample, 'r', encoding='utf-8') as f:
        for line in f:
            features = np.zeros(len(lexicon))
            for token in word_tokenize(line.lower()):
                index_value = word_to_index.get(lemmatizer.lemmatize(token).lower())
                if index_value is not None:
                    features[index_value] += 1
            featureset.append([list(features), classification])
    print("Feature SET------")
    print(len(featureset))
    return featureset

def create_feature_sets_and_labels(pos, neg, test_size = 0.1):
    """Build shuffled train/test feature vectors and labels from two sample files.

    Stores the lexicon in the module-level ``m_lexicon`` so that
    ``get_lexicon()`` can return it afterwards.

    Args:
        pos: Path to the positive-sentence file (labelled [1, 0]).
        neg: Path to the negative-sentence file (labelled [0, 1]).
        test_size: Fraction of the samples held out for testing.

    Returns:
        tuple: (train_x, train_y, test_x, test_y), each a list with one entry
        per sample (feature vectors for x, labels for y).
    """
    global m_lexicon
    m_lexicon = create_lexicon(pos, neg)
    samples = sample_handling(pos, m_lexicon, [1,0])
    samples += sample_handling(neg, m_lexicon, [0,1])
    random.shuffle(samples)

    # Split with an explicit index rather than negative slices: when the test
    # size rounds down to 0, `[:-0]` would leave the training set empty and
    # `[-0:]` would put every sample in the test set.
    split_at = len(samples) - int(test_size * len(samples))
    train, test = samples[:split_at], samples[split_at:]

    # Unpack the pairs directly. Wrapping the ragged [features, label] pairs in
    # np.array() relies on object-array creation that recent NumPy rejects.
    train_x = [features for features, _ in train]
    train_y = [label for _, label in train]
    test_x = [features for features, _ in test]
    test_y = [label for _, label in test]
    return train_x, train_y, test_x, test_y

def get_lexicon():
    """Return the lexicon held in the module-level ``m_lexicon`` variable.

    ``m_lexicon`` is assigned by ``create_feature_sets_and_labels``; calling
    this function before that assignment raises NameError.
    """
    # Reading a module-level name needs no ``global`` declaration.
    return m_lexicon

The following code (sentiment_analysis.py) is for sentiment analysis using simple neural network model and is working fine

from create_sentiment_featuresets import create_feature_sets_and_labels
from create_sentiment_featuresets import get_lexicon
import tensorflow as tf
import numpy as np
# extras for testing
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
#- end extras

# Build the train/test split from the two sample files.
train_x, train_y, test_x, test_y = create_feature_sets_and_labels('pos.txt', 'neg.txt')


# pt A-------------

# Number of units in each of the three hidden layers.
n_nodes_hl1 = 1500
n_nodes_hl2 = 1500
n_nodes_hl3 = 1500

# Width of the output layer, mini-batch size, and number of training epochs.
n_classes = 2
batch_size = 100
hm_epochs = 10

# Placeholders for the input features and the labels; no shape is declared.
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

# Randomly initialized weights and biases for each layer. The first weight
# matrix takes len(train_x[0]) inputs; each later one takes the previous
# layer's width.
# NOTE(review): the 'f_fum' entries are not read by any code shown here.
hidden_1_layer = {'f_fum': n_nodes_hl1,
                'weight': tf.Variable(tf.random_normal([len(train_x[0]), n_nodes_hl1])),
                'bias': tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'f_fum': n_nodes_hl2,
                'weight': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                'bias': tf.Variable(tf.random_normal([n_nodes_hl2]))}
hidden_3_layer = {'f_fum': n_nodes_hl3,
                'weight': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                'bias': tf.Variable(tf.random_normal([n_nodes_hl3]))}
output_layer = {'f_fum': None,
                'weight': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                'bias': tf.Variable(tf.random_normal([n_classes]))}


def nueral_network_model(data):
    """Build the feed-forward graph: three ReLU hidden layers, then a linear output.

    Args:
        data: Input tensor that is multiplied by the first hidden layer's weights.

    Returns:
        The output-layer tensor (no activation applied).
    """
    activation = data
    # Same affine-then-ReLU step for each hidden layer, in order.
    for hidden in (hidden_1_layer, hidden_2_layer, hidden_3_layer):
        pre_activation = tf.add(tf.matmul(activation, hidden['weight']), hidden['bias'])
        activation = tf.nn.relu(pre_activation)
    return tf.matmul(activation, output_layer['weight']) + output_layer['bias']

# pt B--------------

def train_neural_network(x):
    """Train the feed-forward model, print test accuracy, and classify one demo sentence.

    Reads the module-level ``y`` placeholder, the train/test lists and the
    hyper-parameters ``hm_epochs`` and ``batch_size``.

    Args:
        x: Input placeholder the model graph is built on and batches are fed to.
    """
    prediction = nueral_network_model(x)
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits= prediction, labels= y))
    optimizer = tf.train.AdamOptimizer(learning_rate= 0.001).minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            # Slicing past the end is safe, so the last (shorter) batch is included.
            for start in range(0, len(train_x), batch_size):
                end = start + batch_size
                batch_x = np.array(train_x[start: end])
                batch_y = np.array(train_y[start: end])
                _, c = sess.run([optimizer, cost], feed_dict= {x: batch_x, y: batch_y})
                epoch_loss += c
            print('Epoch', epoch+ 1, 'completed out of ', hm_epochs, 'loss:', epoch_loss)

        correct= tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x:test_x, y:test_y}))

        # testing --------------
        m_lexicon= get_lexicon()
        print('Lexicon length: ',len(m_lexicon))
        input_data= "David likes to go out with Kary"
        current_words= word_tokenize(input_data.lower())
        current_words = [lemmatizer.lemmatize(i) for i in current_words]
        # Bag-of-words vector built the same way as the training features.
        features = np.zeros(len(m_lexicon))
        for word in current_words:
            if word.lower() in m_lexicon:
                index_value = m_lexicon.index(word.lower())
                features[index_value] +=1

        features = features.reshape(1,-1)
        print('features length: ',len(features))
        # Run the forward pass once and take the argmax in NumPy, rather than
        # evaluating the network twice and adding a new tf.argmax op to the graph.
        logits = sess.run(prediction, feed_dict={x:features})
        result = np.argmax(logits, 1)
        print(logits)
        # Index 0 matches the [1, 0] label given to positive samples.
        if result[0] == 0:
            print('Positive: ', input_data)
        elif result[0] == 1:
            print('Negative: ', input_data)

# Script entry point: build the graph on placeholder x, train, and run the demo prediction.
train_neural_network(x)

I am trying to modify the above (sentiment_analysis.py) for an LSTM model after reading the RNN w/ LSTM cell example in TensorFlow and Python, which is for an LSTM on the MNIST image dataset:

Somehow, through much trial and error, I was able to get the code below running (sentiment_demo_lstm.py):

import tensorflow as tf
from tensorflow.contrib import rnn
from create_sentiment_featuresets import create_feature_sets_and_labels
from create_sentiment_featuresets import get_lexicon

import numpy as np

# extras for testing
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
#- end extras

# Build the train/test split from the two sample files.
train_x, train_y, test_x, test_y = create_feature_sets_and_labels('pos.txt', 'neg.txt')

# NOTE(review): n_steps is not referenced by any code shown here.
n_steps= 100
# Length of one feature vector; recurrent_neural_network splits its input
# into this many pieces.
input_vec_size= len(train_x[0])
hm_epochs = 8
n_classes = 2
batch_size = 128
# Number of units in the LSTM cell (and width of the projection in front of it).
n_hidden = 128

# Input placeholder: variable first dimension, then input_vec_size entries of size 1.
x = tf.placeholder('float', [None, input_vec_size, 1])
# Label placeholder; no shape is declared.
y = tf.placeholder('float')

def recurrent_neural_network(x):
    """Build the LSTM classifier graph and return the output tensor for the last step.

    Shape comments below assume x is the module-level placeholder of shape
    [None, input_vec_size, 1], with "batch" standing for the first dimension.
    """
    # Output layer: maps the LSTM output (n_hidden) to n_classes.
    layer = {'weights': tf.Variable(tf.random_normal([n_hidden, n_classes])),   # hidden_layer, n_classes
            'biases': tf.Variable(tf.random_normal([n_classes]))}

    # Input projection: maps each size-1 input to n_hidden values.
    h_layer = {'weights': tf.Variable(tf.random_normal([1, n_hidden])), # 1, hidden_layer
            'biases': tf.Variable(tf.random_normal([n_hidden], mean = 1.0))}

    # [batch, input_vec_size, 1] -> [input_vec_size, batch, 1]
    x = tf.transpose(x, [1,0,2])
    # -> [input_vec_size * batch, 1]
    x = tf.reshape(x, [-1, 1])
    # -> [input_vec_size * batch, n_hidden]
    x= tf.nn.relu(tf.matmul(x, h_layer['weights']) + h_layer['biases'])

    # Split along axis 0 into a list of input_vec_size tensors, one per step,
    # each [batch, n_hidden].
    x = tf.split(x, input_vec_size, 0)

    lstm_cell = rnn.BasicLSTMCell(n_hidden, state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype= tf.float32)
    # Only the output of the last step feeds the output layer.
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    return output

def train_neural_network(x):
    """Train the LSTM model, print test accuracy, and classify one demo sentence.

    Reads the module-level ``y`` placeholder, the train/test lists and the
    hyper-parameters ``hm_epochs``, ``batch_size`` and ``input_vec_size``.
    """
    prediction = recurrent_neural_network(x)
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits= prediction, labels= y))
    optimizer = tf.train.AdamOptimizer(learning_rate= 0.001).minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            epoch_loss = 0
            i = 0
            # Only full batches are fed: the strict `<` stops before any batch
            # that would reach the end of train_x, so the trailing samples are
            # never trained on.
            while (i+ batch_size) < len(train_x):
                start = i
                end = i+ batch_size
                batch_x = np.array(train_x[start: end])
                batch_y = np.array(train_y[start: end])
                # Add the trailing size-1 dimension the placeholder expects.
                # This reshape requires exactly batch_size rows.
                batch_x = batch_x.reshape(batch_size ,input_vec_size, 1)
                _, c = sess.run([optimizer, cost], feed_dict= {x: batch_x, y: batch_y})
                epoch_loss += c
                i+= batch_size
            print('--------Epoch', epoch+ 1, 'completed out of ', hm_epochs, 'loss:', epoch_loss)

        correct= tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

        # The whole test set is reshaped and fed in a single evaluation.
        print('Accuracy:', accuracy.eval({x:np.array(test_x).reshape(-1, input_vec_size, 1), y:test_y}))

        # testing --------------
        m_lexicon= get_lexicon()
        print('Lexicon length: ',len(m_lexicon))
        input_data= "Mary does not like pizza"  #"he seems to to be healthy today"  #"David likes to go out with Kary"

        # Build a bag-of-words vector for the demo sentence over m_lexicon.
        current_words= word_tokenize(input_data.lower())
        current_words = [lemmatizer.lemmatize(i) for i in current_words]
        features = np.zeros(len(m_lexicon))
        for word in current_words:
            if word.lower() in m_lexicon:
                index_value = m_lexicon.index(word.lower())
                features[index_value] +=1
        features = np.array(list(features)).reshape(-1, input_vec_size, 1)
        print('features length: ',len(features))

        # NOTE(review): prediction is evaluated twice (here and in the print
        # below), and this line creates a new tf.argmax op.
        result = sess.run(tf.argmax(prediction.eval(feed_dict={x:features}), 1))
        print('RESULT: ', result)
        print(prediction.eval(feed_dict={x:features}))
        # Index 0 matches the [1, 0] label given to positive samples.
        if result[0] == 0:
            print('Positive: ', input_data)
        elif result[0] == 1:
            print('Negative: ', input_data)

# Script entry point: build the graph on placeholder x, train, and run the demo prediction.
train_neural_network(x)

Output of

print(train_x[0])
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

print(train_y[0])
[0, 1]

len(train_x)= 9596, len(train_x[0]) = 423 meaning train_x is a list of 9596x423 ?

Though I have running code now, I still have lots of doubts.

In sentiment_demo_lstm, I am not able to understand the following part

x = tf.transpose(x, [1,0,2])
x = tf.reshape(x, [-1, 1])
x = tf.split(x, input_vec_size, 0)

I have printed the following shapes:

x = tf.placeholder('float', [None, input_vec_size, 1]) ==> TensorShape([Dimension(None), Dimension(423), Dimension(1)]))
x = tf.transpose(x, [1,0,2]) ==> TensorShape([Dimension(423), Dimension(None), Dimension(1)]))
x = tf.reshape(x, [-1, 1]) ==> TensorShape([Dimension(None), Dimension(1)]))
x = tf.split(x, input_vec_size, 0) ==> ?

Here I took the number of hidden layers as 128, does it need to be same as the number of inputs i.e. len(train_x)= 9596

The value 1 in

x = tf.placeholder('float', [None, input_vec_size, 1])

and

x = tf.reshape(x, [-1, 1])

is because train_x[0] is 423x1?

The following is in order to match the placeholder

batch_x = np.array(train_x[start: end]) ==> (128, 423)
batch_x = batch_x.reshape(batch_size ,input_vec_size, 1) ==> (128, 423, 1)

x = tf.placeholder('float', [None, input_vec_size, 1]) dimensions, right?

If I change the line:

while (i+ batch_size) < len(train_x):

to

while i < len(train_x):

I get the following error:

Traceback (most recent call last):
  File "sentiment_demo_lstm.py", line 131, in <module>
    train_neural_network(x)
  File "sentiment_demo_lstm.py", line 86, in train_neural_network
    batch_x = batch_x.reshape(batch_size ,input_vec_size, 1)
ValueError: cannot reshape array of size 52452 into shape (128,423,1)

=> I can't include the last 124 records/feature-sets while training?

解决方案

This is a loaded question. Let me try to put it in simple English, hiding all the complicated inner details:

A simple Unrolled LSTM model with 3 steps is shown below. Each LSTM cell takes an input vector and the hidden output vector of the previous LSTM cell and produces an output vector and the hidden output for the next LSTM cell.

A concise representation of the same model is shown below.

LSTM models are sequence-to-sequence models, i.e., they are used for problems where a sequence has to be labeled with another sequence, like POS tagging or NER tagging of each word in a sentence.

You seem to be using it for classification problem. There are two possible ways to use LSTM model for classification

1) Take the output of all the states (O1, O2 and O3 in our example) and apply a softmax layer with softmax layer output size being equal to number of classes (2 in your case)

2) Take the output of the last state (O3) and apply a softmax layer to it. (This is what you are doing in your code: outputs[-1] returns the last element of outputs, i.e. the output of the final time step.)

So we backpropagate (Backpropagation Through Time - BPTT) on the error of the softmax output.

Coming to the implementation using Tensorflow, lets see what is the input and output to the LSTM model.

Each LSTM cell takes an input, but we have 3 such LSTM cells, so the input (X placeholder) should be of size (inputsize * time steps). But we don't calculate the error and do BPTT for a single input; instead we do it on a batch of input-output combinations. So the input of the LSTM will be (batchsize * inputsize * time steps).

An LSTM cell is defined with the size of the hidden state. The size of the output and of the hidden output vector of the LSTM cell will be the same as the size of the hidden state (check the LSTM internal calculations for why!). We then define an LSTM model using a list of these LSTM cells, where the size of the list is equal to the number of unrollings of the model. So we define the number of unrollings to be done and the size of the input during each unrolling.

I have skipped lots of things, like how to handle variable-length sequences, sequence-to-sequence error calculations, how the LSTM calculates the output and hidden output, etc.

Coming to your implementation, you are applying a relu layer before the input of each LSTM cell. I don't understand why you are doing that, but I guess you are doing it to map your input size to that of the LSTM input size.

Coming to your questions:

x is the placeholder (tensor/matrix/ndarray) of size [None, input_vec_size, 1], i.e. it can take a variable number of rows, but each row has input_vec_size columns and each element is a vector of size 1. Normally placeholders are defined with "None" in the rows so that we can vary the batch size of the input.

lets say input_vec_size = 3

You are passing a ndarray of size [128 * 3 * 1]

x = tf.transpose(x, [1,0,2]) --> [3*128*1]

x = tf.reshape(x, [-1, 1]) --> [384*1]

h_layer['weights'] --> [1, 128]

x= tf.nn.relu(tf.matmul(x, h_layer['weights']) + h_layer['biases']) --> [384 * 128]

No, input size and hidden size are different. The LSTM does a set of operations on the input and the previous hidden output and gives an output and the next hidden output, both of which are of size hidden size.
x = tf.placeholder('float', [None, input_vec_size, 1])

It defines a tensor (ndarray) with a variable number of rows; each row has input_vec_size columns, and each value is a single-value vector.

x = tf.reshape(x, [-1, 1]) --> reshapes the input x into a matrix of size fixed to 1 column and any number of rows.

batch_x = batch_x.reshape(batch_size ,input_vec_size, 1)

batch_x.reshape will fail if number of values in batch_x != batch_size*input_vec_size*1. This might be the case for last batch because len(train_x) might not be a multiple of batch_size resulting in the non fully filled last batch.

You can avoid this problem by using

batch_x = batch_x.reshape(-1 ,input_vec_size, 1)

But I am still not sure why you are using Relu in front of the input layer.

You are applying logistic regression at the output of the last cell which is fine.

You can look at my toy example, which is a classifier using a bidirectional LSTM for classifying whether a sequence is increasing, decreasing or mixed.

Toy sequence_classifier using LSTM in Tensorflow

这篇关于使用张量流了解LSTM模型以进行情感分析的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持！