Question
I am running an experiment comparing the last state and the final output of an LSTM cell. Here is minimal code.
When I use the last state of the LSTM:
with tf.variable_scope('forward'):
    fr_cell = tf.contrib.rnn.LSTMCell(num_units=100)
    dropout_fr = tf.contrib.rnn.DropoutWrapper(fr_cell, output_keep_prob=1. - 0.3)

with tf.variable_scope('backward'):
    bw_cell = tf.contrib.rnn.LSTMCell(num_units=100)
    dropout_bw = tf.contrib.rnn.DropoutWrapper(bw_cell, output_keep_prob=1. - 0.3)

with tf.variable_scope('encoder') as scope:
    model, last_state = tf.nn.bidirectional_dynamic_rnn(dropout_fr,
                                                        dropout_bw,
                                                        inputs=embedding_lookup,
                                                        sequence_length=sequence_leng,
                                                        dtype=tf.float32)

# concatenate the final forward and backward cell states
logits = tf.concat([last_state[0].c, last_state[1].c], axis=-1)
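For reference, here is a minimal self-contained sketch of what last_state contains, assuming TensorFlow 1.x (where tf.contrib is still available); the batch/time/embedding sizes and the random feed values are made up for illustration:

import numpy as np
import tensorflow as tf  # assumes TF 1.x; tf.contrib was removed in TF 2.x

batch_size, max_time, embed_dim, rnn_units = 4, 7, 8, 100
inputs = tf.placeholder(tf.float32, [batch_size, max_time, embed_dim])
seq_len = tf.placeholder(tf.int32, [batch_size])

fw_cell = tf.contrib.rnn.LSTMCell(num_units=rnn_units)
bw_cell = tf.contrib.rnn.LSTMCell(num_units=rnn_units)
outputs, last_state = tf.nn.bidirectional_dynamic_rnn(
    fw_cell, bw_cell, inputs=inputs, sequence_length=seq_len, dtype=tf.float32)

# last_state is a (forward, backward) pair of LSTMStateTuple(c, h):
# .c is the final cell state, .h is the final hidden state (the last valid output).
state_logits = tf.concat([last_state[0].c, last_state[1].c], axis=-1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {inputs: np.random.randn(batch_size, max_time, embed_dim).astype(np.float32),
            seq_len: [7, 5, 3, 2]}
    print(sess.run(state_logits, feed).shape)  # (4, 200) = [batch, 2 * rnn_units]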
Training with one dense layer on top of the logits, the results look like this:
{'test_accuracy': {'subset_accuracy': 0.6752631578947369, 'hamming_score': 0.7172293233082706, 'hamming_loss': 0.022033492822966506, 'micro_ac': 0.7677442088530063, 'weight_ac': 0.7004068523462434, 'epoch': 0}}
{'test_accuracy': {'subset_accuracy': 0.7657894736842106, 'hamming_score': 0.8103120300751879, 'hamming_loss': 0.014090909090909093, 'micro_ac': 0.8539827861515641, 'weight_ac': 0.8162705485230927, 'epoch': 1}}
{'test_accuracy': {'subset_accuracy': 0.79, 'hamming_score': 0.8387255639097745, 'hamming_loss': 0.0138755980861244, 'micro_ac': 0.8634036690523823, 'weight_ac': 0.8490908726490719, 'epoch': 2}}
{'test_accuracy': {'subset_accuracy': 0.7968421052631578, 'hamming_score': 0.8451278195488723, 'hamming_loss': 0.013157894736842105, 'micro_ac': 0.8687780825633675, 'weight_ac': 0.8437635113410252, 'epoch': 3}}
{'test_accuracy': {'subset_accuracy': 0.7952631578947369, 'hamming_score': 0.8434348370927318, 'hamming_loss': 0.012990430622009568, 'micro_ac': 0.8709125221446699, 'weight_ac': 0.851000672155002, 'epoch': 4}}
{'test_accuracy': {'subset_accuracy': 0.8005263157894736, 'hamming_score': 0.8489172932330826, 'hamming_loss': 0.012775119617224879, 'micro_ac': 0.8725711335436651, 'weight_ac': 0.8534756826061506, 'epoch': 5}}
{'test_accuracy': {'subset_accuracy': 0.8131578947368421, 'hamming_score': 0.856890977443609, 'hamming_loss': 0.011698564593301436, 'micro_ac': 0.8823257019230701, 'weight_ac': 0.8584582438108331, 'epoch': 6}}
{'test_accuracy': {'subset_accuracy': 0.8110526315789472, 'hamming_score': 0.8581015037593985, 'hamming_loss': 0.012200956937799042, 'micro_ac': 0.878827146709841, 'weight_ac': 0.856135431687035, 'epoch': 7}}
{'test_accuracy': {'subset_accuracy': 0.8105263157894737, 'hamming_score': 0.8602330827067668, 'hamming_loss': 0.011937799043062199, 'micro_ac': 0.8823584366546263, 'weight_ac': 0.8650550484323777, 'epoch': 8}}
{'test_accuracy': {'subset_accuracy': 0.8157894736842106, 'hamming_score': 0.8606629072681704, 'hamming_loss': 0.011842105263157895, 'micro_ac': 0.8824096562101388, 'weight_ac': 0.8627400503365257, 'epoch': 9}}
{'test_accuracy': {'subset_accuracy': 0.8094736842105262, 'hamming_score': 0.8556365914786968, 'hamming_loss': 0.012200956937799042, 'micro_ac': 0.8785861959976535, 'weight_ac': 0.8570582905550095, 'epoch': 10}}
{'test_accuracy': {'subset_accuracy': 0.7894736842105263, 'hamming_score': 0.8325488721804511, 'hamming_loss': 0.013133971291866029, 'micro_ac': 0.8670913555173759, 'weight_ac': 0.846170159719809, 'epoch': 11}}
{'test_accuracy': {'subset_accuracy': 0.7999999999999999, 'hamming_score': 0.8482067669172932, 'hamming_loss': 0.012679425837320573, 'micro_ac': 0.8735868981209328, 'weight_ac': 0.850973895911516, 'epoch': 12}}
{'test_accuracy': {'subset_accuracy': 0.7989473684210526, 'hamming_score': 0.8445751879699249, 'hamming_loss': 0.01244019138755981, 'micro_ac': 0.8746534727319052, 'weight_ac': 0.8538169978380964, 'epoch': 13}}
{'test_accuracy': {'subset_accuracy': 0.7778947368421052, 'hamming_score': 0.8214172932330827, 'hamming_loss': 0.013468899521531104, 'micro_ac': 0.8625357597942105, 'weight_ac': 0.8408924397431621, 'epoch': 14}}
And when I use the last LSTM output instead of the LSTM state, like this:
with tf.variable_scope('forward'):
    fr_cell = tf.contrib.rnn.LSTMCell(num_units=rnn_units)
    dropout_fr = tf.contrib.rnn.DropoutWrapper(fr_cell, output_keep_prob=1. - 0.3)

with tf.variable_scope('backward'):
    bw_cell = tf.contrib.rnn.LSTMCell(num_units=rnn_units)
    dropout_bw = tf.contrib.rnn.DropoutWrapper(bw_cell, output_keep_prob=1. - 0.3)

with tf.variable_scope('encoder') as scope:
    model, last_state = tf.nn.bidirectional_dynamic_rnn(dropout_fr,
                                                        dropout_bw,
                                                        inputs=embedding_lookup,
                                                        sequence_length=sequence_leng,
                                                        dtype=tf.float32)

# use the LSTM's final output (last time step of the concatenated outputs) as logits
logits = tf.transpose(tf.concat(model, 2), [1, 0, 2])[-1]
Output:
{'test_accuracy': {'subset_accuracy': 0.0, 'hamming_score': 0.0, 'hamming_loss': 0.05322966507177033, 'micro_ac': 0.0, 'weight_ac': 0.0, 'epoch': 0}}
{'test_accuracy': {'subset_accuracy': 0.002631578947368421, 'hamming_score': 0.0028947368421052633, 'hamming_loss': 0.053181818181818184, 'micro_ac': 0.0051884525480888595, 'weight_ac': 0.0039014692243334783, 'epoch': 1}}
{'test_accuracy': {'subset_accuracy': 0.0015789473684210526, 'hamming_score': 0.0015789473684210526, 'hamming_loss': 0.053133971291866026, 'micro_ac': 0.0037344467363098015, 'weight_ac': 0.003035036916615864, 'epoch': 2}}
{'test_accuracy': {'subset_accuracy': 0.0005263157894736842, 'hamming_score': 0.0008771929824561404, 'hamming_loss': 0.053277511961722486, 'micro_ac': 0.0027782544390786, 'weight_ac': 0.002276383526383526, 'epoch': 3}}
{'test_accuracy': {'subset_accuracy': 0.002631578947368421, 'hamming_score': 0.0027631578947368424, 'hamming_loss': 0.05339712918660287, 'micro_ac': 0.006355890371957374, 'weight_ac': 0.005499248315727301, 'epoch': 4}}
{'test_accuracy': {'subset_accuracy': 0.0010526315789473684, 'hamming_score': 0.0027192982456140345, 'hamming_loss': 0.05342105263157895, 'micro_ac': 0.008974728742169633, 'weight_ac': 0.006632679296201175, 'epoch': 5}}
{'test_accuracy': {'subset_accuracy': 0.002105263157894737, 'hamming_score': 0.0029824561403508773, 'hamming_loss': 0.053277511961722486, 'micro_ac': 0.008082528107903096, 'weight_ac': 0.006468760732995094, 'epoch': 6}}
{'test_accuracy': {'subset_accuracy': 0.003157894736842105, 'hamming_score': 0.0038596491228070177, 'hamming_loss': 0.05320574162679426, 'micro_ac': 0.008972047870946777, 'weight_ac': 0.007389730774544649, 'epoch': 7}}
{'test_accuracy': {'subset_accuracy': 0.004736842105263157, 'hamming_score': 0.005175438596491228, 'hamming_loss': 0.05311004784688995, 'micro_ac': 0.011757757270832751, 'weight_ac': 0.009632258011584858, 'epoch': 8}}
{'test_accuracy': {'subset_accuracy': 0.002631578947368421, 'hamming_score': 0.002807017543859649, 'hamming_loss': 0.05322966507177034, 'micro_ac': 0.006419665967549817, 'weight_ac': 0.00551974779382167, 'epoch': 9}}
{'test_accuracy': {'subset_accuracy': 0.002631578947368421, 'hamming_score': 0.0029385964912280703, 'hamming_loss': 0.053181818181818184, 'micro_ac': 0.007242034388602448, 'weight_ac': 0.006127403389008882, 'epoch': 10}}
{'test_accuracy': {'subset_accuracy': 0.002631578947368421, 'hamming_score': 0.002631578947368421, 'hamming_loss': 0.05325358851674641, 'micro_ac': 0.005549722513874306, 'weight_ac': 0.004745753985772134, 'epoch': 11}}
{'test_accuracy': {'subset_accuracy': 0.003157894736842105, 'hamming_score': 0.0038596491228070177, 'hamming_loss': 0.05318181818181817, 'micro_ac': 0.008919944767394215, 'weight_ac': 0.007479381435801285, 'epoch': 12}}
{'test_accuracy': {'subset_accuracy': 0.002105263157894737, 'hamming_score': 0.002982456140350877, 'hamming_loss': 0.05322966507177033, 'micro_ac': 0.008054409681867774, 'weight_ac': 0.0069823035665285485, 'epoch': 13}}
{'test_accuracy': {'subset_accuracy': 0.0015789473684210526, 'hamming_score': 0.001842105263157895, 'hamming_loss': 0.053277511961722486, 'micro_ac': 0.004641319180578367, 'weight_ac': 0.003951743096479938, 'epoch': 14}}
From this output it is clearly visible that there is a huge difference between using the last state and using the last output.
Why is there such a big difference? Am I using the wrong output, or is there a mistake in my code? A minimal sketch of what the [-1] indexing selects is included below for reference.
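Here is a minimal NumPy sketch, with toy values, of what the [-1] index in the second version selects, assuming dynamic_rnn's documented behaviour of zeroing outputs past sequence_length when sequence_length is passed:

import numpy as np

# Toy stand-in for the concatenated bidirectional outputs: batch of 3,
# max_time 5, 2 output units, zero-padded past each sequence's length
# the way dynamic_rnn pads them when sequence_length is given.
seq_len = np.array([5, 3, 2])
outputs = np.random.randn(3, 5, 2).astype(np.float32)
for i, n in enumerate(seq_len):
    outputs[i, n:, :] = 0.0

# What the [-1] indexing selects: the output at the last *time step*
# for every example, which is all zeros for any sequence shorter than max_time.
last_step = np.transpose(outputs, [1, 0, 2])[-1]

# The per-example last *valid* output, gathered at sequence_length - 1.
last_valid = outputs[np.arange(3), seq_len - 1]

print(last_step)   # rows 1 and 2 are all zeros
print(last_valid)  # the actual final outputs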
Thanks in advance
Source: https://stackoverflow.com/questions/58926940/tensorflow-lstm-hidden-states-vs-final-output-in-tensorflows-bidirectional-l