Question
I already have some code which trains a classifier from numpy arrays. However, my training data set is very large. It seems the recommended solution is to use TFRecords. My attempts to use TFRecords with my own data set have failed, so I have gradually reduced my code to a minimal toy.
Example:
import tensorflow as tf
def readsingleexample(serialized):
    """Parse one serialized tf.train.Example into (x, label) tensors.

    NOTE(review): `tf.parse_single_example` with `FixedLenFeature([], ...)`
    produces rank-0 (scalar) tensors, which `tf.feature_column` rejects —
    this is the source of the "Feature (key: x) cannot have rank 0"
    ValueError shown in the traceback below.
    """
    print("readsingleexample", serialized)
    feature = dict()
    # Both entries are declared as scalar ([]) int64 features.
    feature['x'] = tf.FixedLenFeature([], tf.int64)
    feature['label'] = tf.FixedLenFeature([], tf.int64)
    parsed_example = tf.parse_single_example(serialized, features=feature)
    print(parsed_example)
    return parsed_example['x'], parsed_example['label']
def TestParse(filename):
    """Iterate the TFRecord file at *filename* and print each record's features."""
    for raw_record in tf.python_io.tf_record_iterator(path=filename):
        parsed = tf.train.Example()
        parsed.ParseFromString(raw_record)
        print(parsed.features)
def TestRead(filename):
    """Parse every raw record in *filename* via readsingleexample and print the pair."""
    for raw_record in tf.python_io.tf_record_iterator(path=filename):
        x, y = readsingleexample(raw_record)
        print(x, y)
def _int64_feature(value):
    """Box a single integer into a tf.train.Feature holding a one-element Int64List."""
    wrapped = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)
def TFRecordsTest(filename):
    """Round-trip one synthetic Example through a TFRecord file, then train on it.

    NOTE(review): several problems in this repro:
      * the TFRecordWriter is never closed/flushed, so the record may still be
        buffered when the file is read back — TODO confirm on the target platform;
      * label is 4 but DNNClassifier is built with n_classes=2;
      * readsingleexample yields rank-0 tensors, which triggers the
        "Feature (key: x) cannot have rank 0" ValueError shown below.
    """
    example=tf.train.Example(features=tf.train.Features(feature={
        'x': _int64_feature(7),
        'label': _int64_feature(4)
    }))
    writer = tf.python_io.TFRecordWriter(filename)
    writer.write(example.SerializeToString())
    # Sanity check: read the record straight back and dump its parsed features.
    record_iterator=tf.python_io.tf_record_iterator(path=filename)
    for string_record in record_iterator:
        example=tf.train.Example()
        example.ParseFromString(string_record)
        print(example.features)
    # Dataset pipeline over the same file: parse each record, repeat forever.
    dataset=tf.data.TFRecordDataset(filenames=[filename])
    dataset=dataset.map(readsingleexample)
    dataset=dataset.repeat()
    def train_input_fn():
        # Estimator input_fn: returns ({feature_name: tensor}, label_tensor).
        iterator=dataset.make_one_shot_iterator()
        feats_tensor, labels_tensor = iterator.get_next()
        return {"x":feats_tensor}, labels_tensor
    feature_columns = []
    feature_columns.append(tf.feature_column.numeric_column(key='x'))
    classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 10, 10],
                                            n_classes=2)
    classifier.train(input_fn=train_input_fn, steps=1000)
    return
This results in the following output:
feature {
key: "label"
value {
int64_list {
value: 4
}
}
}
feature {
key: "x"
value {
int64_list {
value: 7
}
}
}
readsingleexample Tensor("arg0:0", shape=(), dtype=string)
{'x': <tf.Tensor 'ParseSingleExample/ParseSingleExample:1' shape=() dtype=int64>, 'label': <tf.Tensor 'ParseSingleExample/ParseSingleExample:0' shape=() dtype=int64>}
WARNING:tensorflow:Using temporary folder as model directory: C:\Users\eeark\AppData\Local\Temp\tmpcl47b2ut
Traceback (most recent call last):
File "<pyshell#2>", line 1, in <module>
tfrecords_test.TFRecordsTest(fn)
File "C:\_P4\user_feindselig\_python\tfrecords_test.py", line 60, in TFRecordsTest
classifier.train(input_fn=train_input_fn, steps=1000)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\estimator.py", line 352, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\estimator.py", line 812, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\estimator.py", line 793, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\canned\dnn.py", line 354, in _model_fn
config=config)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\canned\dnn.py", line 185, in _dnn_model_fn
logits = logit_fn(features=features, mode=mode)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\estimator\canned\dnn.py", line 91, in dnn_logit_fn
features=features, feature_columns=feature_columns)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 273, in input_layer
trainable, cols_to_vars)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 198, in _internal_input_layer
trainable=trainable)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 2080, in _get_dense_tensor
return inputs.get(self)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 1883, in get
transformed = column._transform_feature(self) # pylint: disable=protected-access
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 2048, in _transform_feature
input_tensor = inputs.get(self.key)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 1870, in get
feature_tensor = self._get_raw_feature_as_tensor(key)
File "C:\Program Files\Python352\lib\site-packages\tensorflow\python\feature_column\feature_column.py", line 1924, in _get_raw_feature_as_tensor
key, feature_tensor))
ValueError: Feature (key: x) cannot have rank 0. Give: Tensor("IteratorGetNext:0", shape=(), dtype=int64, device=/device:CPU:0)
What does the error mean? What could be going wrong?
Answer 1:
The following appears to work: no errors are raised, at least. tf.parse_example([serialized], ...) is used instead of tf.parse_single_example(serialized, ...). (Also, the label in the synthetic data was altered to be less than the number of classes.)
import tensorflow as tf
def readsingleexample(serialized):
    """Parse a serialized Example into (x, label) tensors.

    Wrapping *serialized* in a one-element list and using tf.parse_example
    yields rank-1 tensors, which tf.feature_column accepts.
    """
    print("readsingleexample", serialized)
    spec = {
        'x': tf.FixedLenFeature([], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed_example = tf.parse_example([serialized], features=spec)
    print(parsed_example)
    return parsed_example['x'], parsed_example['label']
def TestParse(filename):
    """Print the features of every tf.train.Example stored in *filename*."""
    records = tf.python_io.tf_record_iterator(path=filename)
    for rec in records:
        ex = tf.train.Example()
        ex.ParseFromString(rec)
        print(ex.features)
def TestRead(filename):
    """Run readsingleexample over each raw record in *filename* and print the result."""
    for rec in tf.python_io.tf_record_iterator(path=filename):
        feats, label = readsingleexample(rec)
        print(feats, label)
def _int64_feature(value):
    """Wrap a single int as a tf.train.Feature containing an Int64List of length 1."""
    as_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=as_list)
def TFRecordsTest(filename):
    """Write one synthetic (x=7, label=0) Example to *filename*, verify it
    round-trips, then train a small DNNClassifier from a tf.data pipeline.

    Args:
        filename: path of the TFRecord file to create and then read back.
    """
    example = tf.train.Example(features=tf.train.Features(feature={
        'x': _int64_feature(7),
        'label': _int64_feature(0)  # must stay < n_classes (2)
    }))
    # Bug fix: the original never closed the TFRecordWriter, so the record
    # could still be sitting in the writer's buffer when the file is read
    # back below. The context manager guarantees flush + close.
    with tf.python_io.TFRecordWriter(filename) as writer:
        writer.write(example.SerializeToString())
    # Sanity check: re-read the file and print the stored features.
    for string_record in tf.python_io.tf_record_iterator(path=filename):
        parsed = tf.train.Example()
        parsed.ParseFromString(string_record)
        print(parsed.features)
    # Dataset pipeline: parse each record, repeat forever for training.
    dataset = tf.data.TFRecordDataset(filenames=[filename])
    dataset = dataset.map(readsingleexample)
    dataset = dataset.repeat()
    def train_input_fn():
        # Estimator input_fn: returns ({feature_name: tensor}, label_tensor).
        iterator = dataset.make_one_shot_iterator()
        feats_tensor, labels_tensor = iterator.get_next()
        return {'x': feats_tensor}, labels_tensor
    feature_columns = [tf.feature_column.numeric_column(key='x')]
    classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 10, 10],
                                            n_classes=2)
    classifier.train(input_fn=train_input_fn, steps=1000)
    return
Answer 2:
Rank 0 means it is a scalar,
so
example=tf.train.Example(features=tf.train.Features(feature={
'x': [_int64_feature(7)],
'label': _int64_feature(4)
}))
would make it rank 1 (a vector) — i.e., add the []
Source: https://stackoverflow.com/questions/49169016/training-classifier-from-tfrecords-in-tensorflow