Question
I have a MapReduce program, shown below:
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
public class Sample {

    public static class SampleMapper extends MapReduceBase implements
            Mapper<Text, Text, Text, Text> {

        private Text word = new Text();

        @Override
        public void map(Text key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            StringTokenizer itr = new StringTokenizer(value.toString(), ",");
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                output.collect(key, word);
            }
        }
    }

    public static class SampleReducer extends MapReduceBase implements
            Reducer<Text, Text, Text, Text> {

        private Text result = new Text();

        @Override
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            StringBuffer aggregation = new StringBuffer();
            while (values.hasNext()) {
                aggregation.append("|" + values.next().toString());
            }
            result.set(aggregation.toString());
            output.collect(key, result);
        }
    }

    public static void main(String args[]) throws IOException {
        JobConf conf = new JobConf(Sample.class);
        conf.setJobName("Sample");
        conf.setMapperClass(SampleMapper.class);
        conf.setReducerClass(SampleReducer.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}
I've built the jar and have been trying to get the output, but the output file that is created is empty.
I'm using the following command to run the job:
hadoop jar mapreduce.jar Sample /tmp/input tmp/output
mapreduce.jar is the jar I have packaged, and my input file looks like this:
1 a,b,c
2 e,f
1 x,y,z
2 g
Expected output:
1 a|b|c|x|y|z
2 e|f|g
Answer 1:
I'm guessing that since you're using KeyValueTextInputFormat as the input format, it isn't finding a separator byte and is therefore treating the entire line as the key (with an empty value). That would mean the iteration in your mapper never enters the loop, so nothing is written out. Set the property mapreduce.input.keyvaluelinerecordreader.key.value.separator in the config to " " so a space is used as the separator byte.
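For illustration, a minimal sketch of what that could look like in main() before the job is submitted, assuming a single space separates the key from the value in each input line. Note that the old org.apache.hadoop.mapred API used in the question historically read the separator from key.value.separator.in.input.line, so setting both property names is the safer option:

    // Assumption: a single space separates the key from the value in each input line.
    conf.set("key.value.separator.in.input.line", " ");                              // old (mapred) API property
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");   // new (mapreduce) API property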
Answer 2:
Try passing a Configuration object to the JobConf; I suspect your JobConf is not picking up the Hadoop/HDFS configuration.
Configuration configuration = new Configuration();
JobConf jobConf = new JobConf(configuration, Sample.class);
jobConf.setJarByClass(Sample.class);
// ...
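This helps because new Configuration() loads core-default.xml and core-site.xml from the classpath, so the JobConf picks up the cluster's filesystem settings rather than built-in defaults, and setJarByClass lets Hadoop locate the jar to ship to the cluster by finding the jar that contains the given class.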
Source: https://stackoverflow.com/questions/12780407/empty-output-file-generated-after-running-hadoop-job