How to specify the KeyValueTextInputFormat separator in the Hadoop 0.20 API?

后端 未结 7 756
盖世英雄少女心
盖世英雄少女心 2020-12-08 05:57

In the new API (org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat), how do I specify a separator (delimiter) other than tab (the default) to split each line into key and value?

Samp

7条回答
  •  北海茫月
    2020-12-08 06:04

    Example

    public class KeyValueTextInput extends Configured implements Tool {
        public static void main(String args[]) throws Exception {
            String log4jConfPath = "log4j.properties";
            PropertyConfigurator.configure(log4jConfPath);
            int res = ToolRunner.run(new KeyValueTextInput(), args);
            System.exit(res);
        }
    
        public int run(String[] args) throws Exception {
    

    Configuration conf = this.getConf();

            //conf.set("key.value.separator.in.input.line", ",");
    

    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");

            Job job = Job.getInstance(conf, "WordCountSampleTemplate");
            job.setJarByClass(KeyValueTextInput.class);
            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class);
    
            //job.setMapOutputKeyClass(Text.class);
            //job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
    
            job.setInputFormatClass(KeyValueTextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            Path outputPath = new Path(args[1]);
            FileSystem fs = FileSystem.get(new URI(outputPath.toString()), conf);
            fs.delete(outputPath, true);
            FileOutputFormat.setOutputPath(job, outputPath);
            return job.waitForCompletion(true) ? 0 : 1;
        }
    }
    
    class Map extends Mapper {
        public void map(Text k1, Text v1, Context context) throws IOException, InterruptedException {
            context.write(k1, v1);
        }
    }
    
    class Reduce extends Reducer {
        public void reduce(Text Key, Iterable values, Context context) throws IOException, InterruptedException {
            String sum = " || ";
            for (Text value : values)
                sum = sum + value.toString() + " || ";
            context.write(Key, new Text(sum));
        }
    }
    

提交回复
热议问题