The code is available on GitHub: https://github.com/zuodaoyong/Hadoop
While a job is running, Hadoop maintains a number of built-in counters that make it easy to monitor how much data has been processed and how much output has been produced. User-defined counters can be added in either of two ways.
1. Counting with an enum
// method on the task context: look up a counter by enum constant
Counter getCounter(Enum<?> counterName);

enum CustomCounter {
    normal, abnormal
}

context.getCounter(CustomCounter.normal).increment(1);
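As a concrete illustration of the enum approach, the sketch below applies it inside a mapper. The EnumCounterMapper class name and the "more than 11 fields" validity check are assumptions made here to mirror the example in section 3; they are not part of the original code.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class EnumCounterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // illustrative counter names; any enum constants can serve as counters
    enum CustomCounter {
        normal, abnormal
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.split("\\s").length > 11) {
            // well-formed record: count it and pass it through
            context.getCounter(CustomCounter.normal).increment(1);
            context.write(new Text(line), NullWritable.get());
        } else {
            // malformed record: count it and skip it
            context.getCounter(CustomCounter.abnormal).increment(1);
        }
    }
}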
2. Counting with a counter group and counter name
// method on the task context: look up a counter by group name and counter name
Counter getCounter(String groupName, String counterName);

context.getCounter("logMapper", "parseLog_true").increment(1);
3. Example
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    private String[] splits = null;
    private Text k = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // read one line of input
        String line = value.toString();
        // parse the log line; counters record whether parsing succeeded
        boolean result = parseLog(line, context);
        if (!result) {
            return;
        }
        k.set(line);
        context.write(k, NullWritable.get());
    }

    private boolean parseLog(String line, Context context) {
        splits = line.split("\\s");
        // a valid record has more than 11 whitespace-separated fields
        if (splits.length > 11) {
            context.getCounter("logMapper", "parseLog_true").increment(1);
            return true;
        }
        context.getCounter("logMapper", "parseLog_false").increment(1);
        return false;
    }
}
public static void main(String[] args) throws Exception {
    System.setProperty("HADOOP_USER_NAME", "root");
    Configuration configuration = new Configuration();
    Job job = Job.getInstance(configuration);
    job.setMapperClass(LogMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    // map-only job: no reducers, mapper output is written directly
    job.setNumReduceTasks(0);
    FileInputFormat.setInputPaths(job, new Path("/mapreduce/log/web"));
    FileOutputFormat.setOutputPath(job, new Path("/mapreduce/log/output"));
    boolean completed = job.waitForCompletion(true);
    System.exit(completed ? 0 : 1);
}
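The counter values can also be read back in the driver once the job has finished. The following is a minimal sketch, assuming the group/counter names used in LogMapper above plus the built-in TaskCounter.MAP_INPUT_RECORDS counter; it would go just before the System.exit call (imports needed: org.apache.hadoop.mapreduce.Counters and org.apache.hadoop.mapreduce.TaskCounter).

// read counters back after waitForCompletion has returned
Counters counters = job.getCounters();
// built-in counter: number of records fed to the map phase
long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
// custom counters defined in LogMapper
long parsedOk = counters.findCounter("logMapper", "parseLog_true").getValue();
long rejected = counters.findCounter("logMapper", "parseLog_false").getValue();
System.out.println("map input=" + mapInputRecords
        + ", parseLog_true=" + parsedOk + ", parseLog_false=" + rejected);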

Source: CSDN
Author: zuodaoyong
Link: https://blog.csdn.net/zuodaoyong/article/details/104115423