MapReduce算法(将数据按照 /OutputData/城市名称/日期(YYYY-MM-dd)/类型(固定Gn)/imsi.txt )

老子叫甜甜 提交于 2019-12-21 03:04:50

需求:

现有部分GN数据,数据为全省数据,解析GN数据,将数据按照 /OutputData/城市名称/日期(YYYY-MM-dd)/类型(固定Gn)/imsi.txt  (有很多imsi)的结构,将相同城市,相同日期,相同imsi(国际移动用户标识),类型为Gn的数据汇总到一起,。

解析出新的IMSI, VULUME、CELLID、TAC、city、time

数据:

1|460002452699237|8655890276520178|8613786401241|21.176.70.136|29588|255|56042|221.177.173.83|221.177.173.64|221.177.173.35|221.177.173.35|2|cmnet|101|a788057f91cf3a89|1480752079784|1480752079788|18|26|0|33931|8.8.8.8|53|460|0|73|366|1|1|0|0|0|0|0|0|183.232.72.164|0|1|4|6|6|2260069379|||||||||||||||

数据说明:数据列的分隔符为“|”,截取出数据的第六个和第八个字段,两个字段使用“_”拼接,构成城市名称编号。

日期字段为第十七个数据。

Imsi数据为第二个数据

GnMapper 
package GN.demo01;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class GnMapper extends Mapper<LongWritable, Text,Text,Text> {
    private SimpleDateFormat df = new SimpleDateFormat("YYYY-MM-dd");

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split("\\|");
        context.write(new Text("/OutputData/"+ split[5]+"_"+split[7]+"/"+df.format(new Date(Long.parseLong(split[16])))+"/Gn/"+split[1]),new Text(value));
    }


}
GnReducer
package GN.demo01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;


public class GnReducer extends Reducer<Text, Text, Text, NullWritable> {

    FSDataOutputStream fsDataOutputStream;


    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        Configuration configuration = context.getConfiguration();
        FileSystem fileSystem = null;
        try {
            fileSystem = FileSystem.get(new URI("hdfs://192.168.100.201:8020"),configuration);
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }

        String str = "";
        fsDataOutputStream = fileSystem.create(new Path(key.toString() + ".txt"));

        for (Text value : values) {
             str += value.toString() + "\r\n";
        }
        byte[] bytes = str.getBytes();
        fsDataOutputStream.write(bytes,0,bytes.length);
        fsDataOutputStream.close();
    }

}

GnDriver

package GN.demo01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class GnDriver extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
//        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        Job job = Job.getInstance(conf);
//        job.setNumReduceTasks(30);
        job.setJarByClass(GnDriver.class);

        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job,new Path("E:\\2019-传智项目\\企业需求实战\\01_湖南移动项目需求\\数据\\gn"));

        job.setMapperClass(GnMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GnReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job,new Path("E:\\2019-传智项目\\企业需求实战\\01_湖南移动项目需求\\数据\\Out"));


        boolean b = job.waitForCompletion(true);
        return b?0:1;
    }

    public static void main(String[] args) throws Exception{
        ToolRunner.run(new GnDriver(),args);
    }
}

 

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!