一、安装Eclipse
1、下载-官方网址
https://www.eclipse.org/downloads
2、解压安装包
tar -zxvf eclipse-committers-oxygen-3a-linux-gtk-x86_64.tar.gz
3、启动 (创建桌面快捷方式)
vim eclipse.desktop
[Desktop Entry]
Encoding=UTF-8
Name=Eclipse
Comment=Eclipse IDE
Exec=/usr/local/eclipse/eclipse
Icon=/usr/local/eclipse/icon.xpm
Terminal=false
Type=Application
Categories=GNOME;Application;Development;
StartupNotify=true
(注意:Exec 与 Icon 需改为 eclipse 的实际存放路径;每个键值必须独占一行,否则桌面快捷方式无法生效)
二、在Eclipse中安装Hadoop插件
1.复制jar包
2.在Eclipse中设置Hadoop的安装目录
图1 Eclipse中设置Hadoop的安装目录
3.创建并配置Map/Reduce Locations
图2 在Eclipse中选择Map/Reduce Locations
在“Map/Reduce Locations”子窗口中右键单击,选择“New Hadoop Location”,创建一个新的Hadoop Location。
图4 在Eclipse中设置Hadoop Location
图5 成功创建的Hadoop Location
三、开发第一个MapReduce程序
1.数据文件
图6 wordtest.txt文件内容
2.创建Map/Reduce项目
图6 创建Map/Reduce项目
图7 输入项目名称
3.编写代码
TokenizerMapper.java
package com.hellohadoop; import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable>{ private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while(itr.hasMoreTokens()) { word.set(itr.nextToken()); context.write(word, one); } } }
IntSumReducer.java
package com.hellohadoop; import java.io.IOException; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable>{ private IntWritable result = new IntWritable(); public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { int sum = 0; for(IntWritable val:values) { sum += val.get(); } result.set(sum); context.write(key, result); } }
WordCount.java
package com.hellohadoop; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class WordCount { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = new Job(conf,"word count"); job.setJarByClass(WordCount.class); //指定Mapper类 job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); //指定Reducer类 job.setReducerClass(IntSumReducer.class); //设置Reduce函数输出key的类型 job.setOutputKeyClass(Text.class); //设置Reduce函数输出value的类型 job.setOutputValueClass(IntWritable.class); //指定输入路径 FileInputFormat.addInputPath(job, new Path(otherArgs[0])); //指定输出路径 FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.out.println("OK"); //提交任务 System.exit(job.waitForCompletion(true) ? 0 : 1); } }
4.运行程序
图8 为Java Application创建一个新的配置
单击“Arguments”选项卡,输入运行参数,如下图所示:
图9 输入运行参数
图10 MapReduce程序运行结果
文章来源: Eclipse中搭建MapReduce开发环境