一、安装Eclipse
1、下载-官方网址
https://www.eclipse.org/downloads
2、解压安装包
tar -zxvf eclipse-committers-oxygen-3a-linux-gtk-x86_64.tar.gz
3、启动 (创建桌面快捷方式)
vim eclipse.desktop
[Desktop Entry]
Encoding=UTF-8
Name=Eclipse
Comment=Eclipse IDE
Exec=/usr/local/eclipse/eclipse
Icon=/usr/local/eclipse/icon.xpm
Terminal=false
Type=Application
Categories=GNOME;Application;Development;
StartupNotify=true
(注意:Exec 与 Icon 需改为 eclipse 的实际存放路径;每个键值必须独占一行,否则桌面快捷方式无法生效)
二、在Eclipse中安装Hadoop插件
1.复制jar包
2.在Eclipse中设置Hadoop的安装目录
图1 Eclipse中设置Hadoop的安装目录
3.创建并配置Map/Reduce Locations
图2 在Eclipse中选择Map/Reduce Locations
在“Map/Reduce Locations”子窗口中右键单击,选择“New Hadoop Location”,创建一个新的Hadoop Location。
图4 在Eclipse中设置Hadoop Location
图5 成功创建的Hadoop Location
三、开发第一个MapReduce程序
1.数据文件
图6 wordtest.txt文件内容
2.创建Map/Reduce项目
图6 创建Map/Reduce项目
图7 输入项目名称
3.编写代码
TokenizerMapper.java
package com.hellohadoop; import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable>{ private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while(itr.hasMoreTokens()) { word.set(itr.nextToken()); context.write(word, one); } } }
IntSumReducer.java
package com.hellohadoop; import java.io.IOException; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable>{ private IntWritable result = new IntWritable(); public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { int sum = 0; for(IntWritable val:values) { sum += val.get(); } result.set(sum); context.write(key, result); } }
WordCount.java
package com.hellohadoop; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class WordCount { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = new Job(conf,"word count"); job.setJarByClass(WordCount.class); //指定Mapper类 job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); //指定Reducer类 job.setReducerClass(IntSumReducer.class); //设置Reduce函数输出key的类型 job.setOutputKeyClass(Text.class); //设置Reduce函数输出value的类型 job.setOutputValueClass(IntWritable.class); //指定输入路径 FileInputFormat.addInputPath(job, new Path(otherArgs[0])); //指定输出路径 FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.out.println("OK"); //提交任务 System.exit(job.waitForCompletion(true) ? 0 : 1); } }
4.运行程序
图8 为Java Application创建一个新的配置
单击“Arguments”选项卡,输入运行参数,如下图所示:
图9 输入运行参数
图10 MapReduce程序运行结果
文章来源: Eclipse中搭建MapReduce开发环境