Hadoop 2.4.1
Eclipse IDE for Java Developers Luna Release (4.4.0)
1. Create a new Map/Reduce Project
2. Add library jars:
- Right-click the project > Build Path > Configure Build Path > Java Build Path > Libraries
> Add External JARs (include the jars in the following directories):
- share/hadoop/common
- share/hadoop/common/lib
- share/hadoop/mapreduce
- share/hadoop/mapreduce/lib
- share/hadoop/yarn
- share/hadoop/yarn/lib
--------additional-----------
- HDFS lib
- HBase lib
3. In this project, add the following new classes:
- Mapper: Mp.java
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Mp extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused across map() calls to avoid allocating a new object per record.
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override
    public void map(LongWritable ikey, Text ivalue, Context context)
            throws IOException, InterruptedException {
        // Split the input line on whitespace and emit (word, 1) for each token.
        String line = ivalue.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, one);
        }
    }
}
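To sanity-check Mp without a cluster, a small local test can drive it directly. This is only a sketch: it assumes MRUnit (a separate Apache library, not among the jars listed in step 2) has been added to the build path, and the class name MpTest is made up for illustration.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;

public class MpTest {
    public static void main(String[] args) throws Exception {
        MapDriver<LongWritable, Text, Text, IntWritable> driver =
                MapDriver.newMapDriver(new Mp());
        // One input line should emit (word, 1) for each token, in order.
        driver.withInput(new LongWritable(0), new Text("hello hadoop hello"))
              .withOutput(new Text("hello"), new IntWritable(1))
              .withOutput(new Text("hadoop"), new IntWritable(1))
              .withOutput(new Text("hello"), new IntWritable(1))
              .runTest();
    }
}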
- Reducer: Rd.java
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Rd extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text _key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum all counts emitted by the mappers for this word.
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(_key, new IntWritable(sum));
    }
}
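Rd can be checked the same way: ReduceDriver feeds one key with a list of values and verifies the summed output (again a sketch assuming MRUnit is on the build path; RdTest is an illustrative name).

import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;

public class RdTest {
    public static void main(String[] args) throws Exception {
        ReduceDriver<Text, IntWritable, Text, IntWritable> driver =
                ReduceDriver.newReduceDriver(new Rd());
        // Three occurrences of "hello" should sum to 3.
        driver.withInput(new Text("hello"),
                         Arrays.asList(new IntWritable(1),
                                       new IntWritable(1),
                                       new IntWritable(1)))
              .withOutput(new Text("hello"), new IntWritable(3))
              .runTest();
    }
}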
- MapReduce Driver: WC.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WC {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the Job(conf, name) constructor,
        // which is deprecated in Hadoop 2.x.
        Job job = Job.getInstance(conf, "wordcount");
        job.setJarByClass(WC.class);

        // Mapper and reducer defined above.
        job.setMapperClass(Mp.class);
        job.setReducerClass(Rd.class);

        // Output types (these also apply to the map output here,
        // since no separate map output types are set).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output are DIRECTORIES (not files);
        // the output directory must not already exist.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
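Two optional refinements of the driver, shown as a sketch rather than a required change: GenericOptionsParser strips standard Hadoop options (e.g. -fs, -jt, -D key=value) from the argument list before the paths are read, and because Rd's summation is associative and its input/output types match, the same class can also be registered as a combiner to shrink the shuffle. The class name WC2 is only to keep it apart from the WC above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WC2 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Consume generic Hadoop options so only the
        // input/output paths remain in otherArgs.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        Job job = Job.getInstance(conf, "wordcount");
        job.setJarByClass(WC2.class);
        job.setMapperClass(Mp.class);
        // Reusing the reducer as a combiner is valid here because
        // summation is associative and commutative.
        job.setCombinerClass(Rd.class);
        job.setReducerClass(Rd.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}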
4. Create jar file
- File > Export > JAR file
- Select the resources and the JAR file location
5. Run the application
- Select "Run Configurations" > under "Java Application", check "Name", "Project", and "Main class"
- Open the "Arguments" tab > add "file1 Output" to "Program arguments"
[Note] Because main does not hard-code the input and output paths, they must be supplied to the app here;
this is equivalent to running $ hadoop jar project.jar file1 Output1 in a terminal.
If no path is given, the default input and output locations are on the local machine under $ECLIPSE_WORKSPACE/PROJECT_FOLDER.
- Click "Run"
[Question] How can the application be run on an existing Hadoop cluster instead of locally?
===> 2014/09/18 Tested so far: exporting the jar file and running it on the master works.
[Solution]
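A possible direction (an untested sketch, not a verified solution): point the client-side Configuration at the cluster before creating the Job, so the driver submits to YARN instead of the local runner. The hostnames, ports, jar path, and the class name ClusterConf below are all assumptions for illustration and must match the cluster's core-site.xml and yarn-site.xml.

import org.apache.hadoop.conf.Configuration;

public class ClusterConf {
    // Untested sketch: a Configuration that targets a remote cluster so the
    // job is submitted to YARN rather than run locally. Hostnames, ports,
    // and the jar path are ASSUMPTIONS -- adjust to the cluster's
    // core-site.xml and yarn-site.xml.
    public static Configuration make() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");           // NameNode address (assumed)
        conf.set("mapreduce.framework.name", "yarn");             // submit to YARN, not "local"
        conf.set("yarn.resourcemanager.address", "master:8032");  // ResourceManager (assumed)
        conf.set("mapreduce.job.jar", "project.jar");             // exported jar to ship to the cluster
        return conf;
    }
}

In WC's main, new Configuration() would then be replaced by ClusterConf.make(). This path has not been verified here; exporting the jar and running it on the master, as noted above, remains the tested approach.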