Step 2 - First MapReduce Program (Hadoop) with Eclipse
1. Prepare:
● System:
○ Ubuntu 12.04
○ Hadoop 1.0.4
● Requirements:
○ Eclipse IDE for Java EE Developers
○ Java 7 (1.7.0)
Install the Eclipse IDE in Ubuntu using the Ubuntu Software Center.
Install Java 7 and the required plugins.
● Append the following lines to /etc/bash.bashrc to set up the Java and Hadoop paths:
export JAVA_HOME=/usr/java/jdk1.6.0_25
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/home/hadoop/hadoop-1.0.4
export HADOOP_LIB=$HADOOP_HOME/lib
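Reload the shell configuration (or open a new terminal) so the variables take effect:
source /etc/bash.bashrc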
Start Hadoop:
hadoop@slavenode1:~/apache/hadoop-1.0.4$ bin/start-all.sh
hadoop@slavenode1:~/apache/hadoop-1.0.4$ jps
6098 JobTracker
8024 Jps
5783 DataNode
5997 SecondaryNameNode
5571 NameNode
6310 TaskTracker
(Make sure NameNode, DataNode, JobTracker, TaskTracker, SecondaryNameNode are running)
● The driver class, reconstructed around the fragments shown (the class name WordCount is an assumption; it must match the mapper and reducer classes below):
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool { // class name assumed
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "wordcount");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));   // input dir
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output dir
    job.waitForCompletion(true);
    return 0;
  }
  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new WordCount(), args));
  }
}
Extending Configured and launching through ToolRunner lets the job pick up generic Hadoop options (e.g. -D key=value) from the command line.
● The mapper class (the class declaration and closing braces are restored around the map() method shown):
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * @author training
 * Class : WordCountMapper
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  @Override
  public void map(LongWritable inputKey, Text inputVal, Context context)
      throws IOException, InterruptedException {
    // Split each line on runs of non-word characters and emit (word, 1).
    String line = inputVal.toString();
    String[] splits = line.trim().split("\\W+");
    for (String outputKey : splits) {
      context.write(new Text(outputKey), new IntWritable(1));
    }
  }
}
● The reducer class:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
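Only the imports appear in the fragments; a minimal sketch of the rest, assuming the standard sum-of-counts reduce and the class name WordCountReducer referenced by the driver above:
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // Sum the 1s emitted by the mapper for each word.
    // (Body assumed; not shown in the fragments.)
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    context.write(key, new IntWritable(sum));
  }
}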
● Export the program as a jar file from Eclipse:
○ Right-click the project and choose Export...
○ Select JAR file and click Next.
○ Enter the jar file name with its path, and click Finish.
○ Run the jar file in the terminal.
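For example (the jar name and HDFS paths below are placeholders; WordCount is the driver class from above):
hadoop@slavenode1:~/apache/hadoop-1.0.4$ bin/hadoop jar /home/hadoop/wordcount.jar WordCount input output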
● The Configuration (conf) object reads the HDFS connection parameters from the Hadoop XML files; you may also set parameters on it yourself if you want.
○ If you do not add the Hadoop XML configuration files to the conf object, your file
system operations will be performed on the local file system and not on HDFS.
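A minimal sketch (the conf paths assume the HADOOP_HOME from the setup above; adjust them to your installation):
Configuration conf = new Configuration();
// Without these addResource() calls, FileSystem.get(conf) falls back to
// the local file system instead of HDFS.
conf.addResource(new Path("/home/hadoop/hadoop-1.0.4/conf/core-site.xml"));
conf.addResource(new Path("/home/hadoop/hadoop-1.0.4/conf/hdfs-site.xml"));
FileSystem fileSystem = FileSystem.get(conf);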
● Adding a file to HDFS: Create a FileSystem object and use an output stream to write the file, as sketched below.
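A minimal sketch of the copy loop (source, dest, and the 4 KB buffer are illustrative):
InputStream in = new BufferedInputStream(new FileInputStream(new File(source)));
FSDataOutputStream out = fileSystem.create(new Path(dest));
byte[] buf = new byte[4096];
int bytesRead;
while ((bytesRead = in.read(buf)) > 0) {
  out.write(buf, 0, bytesRead);
}
in.close();
out.close();
fileSystem.close();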
○ Reading a file from HDFS: Open an input stream to the file in HDFS and copy it to a local output stream.
FSDataInputStream in = fileSystem.open(path);
OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(path.getName())));
// ... copy from in to out, as in the write loop above ...
in.close();
out.close();
fileSystem.close();
○ Deleting a file from HDFS: Call delete() on the FileSystem object with the file's path (no stream is needed).
// Delete file
fileSystem.delete(new Path(file), true);
fileSystem.close();
○ Creating a directory in HDFS: Call mkdirs() on the FileSystem object with the directory's path.
// Create directories
fileSystem.mkdirs(path);
fileSystem.close();
● The complete HDFSClient program, reconstructed from the fragments above (the getConf() helper, the 4 KB copy buffer, and the missing method signatures are filled-in assumptions; point the addResource() paths at your own conf directory):
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HDFSClient {
  // Conf object will read the HDFS configuration parameters from these
  // XML files. (Adjust the paths to your installation,
  // e.g. /home/hadoop/hadoop-1.0.4/conf.)
  private Configuration getConf() {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));
    conf.addResource(new Path("/opt/hadoop-0.20.0/conf/hdfs-site.xml"));
    return conf;
  }
  // Copy a local file into HDFS.
  public void addFile(String source, String dest) throws IOException {
    FileSystem fileSystem = FileSystem.get(getConf());
    InputStream in = new BufferedInputStream(new FileInputStream(new File(source)));
    FSDataOutputStream out = fileSystem.create(new Path(dest));
    byte[] buf = new byte[4096];
    int bytesRead;
    while ((bytesRead = in.read(buf)) > 0) {
      out.write(buf, 0, bytesRead);
    }
    in.close();
    out.close();
    fileSystem.close();
  }
  // Copy a file out of HDFS into the local working directory.
  public void readFile(String file) throws IOException {
    FileSystem fileSystem = FileSystem.get(getConf());
    Path path = new Path(file);
    FSDataInputStream in = fileSystem.open(path);
    OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(path.getName())));
    byte[] buf = new byte[4096];
    int bytesRead;
    while ((bytesRead = in.read(buf)) > 0) {
      out.write(buf, 0, bytesRead);
    }
    in.close();
    out.close();
    fileSystem.close();
  }
  // Delete file (recursively, if it is a directory).
  public void deleteFile(String file) throws IOException {
    FileSystem fileSystem = FileSystem.get(getConf());
    fileSystem.delete(new Path(file), true);
    fileSystem.close();
  }
  // Create directories.
  public void mkdir(String dir) throws IOException {
    FileSystem fileSystem = FileSystem.get(getConf());
    fileSystem.mkdirs(new Path(dir));
    fileSystem.close();
  }
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      System.out.println("Usage: hdfsclient add/read/delete/mkdir" +
          " [<local_path> <hdfs_path>]");
      System.exit(1);
    }
    HDFSClient client = new HDFSClient();
    if (args[0].equals("add")) {
      if (args.length < 3) {
        System.out.println("Usage: hdfsclient add <local_path> <hdfs_path>");
        System.exit(1);
      }
      client.addFile(args[1], args[2]);
    } else if (args[0].equals("read")) {
      if (args.length < 2) {
        System.out.println("Usage: hdfsclient read <hdfs_path>");
        System.exit(1);
      }
      client.readFile(args[1]);
    } else if (args[0].equals("delete")) {
      if (args.length < 2) {
        System.out.println("Usage: hdfsclient delete <hdfs_path>");
        System.exit(1);
      }
      client.deleteFile(args[1]);
    } else if (args[0].equals("mkdir")) {
      if (args.length < 2) {
        System.out.println("Usage: hdfsclient mkdir <hdfs_path>");
        System.exit(1);
      }
      client.mkdir(args[1]);
    } else {
      System.out.println("Usage: hdfsclient add/read/delete/mkdir" +
          " [<local_path> <hdfs_path>]");
      System.exit(1);
    }
    System.out.println("Done!");
  }
}
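Example run (the jar name and paths are placeholders):
hadoop@slavenode1:~/apache/hadoop-1.0.4$ bin/hadoop jar hdfsclient.jar HDFSClient add notes.txt /user/hadoop/notes.txt
Done!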