Step 2 - First MapReduce Program

This document provides steps to set up MapReduce programming with Eclipse on Hadoop. It includes instructions for installing required software, configuring Eclipse, creating MapReduce programs in Eclipse projects, running jobs and viewing results. Code examples are also provided for common HDFS operations like adding, reading and deleting files using Java.

Programming MapReduce (Hadoop) with Eclipse

1. Prepare:
● System:
○ Ubuntu 12.04
○ Hadoop 1.0.4

● Requirements:
○ Eclipse IDE for Java EE Developers 1.7.0
○ Install the Eclipse IDE on Ubuntu using the Ubuntu Software Center.
○ Install Java 7 and the required plugins.
● Append the following lines to /etc/bash.bashrc to set up the Java and Hadoop paths:

export JAVA_HOME=/usr/java/jdk1.6.0_25

export PATH=$PATH:$JAVA_HOME/bin

export HADOOP_HOME=/home/hadoop/hadoop-1.0.4

export HADOOP_LIB=$HADOOP_HOME/lib
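
After editing /etc/bash.bashrc, you can reload it and check the variables (a quick sanity check, assuming the paths above match your installation):

source /etc/bash.bashrc
echo $JAVA_HOME
echo $HADOOP_HOME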

Step 1. Install and start the Hadoop server


This section assumes your Hadoop installation is ready (single-node setup).

Start Hadoop:

hadoop@slavenode1:~/apache/hadoop-1.0.4$ bin/start-all.sh
hadoop@slavenode1:~/apache/hadoop-1.0.4$ jps

6098 JobTracker
8024 Jps
5783 DataNode
5997 SecondaryNameNode
5571 NameNode
6310 TaskTracker
(Make sure NameNode, DataNode, JobTracker, TaskTracker, SecondaryNameNode are running)

● Start Eclipse:

○ Click the Eclipse icon.
○ Create a workspace where you want to store your project.
○ Click OK.
● The Eclipse workbench then appears.
○ Right-click in the Project Explorer pane.
○ Click New --> Project.
○ Select Java Project and click Next.
○ Enter your project name and click Finish.
○ Right-click your project --> Build Path --> Configure Build Path.
○ Click Add External JARs, select the following JARs (found under $HADOOP_HOME and $HADOOP_HOME/lib), and click OK:
○ commons-collections-3.2.1.jar
○ commons-configuration-1.6.jar
○ commons-httpclient-3.0.1.jar
○ commons-lang-2.4.jar
○ commons-logging-1.1.1.jar
○ commons-logging-api-1.0.4.jar
○ jackson-core-asl-1.8.8.jar
○ jackson-mapper-asl-1.8.8.jar
○ log4j-1.2.15.jar
○ hadoop-core-1.0.4.jar
○ Project configuration is now complete. Suppose your project is named SimpleWordCount.
○ Expand the project and you will see the src folder.
○ Delete the src folder.
○ Create a new source folder:
○ Right-click the project --> New --> Source Folder
○ Enter src/main/java
○ Click Finish.
○ Create a package named com.trendwise.software inside the src/main/java folder.
○ Create a class named WordCountDriver inside the com.trendwise.software package.
○ Copy the following code into WordCountDriver.java:

package com.trendwise.software;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountDriver extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new WordCountDriver(), args);
        System.exit(exitCode);
    }

    @Override
    public int run(String[] args) throws Exception {

        Job job = new Job(getConf(), "Basic Word Count Job");
        job.setJarByClass(WordCountDriver.class);

        // Map and Reduce classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setNumReduceTasks(1);

        job.setInputFormatClass(TextInputFormat.class);

        // The map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // The reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }
}

○ Next, create a class named WordCountMapper inside com.trendwise.software.
○ Copy the following code into WordCountMapper.java (the mapper splits each input line on non-word characters and emits each word with a count of 1):

package com.trendwise.software;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author training
 * Class: WordCountMapper
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /**
     * Possible optimization: instead of creating new Text and IntWritable
     * objects on every call, reuse them as instance fields.
     */
    @Override
    public void map(LongWritable inputKey, Text inputVal, Context context)
            throws IOException, InterruptedException {
        String line = inputVal.toString();
        String[] splits = line.trim().split("\\W+");
        for (String outputKey : splits) {
            // Skip empty tokens produced by leading delimiters.
            if (outputKey.isEmpty()) {
                continue;
            }
            context.write(new Text(outputKey), new IntWritable(1));
        }
    }
}

○ Create a class named WordCountReducer inside com.trendwise.software.
○ Copy the following code into WordCountReducer.java:

package com.trendwise.software;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> listOfValues, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : listOfValues) {
            sum = sum + val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
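
Optionally, because summing counts is associative and commutative, this same reducer class can also serve as a combiner to cut shuffle traffic. This is not part of the original driver; if you want it, add one line in WordCountDriver.run():

job.setCombinerClass(WordCountReducer.class);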

○ Create a JAR file of the project:
○ Right-click the project and select Export.
○ Select JAR file and click Next.
○ Enter the JAR file name with its path and click Finish.
○ Run the JAR file in a terminal:

bin/hadoop jar jarfilename DriverClassname inputfilename outputfilename
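
For example, assuming the JAR was exported as SimpleWordCount.jar and a local file input.txt is first copied into HDFS (both names are placeholders):

bin/hadoop fs -put input.txt input.txt
bin/hadoop jar SimpleWordCount.jar com.trendwise.software.WordCountDriver input.txt output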

○ View the output in a browser: open localhost:50070/dfshealth.jsp
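
Alternatively, the result can be read directly from the terminal; with a single reducer, the output lands in one file named part-r-00000 inside the output directory used above:

bin/hadoop fs -cat output/part-r-00000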

● HDFS programming in Java:

● Below is a code sample showing how to read from and write to HDFS in Java.
○ Creating a configuration object: To be able to read from or write to HDFS, you need to create a Configuration object and pass configuration parameters to it using the Hadoop configuration files.

Configuration conf = new Configuration();

// Conf object will read the HDFS configuration parameters from these XML
// files. You may also specify parameters of your own if you want.
conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));
conf.addResource(new Path("/opt/hadoop-0.20.0/conf/hdfs-site.xml"));
○ If you do not assign the configurations to the conf object (using the Hadoop XML files), your HDFS operations will be performed on the local file system, not on HDFS.
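
You can also set parameters on the Configuration object directly instead of loading the XML files; a minimal sketch, assuming the default single-node NameNode address (the URI is an assumption; adjust it to your setup):

Configuration conf = new Configuration();
// fs.default.name is the Hadoop 1.x key for the default file system (NameNode).
conf.set("fs.default.name", "hdfs://localhost:9000");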
● Adding a file to HDFS: Create a FileSystem object and use a file stream to add the file.

FileSystem fileSystem = FileSystem.get(conf);

// Check if the file already exists
Path path = new Path("/path/to/file.ext");
if (fileSystem.exists(path)) {
    System.out.println("File " + path + " already exists");
    return;
}

// Create a new file and write data to it. "source" is the local file to upload.
FSDataOutputStream out = fileSystem.create(path);
InputStream in = new BufferedInputStream(new FileInputStream(
        new File(source)));

byte[] b = new byte[1024];
int numBytes = 0;
while ((numBytes = in.read(b)) > 0) {
    out.write(b, 0, numBytes);
}

// Close all the file descriptors
in.close();
out.close();
fileSystem.close();

○ Reading a file from HDFS: Create a file stream to the file in HDFS and read it.

FileSystem fileSystem = FileSystem.get(conf);

Path path = new Path("/path/to/file.ext");
if (!fileSystem.exists(path)) {
    System.out.println("File does not exist");
    return;
}

FSDataInputStream in = fileSystem.open(path);

// Copy the file to the local file system under its bare name.
String filename = path.getName();
OutputStream out = new BufferedOutputStream(new FileOutputStream(
        new File(filename)));

byte[] b = new byte[1024];
int numBytes = 0;
while ((numBytes = in.read(b)) > 0) {
out.write(b, 0, numBytes);
}

in.close();
out.close();
fileSystem.close();

○ Deleting a file from HDFS: Get a FileSystem handle for the file in HDFS and delete it.

FileSystem fileSystem = FileSystem.get(conf);

Path path = new Path("/path/to/file.ext");
if (!fileSystem.exists(path)) {
    System.out.println("File does not exist");
    return;
}

// Delete the file; "true" enables recursive deletion.
fileSystem.delete(path, true);

fileSystem.close();

○ Creating a directory in HDFS: Check whether the directory already exists and create it if it does not.

FileSystem fileSystem = FileSystem.get(conf);

Path path = new Path("/path/to/dir");
if (fileSystem.exists(path)) {
    System.out.println("Dir already exists");
    return;
}

// Create the directory, including any missing parents
fileSystem.mkdirs(path);

fileSystem.close();

○ Complete code for adding, reading, deleting files and creating a directory:

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSClient {

    public HDFSClient() {
    }

    public void addFile(String source, String dest) throws IOException {

        Configuration conf = new Configuration();

        // Conf object will read the HDFS configuration parameters from these
        // XML files.
        conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));
        conf.addResource(new Path("/opt/hadoop-0.20.0/conf/hdfs-site.xml"));

        FileSystem fileSystem = FileSystem.get(conf);

        // Get the filename out of the file path
        String filename = source.substring(source.lastIndexOf('/') + 1,
                source.length());

        // Create the destination path including the filename.
        if (dest.charAt(dest.length() - 1) != '/') {
            dest = dest + "/" + filename;
        } else {
            dest = dest + filename;
        }

        // Check if the file already exists
        Path path = new Path(dest);
        if (fileSystem.exists(path)) {
            System.out.println("File " + dest + " already exists");
            return;
        }

        // Create a new file and write data to it.
        FSDataOutputStream out = fileSystem.create(path);
        InputStream in = new BufferedInputStream(new FileInputStream(
                new File(source)));

        byte[] b = new byte[1024];
        int numBytes = 0;
        while ((numBytes = in.read(b)) > 0) {
            out.write(b, 0, numBytes);
        }

        // Close all the file descriptors
        in.close();
        out.close();
        fileSystem.close();
    }

    public void readFile(String file) throws IOException {

        Configuration conf = new Configuration();
        conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));

        FileSystem fileSystem = FileSystem.get(conf);

        Path path = new Path(file);
        if (!fileSystem.exists(path)) {
            System.out.println("File " + file + " does not exist");
            return;
        }

        FSDataInputStream in = fileSystem.open(path);

        // Copy the file to the local file system under its bare name.
        String filename = file.substring(file.lastIndexOf('/') + 1,
                file.length());

        OutputStream out = new BufferedOutputStream(new FileOutputStream(
                new File(filename)));

        byte[] b = new byte[1024];
        int numBytes = 0;
        while ((numBytes = in.read(b)) > 0) {
            out.write(b, 0, numBytes);
        }

        in.close();
        out.close();
        fileSystem.close();
    }

    public void deleteFile(String file) throws IOException {

        Configuration conf = new Configuration();
        conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));

        FileSystem fileSystem = FileSystem.get(conf);

        Path path = new Path(file);
        if (!fileSystem.exists(path)) {
            System.out.println("File " + file + " does not exist");
            return;
        }

        // "true" deletes recursively, so non-empty directories are removed too.
        fileSystem.delete(path, true);

        fileSystem.close();
    }

    public void mkdir(String dir) throws IOException {

        Configuration conf = new Configuration();
        conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));

        FileSystem fileSystem = FileSystem.get(conf);

        Path path = new Path(dir);
        if (fileSystem.exists(path)) {
            System.out.println("Dir " + dir + " already exists");
            return;
        }

        fileSystem.mkdirs(path);

        fileSystem.close();
    }

    public static void main(String[] args) throws IOException {

        if (args.length < 1) {
            System.out.println("Usage: hdfsclient add/read/delete/mkdir" +
                    " [<local_path> <hdfs_path>]");
            System.exit(1);
        }

        HDFSClient client = new HDFSClient();

        if (args[0].equals("add")) {
            if (args.length < 3) {
                System.out.println("Usage: hdfsclient add <local_path> " +
                        "<hdfs_path>");
                System.exit(1);
            }
            client.addFile(args[1], args[2]);
        } else if (args[0].equals("read")) {
            if (args.length < 2) {
                System.out.println("Usage: hdfsclient read <hdfs_path>");
                System.exit(1);
            }
            client.readFile(args[1]);
        } else if (args[0].equals("delete")) {
            if (args.length < 2) {
                System.out.println("Usage: hdfsclient delete <hdfs_path>");
                System.exit(1);
            }
            client.deleteFile(args[1]);
        } else if (args[0].equals("mkdir")) {
            if (args.length < 2) {
                System.out.println("Usage: hdfsclient mkdir <hdfs_path>");
                System.exit(1);
            }
            client.mkdir(args[1]);
        } else {
            System.out.println("Usage: hdfsclient add/read/delete/mkdir" +
                    " [<local_path> <hdfs_path>]");
            System.exit(1);
        }

        System.out.println("Done!");
    }
}
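
To try the client, one option is to package it into a JAR (here called hdfsclient.jar, a placeholder name) and launch it through the hadoop wrapper so that the Hadoop classpath and configuration are picked up:

bin/hadoop jar hdfsclient.jar HDFSClient add /tmp/sample.txt /user/hadoop/
bin/hadoop jar hdfsclient.jar HDFSClient read /user/hadoop/sample.txt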

● Debugging & Diagnosis:

