SalesData Map Reduce

This document contains code for a MapReduce job in Java that analyzes sales data by country. It creates an input directory in HDFS, copies a CSV file to that directory, and runs a MapReduce job on the data. The Mapper class splits each line on commas and emits the country field as the key and the value "1". The Reducer class sums the values for each key to get a total count of sales by country.
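As an illustration of that flow (the record values below are hypothetical; only the comma-separated field positions matter, with the country in the eighth field):

input record:  1/2/09 6:17,Product1,1200,Mastercard,Jane,Basildon,England,United Kingdom,...
mapper emits:  (United Kingdom, 1)
reducer emits: (United Kingdom, total number of sales records for United Kingdom)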


C:\hadoop-2.8.0\sbin>hadoop fs -mkdir /input_dir_sales

C:\hadoop-2.8.0\sbin>cd ..

C:\hadoop-2.8.0>hadoop fs -put c:/SalesJan2009.csv /input_dir_sales

C:\hadoop-2.8.0>hadoop jar C:/hadoop-2.8.0/SalesMapReduce.jar SalesData.SalesCountryDriver /input_dir_sales/SalesJan2009.csv /output_dir_sales
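Once the job completes, the output directory can be listed and the result read back from HDFS. This is a minimal sketch: it assumes the single reduce task wrote its result to the default part-00000 file inside /output_dir_sales. Each output line holds a country name and its sales count separated by a tab (the TextOutputFormat default).

C:\hadoop-2.8.0>hadoop fs -ls /output_dir_sales

C:\hadoop-2.8.0>hadoop fs -cat /output_dir_sales/part-00000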

package SalesData;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

public class SalesCountryDriver {

    public static void main(String[] args) {
        JobClient my_client = new JobClient();

        // Create a configuration object for the job
        JobConf job_conf = new JobConf(SalesCountryDriver.class);

        // Set a name for the job
        job_conf.setJobName("SalePerCountry");

        // Specify the data types of the output key and value
        job_conf.setOutputKeyClass(Text.class);
        job_conf.setOutputValueClass(IntWritable.class);

        // Specify the Mapper and Reducer classes
        job_conf.setMapperClass(SalesMapper.class);
        job_conf.setReducerClass(SalesCountryReducer.class);

        // Specify the input and output formats
        job_conf.setInputFormat(TextInputFormat.class);
        job_conf.setOutputFormat(TextOutputFormat.class);

        // Set input and output directories from the command-line arguments:
        // args[0] = name of the input directory on HDFS,
        // args[1] = name of the output directory to be created for the result file.
        FileInputFormat.setInputPaths(job_conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(job_conf, new Path(args[1]));

        my_client.setConf(job_conf);
        try {
            // Run the job
            JobClient.runJob(job_conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

class SalesMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable one = new IntWritable(1);

    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {

        // Split the CSV record on commas and emit (country, 1);
        // the country is the eighth field (index 7) of each record.
        String valueString = value.toString();
        String[] singleCountryData = valueString.split(",");
        output.collect(new Text(singleCountryData[7]), one);
    }
}

class SalesCountryReducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text t_key, Iterator<IntWritable> values,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {

        Text key = t_key;
        int frequencyForCountry = 0;

        // Sum the counts emitted by the mapper for this country
        while (values.hasNext()) {
            IntWritable value = values.next();
            frequencyForCountry += value.get();
        }
        output.collect(key, new IntWritable(frequencyForCountry));
    }
}
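For reference, a minimal sketch of how the SalesMapReduce.jar used in the commands above could be compiled and packaged on Windows. The classpath and source layout are assumptions: all three classes are taken to live in SalesData\SalesCountryDriver.java, and the Hadoop jars are taken from the standard share\hadoop folders of the 2.8.0 distribution (running "hadoop classpath" prints the exact jar list for a given installation).

C:\hadoop-2.8.0>mkdir classes

C:\hadoop-2.8.0>javac -classpath "share\hadoop\common\*;share\hadoop\common\lib\*;share\hadoop\mapreduce\*" -d classes SalesData\SalesCountryDriver.java

C:\hadoop-2.8.0>jar -cvf SalesMapReduce.jar -C classes .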
