Reducer doesn’t call reduce method when using my own class as output value MapReduce Hadoop

I am trying to use an object of my own class as the output value of my Mapper and then consume it inside the Reducer, but the reduce() method isn’t called, and the application terminates, if I remove the default constructor of the DateIncome class. My code is as follows:

Driver:

package it.polito.bigdata.hadoop.lab;

import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.File;


/**
 * Driver of the MapReduce job.
 *
 * <p>Configures a single job that reads tab-separated (key, value) text lines from
 * {@code input/}, maps them with {@link MapperBigData} to (Text, DateIncome) pairs,
 * reduces them with {@link ReducerBigData} and writes (Text, FloatWritable) pairs
 * as text to {@code output}.
 */
public class DriverBigData extends Configured implements Tool {

    /**
     * Builds and runs the job.
     *
     * @param args command-line arguments; currently unused, the input and output
     *             locations are fixed to {@code input/} and {@code output}
     * @return 0 if the job completed successfully, 1 otherwise
     * @throws Exception if the stale output directory cannot be removed or the job
     *                   cannot be submitted
     */
    @Override
    public int run(String[] args) throws Exception {

        // Remove the result of a previous run. FileUtils.forceDelete throws when the
        // target does not exist, so the very first run (no output directory yet)
        // must skip the deletion instead of aborting.
        File staleOutput = new File("output/");
        if (staleOutput.exists()) {
            FileUtils.forceDelete(staleOutput);
        }

        Path inputPath = new Path("input/");
        Path outputPath = new Path("output");
        int numberOfReducer = 1;

        Configuration configuration = this.getConf();

        Job job = Job.getInstance(configuration);

        job.setJobName("myJob");

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setJarByClass(DriverBigData.class);

        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Map phase: (Text, Text) -> (Text, DateIncome)
        job.setMapperClass(MapperBigData.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DateIncome.class);

        // Reduce phase: (Text, DateIncome) -> (Text, FloatWritable)
        job.setReducerClass(ReducerBigData.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setNumReduceTasks(numberOfReducer);

        // Execute the job and wait for completion
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Main of the driver.
     *
     * @param args command-line arguments, forwarded to {@link ToolRunner}
     * @throws Exception if the tool fails to run
     */
    public static void main(String[] args) throws Exception {
        // Exploit the ToolRunner class to "configure" and run the Hadoop application
        int res = ToolRunner.run(new Configuration(), new DriverBigData(), args);

        System.exit(res);
    }
}

JavaScript
​x
 
package it.polito.bigdata.hadoop.lab;​import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils;import org.apache.commons.io.FileUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.FloatWritable;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;​import java.io.File;​​/** * MapReduce program */public class DriverBigData extends Configured implements Tool {​    @Override    public int run(String[] args) throws Exception {​​        int exitCode = 0;​​​        FileUtils.forceDelete(new File("output/"));​        Path inputPath = new Path("input/");        Path outputPath = new Path("output");        int numberOfReducer = 1;​​        //FileUtils.forceDelete(new File(String.valueOf(outputPath.isUriPathAbsolute())));        Configuration configuration = this.getConf();​        Job job = Job.getInstance(configuration);​        job.setJobName("myJob");​        FileInputFormat.addInputPath(job, inputPath);        FileOutputFormat.setOutputPath(job, outputPath);​        job.setJarByClass(DriverBigData.class);​        job.setInputFormatClass(KeyValueTextInputFormat.class);        job.setOutputFormatClass(TextOutputFormat.class);​        job.setMapperClass(MapperBigData.class);        job.setMapOutputKeyClass(Text.class);        job.setMapOutputValueClass(DateIncome.class);​        job.setReducerClass(ReducerBigData.class);      
  job.setOutputKeyClass(Text.class);        job.setOutputValueClass(FloatWritable.class);​//        job.setCombinerClass(CombinerBigData.class);       job.setNumReduceTasks(numberOfReducer);​        // Execute the job and wait for completion        if (job.waitForCompletion(true))            exitCode = 0;        else            exitCode = 1;​​        return exitCode;​    }​​    /**     * Main of the driver     */​    public static void main(String args[]) throws Exception {        // Exploit the ToolRunner class to "configure" and run the Hadoop application        int res = ToolRunner.run(new Configuration(), new DriverBigData(), args);​        System.exit(res);    }}​​​

Mapper:

package it.polito.bigdata.hadoop.lab;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import javax.swing.plaf.synth.ColorType;

/**
 * Lab  - Mapper
 */

/* Set the proper data types for the (key,value) pairs */
class MapperBigData extends Mapper<
        Text, // Input key type
        Text,         // Input value type
        Text,         // Output key type
        DateIncome> {// Output value type



    protected void map(
            Text key,   // Input key type
            Text value,         // Input value type
            Context context) throws IOException, InterruptedException {


        try {
            DateIncome income = new DateIncome(key.toString(),Float.parseFloat(value.toString()));

            context.write(key, income);
        }catch (Exception e){
            System.err.println(e.toString());
        }

    }


}

JavaScript
 
package it.polito.bigdata.hadoop.lab;​import java.io.IOException;import java.util.*;​import org.apache.hadoop.io.FloatWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;​import javax.swing.plaf.synth.ColorType;​/** * Lab  - Mapper */​/* Set the proper data types for the (key,value) pairs */class MapperBigData extends Mapper<        Text, // Input key type        Text,         // Input value type        Text,         // Output key type        DateIncome> {// Output value type​​​    protected void map(            Text key,   // Input key type            Text value,         // Input value type            Context context) throws IOException, InterruptedException {​​        try {            DateIncome income = new DateIncome(key.toString(),Float.parseFloat(value.toString()));​            context.write(key, income);        }catch (Exception e){            System.err.println(e.toString());        }​    }​​}​​​

Reducer:


package it.polito.bigdata.hadoop.lab;

import java.io.IOException;
import java.util.*;

import com.google.common.collect.Multimap;
import javafx.util.Pair;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Lab - Reducer.
 *
 * <p>Tracks the highest income seen across all keys handled by this reducer
 * instance and emits a single (date, income) pair for it in {@link #cleanup}.
 * Input key/value: Text/DateIncome. Output key/value: Text/FloatWritable.
 */
class ReducerBigData extends Reducer<
        Text,             // Input key type
        DateIncome,       // Input value type
        Text,             // Output key type
        FloatWritable> {  // Output value type

    // Seeded with negative infinity so that negative incomes can still become the maximum.
    float maxIncome = Float.NEGATIVE_INFINITY;
    String maxDAte = "";
    // True once at least one value has been recorded as the running maximum.
    private boolean foundMax = false;

    /**
     * Updates the running maximum with every value of the given key.
     * On ties the value seen last wins.
     *
     * @param key     the input key (not used for the comparison)
     * @param values  the values grouped under {@code key}
     * @param context the task context (nothing is written here)
     */
    @Override
    protected void reduce(
            Text key,
            Iterable<DateIncome> values,
            Context context) throws IOException, InterruptedException {

        System.out.println("reducer");

        for (DateIncome dateIncome : values) {
            System.out.println(dateIncome.getDate() + " " + dateIncome.getIncome());
            // Read the fields once; the framework may reuse the value object
            // between iterations -- TODO confirm for the Hadoop version in use.
            float income = dateIncome.getIncome();
            if (maxIncome <= income) {
                maxIncome = income;
                maxDAte = dateIncome.getDate();
                foundMax = true;
            }
        }
    }

    /**
     * Emits the overall maximum, if any value was seen.
     *
     * @param context the task context the result is written to
     * @throws IOException          if writing the result fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        super.cleanup(context);
        // Without any input there is no maximum; do not emit a placeholder record.
        if (foundMax) {
            context.write(new Text(maxDAte), new FloatWritable(maxIncome));
        }
    }
}

JavaScript
 
​​package it.polito.bigdata.hadoop.lab;​import java.io.IOException;import java.util.*;​import com.google.common.collect.Multimap;import javafx.util.Pair;import org.apache.hadoop.io.FloatWritable;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;​/** * Lab - Reducer */​/* Set the proper data types for the (key,value) pairs */class ReducerBigData extends Reducer<        Text,           // Input key type        DateIncome,    // Input value type        Text,           // Output key type        FloatWritable> {  // Output value type​​    float maxIncome = 0;    String maxDAte = "";​​    @Override    protected void reduce(            Text key, // Input key type            Iterable<DateIncome> values, // Input value type            Context context) throws IOException, InterruptedException {​        System.out.println("reducer");​        for (DateIncome dateIncome : values) {            System.out.println(dateIncome.getDate() + " " + dateIncome.getIncome());            if (maxIncome <= dateIncome.getIncome()) {                maxIncome = dateIncome.getIncome();                maxDAte = dateIncome.getDate();            }        }​​    }​    @Override    protected void cleanup(Context context) throws IOException, InterruptedException {        super.cleanup(context);        context.write(new Text(maxDAte), new FloatWritable(maxIncome));    }}​​

DateIncome:

package it.polito.bigdata.hadoop.lab;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * A (date, income) pair that can be used as a MapReduce value.
 *
 * <p>Serialized form: the income as a float, followed by the date as a
 * modified-UTF-8 string (see {@link #write} and {@link #readFields}).
 */
public class DateIncome implements Writable {
    private String date;
    private float income;

    /**
     * No-argument constructor. It must be kept: Hadoop creates Writable
     * instances reflectively through it before calling {@link #readFields}.
     */
    public DateIncome() {
    }

    /**
     * @param date   the date, as text
     * @param income the income for that date
     */
    public DateIncome(String date, float income) {
        this.date = date;
        this.income = income;
    }

    /** @return the date, or {@code null} if it was never set */
    public String getDate() {
        return date;
    }

    /** @param dateValue the new date */
    public void setDate(String dateValue) {
        date = dateValue;
    }

    /** @return the income */
    public float getIncome() {
        return income;
    }

    /** @param incomeValue the new income */
    public void setIncome(float incomeValue) {
        income = incomeValue;
    }

    /**
     * Restores both fields from {@code in}, in the order {@link #write} emits them.
     *
     * @param in the stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        income = in.readFloat();
        date = in.readUTF();
    }

    /**
     * Writes the income followed by the date.
     *
     * <p>A {@code null} date (e.g. on an instance built with the no-argument
     * constructor) is written as the empty string, because
     * {@link DataOutput#writeUTF} does not accept {@code null}.
     *
     * @param out the stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(income);
        out.writeUTF(date == null ? "" : date);
    }

    /** @return a human-readable form, {@code date:<date> income:<income>} */
    @Override
    public String toString() {
        return "date:" + date + " income:" + income;
    }

}

JavaScript
 
package it.polito.bigdata.hadoop.lab;​import org.apache.hadoop.io.Writable;​import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;​public class DateIncome implements Writable {    private String date;    private float income;​    public DateIncome() {    }​    public DateIncome(String date, float income) {        this.date = date;        this.income = income;    }​    public String getDate() {        return date;    }​    public void setDate(String dateValue) {        date = dateValue;    }​    public float getIncome() {        return income;    }​    public void setIncome(float incomeValue) {        income = incomeValue;    }​    @Override    public void readFields(DataInput in) throws IOException {        income = in.readFloat();        date = in.readUTF();    }​    @Override    public void write(DataOutput out) throws IOException {        out.writeFloat(income);        out.writeUTF(date);    }​    public String toString() {​        return new String("date:" + date + " income:" + income);    }​}​​

Input.txt:

2015-11-01  1000
2015-11-02  1305
2015-12-01  500
2015-12-02  750
2016-01-01  345
2016-01-02  1145
2016-02-03  200
2016-02-04  500

JavaScript
 
2015-11-01  10002015-11-02  13052015-12-01  5002015-12-02  7502016-01-01  3452016-01-02  11452016-02-03  2002016-02-04  500​​​

output:

2015-11-02  1305.0

JavaScript
 
2015-11-02  1305.0​​

So my question is: if I remove the default constructor of the DateIncome class, the reduce() method of the reducer is never called. Why does Hadoop need the default constructor even though another constructor is provided?

Answer

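All Writable implementations must have a default (no-argument) constructor; otherwise your objects cannot be deserialized.

During deserialization, Hadoop first instantiates the object reflectively through its default constructor and only afterwards fills in the fields by calling readFields(). Java generates an implicit default constructor only when a class declares no constructors at all, so as soon as you declare DateIncome(String, float) and remove the explicit no-argument one, the class has no default constructor left and this process breaks.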

Advertisement

Answer