Skip to content
Advertisement

How to copy/assign a CompositeKey into another CompositeKey in hadoop?

I tried running a MapReduce job on some data on a cluster and got the following output.

0000000000  44137 0 2
1   1
902996760100000 44137 2 6
2   2
9029967602  44137 2 8
2   2
90299676030000  44137 2 1
9029967604  44137 2 5
2   2
905000  38704 2 1
9050000001  38702 2 24
2   2
9050000001  38704 2 14
2   2
9050000001  38705 2 12
2   2
9050000001  38706 2 13
2   2
9050000001  38714 2 24
2   2
9050000002  38704 2 12
2   2
9050000002  38706 2 12
2   2
9050000011  38704 2 6
2   2
9050000011  38706 2 12
2   2
9050000021  38702 2 12
2   2
9050000031  38704 2 6
2   2
9050000031  38705 2 6
2   2
9050000031  38714 2 12
2   2

This is my reducer

public class RTopLoc extends Reducer<CompositeKey, IntWritable, Text, Text> {
    private static int number = 0;
    private static CompositeKey lastCK;
    private static Text lastLac = new Text();

    @Override
    public void reduce(CompositeKey key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

        // Total of all counts the mappers emitted for this composite key.
        int sum = sumValues(values);
        String str = Integer.toString(sum);
        String str2 = Integer.toString(number);
        String str3 = key.getSecond().toString();
        // Debug output: first-part of key, then "second number sum".
        context.write(key.getFirst(), new Text(str3 + " " + str2 + " " + str));
        // NOTE(review): lastCK is declared but never initialized (null) before the
        // first reduce() call, yet it is dereferenced here BEFORE the number == 0
        // branch below assigns it — this line throws NullPointerException on the
        // very first key unless something outside this view initializes lastCK.
        context.write(lastCK.getFirst(), lastCK.getSecond());
        if(number == 0){
            // First key seen: remember its sum and a copy of the key.
            number = sum;
            lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
            context.write(new Text("1"), new Text("1"));
        }
        else if(lastCK.getFirst().equals(key.getFirst()) && sum > number){
            // Same first-part as the remembered key but a larger sum: keep the new maximum.
            // NOTE(review): number is NOT updated here, so later comparisons still use
            // the old sum — presumably a bug; confirm against intended "top location" logic.
            lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
            context.write(new Text("2"), new Text("2"));
        }
        else if(!lastCK.getFirst().equals(key.getFirst())){
            // First-part changed: start tracking the new group.
//            context.write(lastCK.getFirst(), lastCK.getSecond());
            context.write(new Text("3"), new Text("3"));
            number = sum;
            lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
        }
    }

From what I understand, the problem is that Hadoop treats lastCK and key as the same object, so this condition

if(lastCK.getFirst().equals(key.getFirst()) 

will always be true

This is my CompositeKey class

public class CompositeKey implements WritableComparable {
    private Text first = null;
    private Text second = null;
    public CompositeKey() {
    }
    public CompositeKey(Text first, Text second) {
        this.first = first;
        this.second = second;
    }
    //...getters and setters
    public Text getFirst() {
        return first;
    }

    public void setFirst(Text first) {
        this.first = first;
    }

    public void setFirst(String first) {
        setFirst(new Text(first));
    }

    public Text getSecond() {
        return second;
    }

    public void setSecond(Text second) {
        this.second = second;
    }

    public void setSecond(String second) {
        setSecond(new Text(second));
    }

    public void write(DataOutput d) throws IOException {
        first.write(d);
        second.write(d);
    }

    public void readFields(DataInput di) throws IOException {
        if (first == null) {
            first = new Text();
        }
        if (second == null) {
            second = new Text();
        }
        first.readFields(di);
        second.readFields(di);
    }
    public int compareTo(Object obj) {
        CompositeKey other = (CompositeKey) obj;
        int cmp = first.compareTo(other.getFirst());
        if (cmp != 0) {
            return cmp;
        }
        return second.compareTo(other.getSecond());
    }
    @Override
    public boolean equals(Object obj) {
        CompositeKey other = (CompositeKey)obj;
        return first.equals(other.getFirst());
    }
    @Override
    public int hashCode() {
        return first.hashCode();
    }
}

I tried changing setters to something along this lines

// NOTE(review): this throws NullPointerException because the field is declared
// as `private Text first = null;` — this.first is still null when set() is called.
public void setFirst(Text first) {
        this.first.set(first);
    }

    public void setFirst(String first) {
        setFirst(new Text(first));
    }

where set is the method described in the Hadoop Text documentation,

but got

Error: java.lang.NullPointerException
        at task.SecondarySort.CompositeKey.setFirst(CompositeKey.java:29)
        at task.SecondarySort.CompositeKey.setFirst(CompositeKey.java:33)

How do I make hadoop treat lastCK and key as different objects?

Advertisement

Answer

If you change these lines:

private Text first = null;
private Text second = null;

to

private Text first = new Text();
private Text second = new Text();

And then use:

// Safe now: the field is initialized to `new Text()` at declaration, so set()
// copies the bytes into an existing object instead of dereferencing null.
public void setFirst(Text first) {
    this.first.set(first);
}

It should work. You could also create first and second in the constructors.

User contributions licensed under: CC BY-SA
4 people found this helpful
Advertisement