MapReduce Program with Default Mapper and Reducer
The default Mapper and Reducer come from:

import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
When the Mapper and Reducer are not set using job.setMapperClass() and job.setReducerClass(), the default Mapper.class and Reducer.class are used. Both are identity implementations that write each input key-value pair straight to the output, so with the default TextInputFormat the output key is the byte offset of each line and the value is the line itself. The input and output of the default Mapper and Reducer are shown below.
Input

Test
Test
Test

Output

0 Test
5 Test
10 Test

Each line holds "Test" (4 characters) plus the line terminator (1 character), so the byte-offset key advances by 5 per line.
package com.mugilmapred;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Test extends Configured implements Tool {
	public static void main(String[] args) throws Exception {
		Test objTest = new Test();
		int result = ToolRunner.run(objTest, args);
		System.exit(result);
	}

	public int run(String[] args) throws Exception {
		// No Mapper or Reducer is set, so the identity
		// Mapper.class and Reducer.class defaults are used
		Job job = new Job(getConf());
		job.setJarByClass(Test.class);

		Path inputFilepath = new Path(args[0]);
		Path outputFilepath = new Path(args[1]);
		FileInputFormat.addInputPath(job, inputFilepath);
		FileOutputFormat.setOutputPath(job, outputFilepath);

		// Delete the output directory if it already exists,
		// otherwise the job fails on startup
		FileSystem fs = FileSystem.newInstance(getConf());
		if (fs.exists(outputFilepath)) {
			fs.delete(outputFilepath, true);
		}

		return job.waitForCompletion(true) ? 0 : 1;
	}
}
When you do not set the jar with setJarByClass, the job throws:
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class com.mugilmapred.Test$Map not found
This is not required when you run locally, but when you run on a cluster the class that contains the Mapper must be specified; otherwise the framework does not know which jar file the Mapper is located in.
job.setJarByClass(Test.class);
You can also use setJar, as below:
job.setJar("Test-0.0.1.jar");
Using a Predefined Reducer in Program
. . .
job.setMapperClass(WordMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setReducerClass(LongSumReducer.class);
job.setNumReduceTasks(1);
. . .
LongSumReducer.class takes the mapper output ([count,1] [count,1] [count,1] [count,1]) and sums the values for each key into ([count,4]).
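The WordMapper referenced above is not shown in the original listing. A minimal sketch of what it might look like, assuming it tokenizes each line and emits (word, 1) pairs matching the Text/LongWritable types configured above:

package com.mugilmapred;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper matching the job configuration above:
// emits (word, 1) so LongSumReducer can sum the counts per word
public class WordMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
	private static final LongWritable ONE = new LongWritable(1);
	private Text word = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		StringTokenizer itr = new StringTokenizer(value.toString());
		while (itr.hasMoreTokens()) {
			word.set(itr.nextToken());
			context.write(word, ONE);
		}
	}
}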
Hive Queries
Table Creation
CREATE TABLE HomeNeeds(Type STRING, Product STRING, No INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
Insertion
LOAD DATA LOCAL INPATH '/home/turbo/workspace/Sample Datas/Test.csv' OVERWRITE INTO TABLE HomeNeeds;
Create Table with Partition
CREATE TABLE HomeNeeds(Type String, Product String, No Int)
PARTITIONED BY (Date String, Country String)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
The partition columns and the table columns are independent of one another: a partition column must not be repeated in the table's column list, and its values are stored in the directory layout rather than in the data files.
Inserting into Partitioned Table
LOAD DATA LOCAL INPATH '/home/turbo/workspace/Sample Datas/Test.csv' INTO TABLE HomeNeeds PARTITION (Date='2001-01-25', Country='India');
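Once loaded, partition columns can be queried like ordinary columns; a query such as the following (the WHERE values match the partition created above) is pruned to that partition's directory:

SELECT Type, Product, No
FROM HomeNeeds
WHERE Date = '2001-01-25' AND Country = 'India';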
Partition and Bucketing
CREATE TABLE HomeNeeds(Type String, Item String, No Int)
PARTITIONED BY (Area String)
CLUSTERED BY (Type) INTO 4 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
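Note that LOAD DATA does not redistribute rows into buckets. To populate a bucketed table you would typically insert from a plain staging table with bucketing enforced; the staging table name and partition value here are illustrative:

SET hive.enforce.bucketing = true;
INSERT OVERWRITE TABLE HomeNeeds PARTITION (Area = 'Chennai')
SELECT Type, Item, No FROM HomeNeedsStaging;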
User Defined Function in Pig
package com.mugil.pig;

import java.io.IOException;
import org.apache.pig.FilterFunc;
import org.apache.pig.data.Tuple;

public class FilterType extends FilterFunc {
	@Override
	public Boolean exec(Tuple tuple) throws IOException {
		if (tuple == null || tuple.size() == 0)
			return false;

		try {
			// The first field of the tuple holds the Type column
			Object obj = tuple.get(0);

			if (obj == null)
				return false;

			String type = (String) obj;

			// Keep only the rows whose Type is "Kitchen"
			if (type.equals("Kitchen"))
				return true;
		} catch (Exception e) {
			throw new IOException("Caught exception processing input row " + e.getMessage(), e);
		}

		return false;
	}
}
Registering UDF Function
grunt> REGISTER /usr/local/pig-0.15.0/FilterByType3.jar;
grunt> DEFINE FilterType com.mugil.pig.FilterType();
grunt> filtered_records = FILTER records BY FilterType(Type);
grunt> DUMP filtered_records;
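The records relation above is assumed to have been loaded beforehand. A sketch of such a load, reusing the sample file from the Hive section and a schema matching the HomeNeeds data:

grunt> records = LOAD '/home/turbo/workspace/Sample Datas/Test.csv' USING PigStorage(',') AS (Type:chararray, Item:chararray, No:int);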
Search
Binary search is faster than linear search when the collection is sorted and does not contain duplicate values.
public static void binarySearch(int searchVal) {
	int lowerIndex = 0;
	int higherIndex = arrNumbers.length - 1;

	while (lowerIndex <= higherIndex) {
		int middleIndex = (lowerIndex + higherIndex) / 2;

		if (searchVal < arrNumbers[middleIndex]) {
			// Search value lies in the lower half
			higherIndex = middleIndex - 1;
		} else if (searchVal > arrNumbers[middleIndex]) {
			// Search value lies in the upper half
			lowerIndex = middleIndex + 1;
		} else {
			System.out.println("The element is Found at Index " + middleIndex);
			return;
		}
	}
}
Sorting
Bubble Sort
public void bubbleSort() {
	// After each outer pass the largest remaining value bubbles up to index i
	for (int i = arrNumbers.length - 1; i > 0; i--) {
		for (int j = 0; j < i; j++) {
			if (arrNumbers[j] > arrNumbers[j + 1]) {
				swapValuesAtIndex(j, j + 1);
			}
		}
	}
}
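The swapValuesAtIndex helper used by these sorts is not shown in the listing; a minimal sketch, assuming arrNumbers is the int array field the sorts operate on:

// Hypothetical helper: exchanges the values at the two given indexes
private void swapValuesAtIndex(int first, int second) {
	int temp = arrNumbers[first];
	arrNumbers[first] = arrNumbers[second];
	arrNumbers[second] = temp;
}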
Selection Sort
Selection sort divides the list into two parts, sorted and unsorted. On each pass it takes the first unsorted element as the current minimum, compares it against the remaining unsorted elements to find the true minimum, and swaps that minimum into place.
public void selectionSort() {
	int minElement = 0;

	for (int i = 0; i < arrNumbers.length; i++) {
		// Assume the first unsorted element is the minimum, then
		// scan the rest of the unsorted portion for a smaller one
		minElement = i;

		for (int j = i; j < arrNumbers.length; j++) {
			if (arrNumbers[minElement] > arrNumbers[j]) {
				minElement = j;
			}
		}

		swapValuesAtIndex(minElement, i);
	}
}
Insertion Sort
Insertion sort generally performs best among these simple sorts. The list is divided into a sorted and an unsorted portion. Once a number is selected for comparison, the pass does not end until the number has been placed at its correct location.
public void insertionSort() {
	for (int i = 1; i < arrNumbers.length; i++) {
		int j = i;
		int toCompare = arrNumbers[i]; // holds the number to insert

		// Shift larger sorted elements one slot right
		// until the insertion point is found
		while ((j > 0) && (arrNumbers[j - 1] > toCompare)) {
			arrNumbers[j] = arrNumbers[j - 1];
			j--;
		}

		arrNumbers[j] = toCompare;
	}
}
Algorithms FAQ
- Linear search is faster when searching for an element in a collection where elements are duplicated and occur multiple times. Binary search is efficient when the collection is sorted and its elements are unique.
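For contrast with the binary search above, a minimal linear search sketch over the same arrNumbers array:

// Returns the index of the first match, or -1 if absent; works on
// unsorted arrays and finds the first occurrence of any duplicate
public static int linearSearch(int searchVal) {
	for (int i = 0; i < arrNumbers.length; i++) {
		if (arrNumbers[i] == searchVal) {
			return i;
		}
	}
	return -1;
}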
How to Remove an Element from an Array
public static String[] removeElements(String[] input, String deleteMe) {
	List<String> result = new LinkedList<String>();

	// Copy every element except the ones equal to deleteMe
	for (String item : input) {
		if (!deleteMe.equals(item)) {
			result.add(item);
		}
	}

	// Size the returned array to the filtered list, not the original input
	return result.toArray(new String[result.size()]);
}