Index: src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteTextPartitioned.java =================================================================== --- src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteTextPartitioned.java (revision 0) +++ src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteTextPartitioned.java (revision 0) @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hcatalog.utils; + +import java.io.IOException; +import java.util.HashMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.data.DefaultHCatRecord; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hcatalog.mapreduce.InputJobInfo; +import org.apache.hcatalog.mapreduce.OutputJobInfo; + +/** + * This is a map reduce test for testing hcat writing to partitioned tables. + * It reads each input record and writes it back out unchanged, optionally + * restricted to a single partition given by a filter. This is to simulate a + * typical map reduce program writing to a partitioned table through hcat. + * + * Usage: hadoop jar testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libjars + * <hcat_jar> <serveruri> <input_tablename> <output_tablename> [filter] + * If filter is given it will be provided as the partition to write to. + */ +public class WriteTextPartitioned extends Configured implements Tool { + + static String filter = null; + + public static class Map extends + Mapper { + + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + String name = (String)value.get(0); + int age = (Integer)value.get(1); + double gpa = (Double)value.get(2); + String ds = (String)value.get(3); + + HCatRecord record = (filter == null ?
new DefaultHCatRecord(4) : new DefaultHCatRecord(3)); + record.set(0, name); + record.set(1, age); + record.set(2, gpa); + if (filter == null) record.set(3, ds); + + context.write(null, record); + + } + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + if (args.length > 3) filter = args[3]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteTextPartitioned"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, filter, serverUri, principalID)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteTextPartitioned.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + + java.util.Map partitionVals = null; + if (filter != null) { + String[] s = filter.split("="); + s[1] = s[1].replace('"', ' '); + s[1] = s[1].trim(); + partitionVals = new HashMap(1); + partitionVals.put(s[0], s[1]); + } + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, partitionVals, serverUri, principalID)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ?
0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteTextPartitioned(), args); + System.exit(exitCode); + } +} Index: src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SimpleRead.java =================================================================== --- src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SimpleRead.java (revision 1239960) +++ src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SimpleRead.java (working copy) @@ -23,7 +23,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.Job; @@ -54,7 +54,7 @@ private static final String TAB = "\t"; public static class Map - extends Mapper{ + extends Mapper{ String name; int age; @@ -63,12 +63,12 @@ @Override protected void map(WritableComparable key, HCatRecord value, org.apache.hadoop.mapreduce.Mapper.Context context) + Text,IntWritable>.Context context) throws IOException ,InterruptedException { name = (String) value.get(0); age = (Integer) value.get(1); gpa = (Double) value.get(2); - context.write(new Text(name), new DoubleWritable(gpa)); + context.write(new Text(name), new IntWritable(age)); } } @@ -95,7 +95,7 @@ job.setJarByClass(SimpleRead.class); job.setMapperClass(Map.class); job.setOutputKeyClass(Text.class); - job.setOutputValueClass(DoubleWritable.class); + job.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(job, new Path(outputDir)); return (job.waitForCompletion(true) ? 
0 : 1); } Index: src/test/e2e/hcatalog/tests/hcat.conf =================================================================== --- src/test/e2e/hcatalog/tests/hcat.conf (revision 1239960) +++ src/test/e2e/hcatalog/tests/hcat.conf (working copy) @@ -130,7 +130,7 @@ ,'hcat' => q\ show tables;\, ,'rc' => 0 - ,'expected_out_regex' => 'studenttab10k' + ,'expected_out_regex' => 'pig_checkin_1' }, ], }, # end g Index: src/test/e2e/hcatalog/tests/pig.conf =================================================================== --- src/test/e2e/hcatalog/tests/pig.conf (revision 1239960) +++ src/test/e2e/hcatalog/tests/pig.conf (working copy) @@ -77,7 +77,31 @@ ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;'] ,'floatpostprocess' => 1 ,'delimiter' => ' ' - }, + }, + { + 'num' => 6 + ,'hcat_prep'=>q\drop table if exists pig_checkin_6; +create table pig_checkin_6 (name string, age int, gpa double) partitioned by (ds string) STORED AS TEXTFILE;\ + ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); +b = filter a by ds == '20110924'; +store b into 'pig_checkin_6' using org.apache.hcatalog.pig.HCatStorer('ds="20110924"');\, +#dump a;\, + ,'result_table' => 'pig_checkin_6', + ,'sql' => "select * from studentparttab30k where ds='20110924';", + ,'floatpostprocess' => 1 + ,'delimiter' => ' ' + }, + { + 'num' => 7 + ,'hcat_prep'=>q\drop table if exists pig_checkin_7; +create table pig_checkin_7 (name string, age int, gpa double) partitioned by (ds string) STORED AS TEXTFILE;\ + ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); +store a into 'pig_checkin_7' using org.apache.hcatalog.pig.HCatStorer();\, + ,'result_table' => 'pig_checkin_7', + ,'sql' => "select * from studentparttab30k;", + ,'floatpostprocess' => 1 + ,'delimiter' => ' ' + } ], }, # end g Index: src/test/e2e/hcatalog/tests/hadoop.conf =================================================================== --- 
src/test/e2e/hcatalog/tests/hadoop.conf (revision 1239960) +++ src/test/e2e/hcatalog/tests/hadoop.conf (working copy) @@ -23,7 +23,7 @@ ,'hadoop' => q\ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k :OUTPATH: \, - ,'sql' => q\select name, gpa from studenttab10k;\ + ,'sql' => q\select name, age from studenttab10k;\ ,'floatpostprocess' => 1 ,'delimiter' => ' ' }, @@ -50,7 +50,38 @@ ,'sql' => q\select age, count(*) from studenttab10k group by age;\ ,'floatpostprocess' => 1 ,'delimiter' => ' ' - }, + },{ + # Read from a partitioned table + 'num' => 4 + ,'hadoop' => q\ +jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k :OUTPATH: +\, + ,'sql' => q\select name, age from studentparttab30k;\ + ,'floatpostprocess' => 1 + ,'delimiter' => ' ' + },{ + # Write a single partition to a partitioned table + 'num' => 5 + ,'hcat_prep'=>q\drop table if exists hadoop_checkin_5; +create table hadoop_checkin_5 (name string, age int, gpa double) partitioned by (ds string) STORED AS TEXTFILE;\ + ,'hadoop' => q? 
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_5 ds=\"20110924\" +?, + ,'sql' => q\select * from studentparttab30k where ds='20110924';\ + ,'floatpostprocess' => 1 + ,'delimiter' => ' ' + }, { + # Write multiple partitions to a partitioned table + 'num' => 6 + ,'hcat_prep'=>q\drop table if exists hadoop_checkin_6; +create table hadoop_checkin_6 (name string, age int, gpa double) partitioned by (ds string) STORED AS TEXTFILE;\ + ,'hadoop' => q\ +jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_6 +\, + ,'sql' => q\select * from studentparttab30k;\ + ,'floatpostprocess' => 1 + ,'delimiter' => ' ' + } ], }, # end g { Index: src/test/e2e/hcatalog/conf/default.conf =================================================================== --- src/test/e2e/hcatalog/conf/default.conf (revision 1239960) +++ src/test/e2e/hcatalog/conf/default.conf (working copy) @@ -72,7 +72,7 @@ , 'pigpath' => "$ENV{PIG_HOME}" , 'pigjar' => "$ENV{PIG_JAR}" # Pig jar that doesn't have Antlr , 'oldpigpath' => "$ENV{PH_OLDPIG}" - , 'additionaljars' =>
"$ENV{HCAT_ROOT}/build/hcatalog/hcatalog-0.3.0-dev.jar:$ENV{HCAT_ROOT}/hive/external/build/metastore/hive-metastore-$hive_version.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/libthrift.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/hive-exec-$hive_version.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/libfb303.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/jdo2-api-2.3-ec.jar::$ENV{HCAT_ROOT}/storage-drivers/hbase/build/ivy/lib/hbase-storage-driver/hbase-0.90.5.jar:$ENV{HCAT_ROOT}/storage-drivers/hbase/build/ivy/lib/hbase-storage-driver/zookeeper-3.4.0.jar:$ENV{HCAT_ROOT}/storage-drivers/hbase/build/hbase-storage-driver/hbase-storage-driver-0.1.0.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/hive-hbase-handler-$hive_version.jar:$ENV{'HCAT_INSTALL_DIR'}/etc/hcatalog" + , 'additionaljars' => "$ENV{HCAT_ROOT}/build/hcatalog/hcatalog-0.4.0-dev.jar:$ENV{HCAT_ROOT}/hive/external/build/metastore/hive-metastore-$hive_version.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/libthrift.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/hive-exec-$hive_version.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/libfb303.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/jdo2-api-2.3-ec.jar::$ENV{HCAT_ROOT}/storage-drivers/hbase/build/ivy/lib/hbase-storage-driver/hbase-0.90.5.jar:$ENV{HCAT_ROOT}/storage-drivers/hbase/build/ivy/lib/hbase-storage-driver/zookeeper-3.4.0.jar:$ENV{HCAT_ROOT}/storage-drivers/hbase/build/hbase-storage-driver/hbase-storage-driver-0.1.0.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/hive-hbase-handler-$hive_version.jar:$ENV{'HCAT_INSTALL_DIR'}/etc/hcatalog" #HADOOP , 'hadoopHome' => "$ENV{HCAT_ROOT}/lib" Index: src/test/e2e/hcatalog/build.xml =================================================================== --- src/test/e2e/hcatalog/build.xml (revision 1239960) +++ src/test/e2e/hcatalog/build.xml (working copy) @@ -19,7 +19,7 @@ - + Index: build.xml =================================================================== --- build.xml (revision 
1239960) +++ build.xml (working copy) @@ -664,6 +664,7 @@ + Index: bin/hcat-config.sh =================================================================== --- bin/hcat-config.sh (revision 1239960) +++ bin/hcat-config.sh (working copy) @@ -48,8 +48,8 @@ fi # Allow alternate conf dir location. -if [ -e "${HCAT_PREFIX}/conf/hcat-env.sh" ]; then - DEFAULT_CONF_DIR=${HCAT_PREFIX}/"conf" +if [ -e "${HCAT_PREFIX}/etc/hcatalog/hcat-env.sh" ]; then + DEFAULT_CONF_DIR=${HCAT_PREFIX}/"etc/hcatalog" else DEFAULT_CONF_DIR="/etc/hcatalog" fi