diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken deleted file mode 100644 index 71b9652..0000000 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** The InputFormat to use to read data from HCat */ -public class HCatEximInputFormat extends HCatBaseInputFormat { - - /** - * Set the input to use for the Job. This queries the metadata file with - * the specified partition predicates, gets the matching partitions, puts - * the information in the conf object. 
The inputInfo object is updated with - * information needed in the client context - * - * @param job the job object - * @return two hcat schemas, for the table columns and the partition keys - * @throws IOException - * the exception in communicating with the metadata server - */ - public static List setInput(Job job, - String location, - Map partitionFilter) throws IOException { - FileSystem fs; - try { - fs = FileSystem.get(new URI(location), job.getConfiguration()); - } catch (URISyntaxException e) { - throw new IOException(e); - } - Path fromPath = new Path(location); - Path metadataPath = new Path(fromPath, "_metadata"); - try { - Map.Entry> tp = EximUtil - .readMetaData(fs, metadataPath); - org.apache.hadoop.hive.metastore.api.Table table = tp.getKey(); - InputJobInfo inputInfo = InputJobInfo.create(table.getDbName(), table.getTableName(),null,null,null); - List partCols = table.getPartitionKeys(); - List partInfoList = null; - if (partCols.size() > 0) { - List partColNames = new ArrayList(partCols.size()); - for (FieldSchema fsc : partCols) { - partColNames.add(fsc.getName()); - } - List partitions = tp.getValue(); - partInfoList = filterPartitions(partitionFilter, partitions, table.getPartitionKeys()); - } else { - partInfoList = new ArrayList(1); - HCatSchema schema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getSd().getCols())); - Map parameters = table.getParameters(); - String inputStorageDriverClass = null; - if (parameters.containsKey(HCatConstants.HCAT_ISD_CLASS)){ - inputStorageDriverClass = parameters.get(HCatConstants.HCAT_ISD_CLASS); - }else{ - throw new IOException("No input storage driver classname found, cannot read partition"); - } - Properties hcatProperties = new Properties(); - for (String key : parameters.keySet()){ - if (key.startsWith(InitializeInput.HCAT_KEY_PREFIX)){ - hcatProperties.put(key, parameters.get(key)); - } - } - PartInfo partInfo = new PartInfo(schema, inputStorageDriverClass, location + "/data", hcatProperties); - partInfoList.add(partInfo); - } - inputInfo.setPartitions(partInfoList); - inputInfo.setTableInfo(HCatTableInfo.valueOf(table)); - job.getConfiguration().set( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(inputInfo)); - List rv = new ArrayList(2); - rv.add(HCatSchemaUtils.getHCatSchema(table.getSd().getCols())); - rv.add(HCatSchemaUtils.getHCatSchema(partCols)); - return rv; - } catch(SemanticException e) { - throw new IOException(e); - } - } - - private static List filterPartitions(Map partitionFilter, - List partitions, List partCols) throws IOException { - List partInfos = new LinkedList(); - for (Partition partition : partitions) { - boolean matches = true; - List partVals = partition.getValues(); - assert partCols.size() == partVals.size(); - Map partSpec = EximUtil.makePartSpec(partCols, partVals); - if (partitionFilter != null) { - for (Map.Entry constraint : partitionFilter.entrySet()) { - String partVal = partSpec.get(constraint.getKey()); - if ((partVal == null) || !partVal.equals(constraint.getValue())) { - matches = false; - break; - } - } - } - if (matches) { - PartInfo partInfo = InitializeInput.extractPartInfo(partition.getSd(), - partition.getParameters()); - partInfo.setPartitionValues(partSpec); - partInfos.add(partInfo); - } - } - return partInfos; - } -} diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken deleted file mode 100644 index 
0ab8c22..0000000 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken +++ /dev/null @@ -1,166 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; - -public class HCatEximOutputCommitter extends OutputCommitter { - - private static final Log LOG = LogFactory.getLog(HCatEximOutputCommitter.class); - - private final OutputCommitter baseCommitter; - - public HCatEximOutputCommitter(JobContext context, OutputCommitter baseCommitter) { - this.baseCommitter = baseCommitter; - } - - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - baseCommitter.abortTask(context); - } - - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - baseCommitter.commitTask(context); - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return baseCommitter.needsTaskCommit(context); - } - - @Override - public void setupJob(JobContext context) throws IOException { - if( baseCommitter != null ) { - baseCommitter.setupJob(context); - } - } - - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - baseCommitter.setupTask(context); - } - - @Override - public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { - if(baseCommitter != null) { - baseCommitter.abortJob(jobContext, state); - } - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - - Path src = new Path(jobInfo.getLocation()); - FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); - fs.delete(src, true); - } 
- - @Override - public void commitJob(JobContext jobContext) throws IOException { - if(baseCommitter != null) { - baseCommitter.commitJob(jobContext); - } - } - - @Override - public void cleanupJob(JobContext jobContext) throws IOException { - LOG.info("HCatEximOutputCommitter.cleanup invoked; m.o.d : " + - jobContext.getConfiguration().get("mapred.output.dir")); - if (baseCommitter != null) { - LOG.info("baseCommitter.class = " + baseCommitter.getClass().getName()); - baseCommitter.cleanupJob(jobContext); - } - - OutputJobInfo jobInfo = HCatBaseOutputFormat.getJobInfo(jobContext); - Configuration conf = jobContext.getConfiguration(); - FileSystem fs; - try { - fs = FileSystem.get(new URI(jobInfo.getTableInfo().getTable().getSd().getLocation()), conf); - } catch (URISyntaxException e) { - throw new IOException(e); - } - doCleanup(jobInfo, fs); - } - - private static void doCleanup(OutputJobInfo jobInfo, FileSystem fs) throws IOException, - HCatException { - try { - Table ttable = jobInfo.getTableInfo().getTable(); - org.apache.hadoop.hive.ql.metadata.Table table = new org.apache.hadoop.hive.ql.metadata.Table( - ttable); - StorageDescriptor tblSD = ttable.getSd(); - Path tblPath = new Path(tblSD.getLocation()); - Path path = new Path(tblPath, "_metadata"); - List tpartitions = null; - try { - Map.Entry> rv = EximUtil - .readMetaData(fs, path); - tpartitions = rv.getValue(); - } catch (IOException e) { - } - List partitions = - new ArrayList(); - if (tpartitions != null) { - for (Partition tpartition : tpartitions) { - partitions.add(new org.apache.hadoop.hive.ql.metadata.Partition(table, tpartition)); - } - } - if (!table.getPartitionKeys().isEmpty()) { - Map partitionValues = jobInfo.getPartitionValues(); - org.apache.hadoop.hive.ql.metadata.Partition partition = - new org.apache.hadoop.hive.ql.metadata.Partition(table, - partitionValues, - new Path(tblPath, Warehouse.makePartPath(partitionValues))); - partition.getTPartition().setParameters(table.getParameters()); - partitions.add(partition); - } - EximUtil.createExportDump(fs, path, (table), partitions); - } catch (SemanticException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } catch (HiveException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } catch (MetaException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } -} diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken deleted file mode 100644 index 6181284..0000000 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken +++ /dev/null @@ -1,176 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hcatalog.rcfile.RCFileOutputDriver; - -/** - * The OutputFormat to use to write data to HCat without a hcat server. This can then - * be imported into a hcat instance, or used with a HCatEximInputFormat. As in - * HCatOutputFormat, the key value is ignored and - * and should be given as null. The value is the HCatRecord to write. - */ -public class HCatEximOutputFormat extends HCatBaseOutputFormat { - - private static final Log LOG = LogFactory.getLog(HCatEximOutputFormat.class); - - /** - * Get the record writer for the job. Uses the Table's default OutputStorageDriver - * to get the record writer. - * - * @param context - * the information about the current task. - * @return a RecordWriter to write the output for the job. - * @throws IOException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context - ) throws IOException, InterruptedException { - return getOutputFormat(context).getRecordWriter(context); - } - - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. - * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - return new HCatEximOutputCommitter(context,((OutputCommitterContainer)getOutputFormat(context).getOutputCommitter(context)).getBaseOutputCommitter()); - } - - /** - * Check for validity of the output-specification for the job. 
- * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context - ) throws IOException, InterruptedException { - ((OutputFormatContainer)getOutputFormat(context)).getBaseOutputFormat().checkOutputSpecs(context); - } - - public static void setOutput(Job job, String dbname, String tablename, String location, - HCatSchema partitionSchema, List partitionValues, HCatSchema columnSchema) throws HCatException { - setOutput(job, dbname, tablename, location, partitionSchema, partitionValues, columnSchema, - RCFileInputDriver.class.getName(), - RCFileOutputDriver.class.getName(), - RCFileInputFormat.class.getName(), - RCFileOutputFormat.class.getName(), - ColumnarSerDe.class.getName()); - } - - @SuppressWarnings("unchecked") - public static void setOutput(Job job, String dbname, String tablename, String location, - HCatSchema partitionSchema, - List partitionValues, - HCatSchema columnSchema, - String isdname, String osdname, - String ifname, String ofname, - String serializationLib) throws HCatException { - Map partSpec = new TreeMap(); - List partKeys = null; - if (partitionSchema != null) { - partKeys = partitionSchema.getFields(); - if (partKeys.size() != partitionValues.size()) { - throw new IllegalArgumentException("Partition key size differs from partition value size"); - } - for (int i = 0; i < partKeys.size(); ++i) { - HCatFieldSchema partKey = partKeys.get(i); - if (partKey.getType() != HCatFieldSchema.Type.STRING) { - throw new IllegalArgumentException("Partition key type string is only supported"); - } - partSpec.put(partKey.getName(), partitionValues.get(i)); - } - } - StorerInfo storerInfo = new StorerInfo(isdname, osdname, new Properties()); - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbname,tablename,partSpec,null,null); - org.apache.hadoop.hive.ql.metadata.Table tbl = new - org.apache.hadoop.hive.ql.metadata.Table(dbname, tablename); - Table table = tbl.getTTable(); - table.getParameters().put(HCatConstants.HCAT_ISD_CLASS, isdname); - table.getParameters().put(HCatConstants.HCAT_OSD_CLASS, osdname); - try { - String partname = null; - if ((partKeys != null) && !partKeys.isEmpty()) { - List partSchema = HCatSchemaUtils.getFieldSchemas(partKeys); - table.setPartitionKeys(partSchema); - partname = Warehouse.makePartName(partSchema, partitionValues); - } else { - partname = "data"; - } - StorageDescriptor sd = table.getSd(); - sd.setLocation(location); - String dataLocation = location + "/" + partname; - outputJobInfo.setTableInfo(new HCatTableInfo(dbname,tablename,columnSchema,null,storerInfo,table)); - outputJobInfo.setOutputSchema(columnSchema); - outputJobInfo.setLocation(dataLocation); - setPartDetails(outputJobInfo, columnSchema, partSpec); - sd.setCols(HCatUtil.getFieldSchemaList(outputJobInfo.getOutputSchema().getFields())); - sd.setInputFormat(ifname); - sd.setOutputFormat(ofname); - SerDeInfo serdeInfo = sd.getSerdeInfo(); - serdeInfo.setSerializationLib(serializationLib); - Configuration conf = job.getConfiguration(); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } catch (MetaException e) { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } - } -} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken 
b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken deleted file mode 100644 index 506a40e..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken +++ /dev/null @@ -1,175 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.cli; - -import java.io.IOException; -import java.net.URI; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.MiniCluster; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -public class TestEximSemanticAnalysis extends TestCase { - - private final MiniCluster cluster = MiniCluster.buildCluster(); - private HiveConf hcatConf; - private HCatDriver hcatDriver; - private Warehouse wh; - private static final Logger LOG = LoggerFactory.getLogger(TestEximSemanticAnalysis.class); - - @Override - protected void setUp() throws Exception { - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatConf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); - URI fsuri = cluster.getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), "/user/hive/warehouse"); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - wh = new Warehouse(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - - hcatDriver = new HCatDriver(); - } - - @Override - protected void tearDown() throws Exception { - } - - public void testExportPerms() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored 
as RCFILE"); - assertEquals(0, response.getResponseCode()); - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis"); - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwx-wx")); - cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup"); - - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - - assertEquals(10, response.getResponseCode()); - assertTrue("Permission denied expected : "+response.getErrorMessage(), - response.getErrorMessage().startsWith( - "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied")); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver.run("drop table junit_sem_analysis"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - public void testImportPerms() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver.run("drop table junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis"); - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxr-x")); - cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup"); - - response = hcatDriver - .run("import table junit_sem_analysis from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - - assertEquals(10, response.getResponseCode()); - assertTrue( - "Permission denied expected: "+response.getErrorMessage() , - response.getErrorMessage().startsWith( - "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied")); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxrwx")); - response = hcatDriver.run("drop table junit_sem_analysis"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - public void testImportSetPermsGroup() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("drop table junit_sem_analysis_imported"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver.run("drop table junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - - 
hcatConf.set(HCatConstants.HCAT_PERMS, "-rwxrw-r--"); - hcatConf.set(HCatConstants.HCAT_GROUP, "nosuchgroup"); - - response = hcatDriver - .run("import table junit_sem_analysis_imported from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis_imported"); - assertEquals(FsPermission.valueOf("-rwxrw-r--"), cluster.getFileSystem().getFileStatus(whPath).getPermission()); - assertEquals("nosuchgroup", cluster.getFileSystem().getFileStatus(whPath).getGroup()); - - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - - response = hcatDriver.run("drop table junit_sem_analysis_imported"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - -} - diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken deleted file mode 100644 index 7612337..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.cli; - -import static org.junit.Assert.assertEquals; - -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.thrift.TException; - -import junit.framework.TestCase; - -public class TestStorageHandlerProperties extends TestCase { - - private Driver hcatDriver; - private Driver hiveDriver; - private HiveMetaStoreClient msc; - - protected void setUp() throws Exception { - HiveConf hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - HiveConf hiveConf = new HiveConf(hcatConf,this.getClass()); - hiveDriver = new Driver(hiveConf); - - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - - msc = new HiveMetaStoreClient(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } - - public void testTableProperties() throws CommandNeedRetryException, MetaException ,TException, NoSuchObjectException{ - hcatDriver.run("drop table test_table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_table(key int, value string) STORED BY " + - "'org.apache.hcatalog.cli.DummyStorageHandler' "); - - assertEquals(0, response.getResponseCode()); - Table tbl = msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "test_table"); - DummyStorageHandler dsh = new DummyStorageHandler(); - assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_ISD_CLASS)); - assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_OSD_CLASS)); - assertEquals(tbl.getParameters().get(HCatConstants.HCAT_ISD_CLASS), dsh.getInputStorageDriver().getName()); - assertEquals(tbl.getParameters().get(HCatConstants.HCAT_OSD_CLASS), dsh.getOutputStorageDriver().getName()); - } - - /* @throws java.lang.Exception - * @see junit.framework.TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - } - -} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken deleted file mode 100644 index 082d723..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken +++ /dev/null @@ -1,193 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapred; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hcatalog.MiniCluster; -import org.apache.hcatalog.data.HCatDataCheckUtil; -import org.apache.hcatalog.mapred.HCatMapredInputFormat; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.storagehandler.HCatStorageHandlerImpl; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.impl.util.UDFContext; - -public class TestHiveHCatInputFormat extends TestCase { - private static MiniCluster cluster = MiniCluster.buildCluster(); - private static Driver driver; - - String PTNED_TABLE = "junit_testhiveinputintegration_ptni"; - String UNPTNED_TABLE = "junit_testhiveinputintegration_noptn"; - String basicFile = "/tmp/"+PTNED_TABLE+".file"; - - public void testFromHive() throws Exception { - if (driver == null){ - driver = HCatDataCheckUtil.instantiateDriver(cluster); - } - - Properties props = new Properties(); - props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - String basicFileFullName = cluster.getProperties().getProperty("fs.default.name") + basicFile; - - cleanup(); - - // create source data file - HCatDataCheckUtil.generateDataFile(cluster,basicFile); - - String createPtnedTable = "(j int, s string) partitioned by (i int) " - +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" - + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," - + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; - - HCatDataCheckUtil.createTable(driver,PTNED_TABLE,createPtnedTable); - - String createUnptnedTable = "(i int, j int, s string) " - +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" - + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," - + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; - - HCatDataCheckUtil.createTable(driver,UNPTNED_TABLE,createUnptnedTable); - - - driver.run("describe extended "+UNPTNED_TABLE); - ArrayList des_values = new ArrayList(); - driver.getResults(des_values); - for (String s : des_values){ - System.err.println("du:"+s); - } - - driver.run("describe extended "+PTNED_TABLE); - ArrayList des2_values = new ArrayList(); - driver.getResults(des2_values); - for (String s : des2_values){ - System.err.println("dp:"+s); - } - - // use pig to read from source file and put into this table - - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load 
'"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); - server.registerQuery("store A into '"+UNPTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - server.setBatchOn(); - server.registerQuery("A = load '"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); - server.registerQuery("store A into '"+PTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - // partitioned by i - // select * from tbl; - // select j,s,i from tbl; - // select * from tbl where i = 3; - // select j,s,i from tbl where i = 3; - // select * from tbl where j = 3; - // select j,s,i from tbl where j = 3; - - ArrayList p_select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_star_nofilter","select * from "+PTNED_TABLE); - ArrayList p_select_named_nofilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_named_nofilter","select j,s,i from "+PTNED_TABLE); - - assertDataIdentical(p_select_star_nofilter,p_select_named_nofilter,50); - - ArrayList p_select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_star_ptnfilter","select * from "+PTNED_TABLE+" where i = 3"); - ArrayList p_select_named_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_named_ptnfilter","select j,s,i from "+PTNED_TABLE+" where i = 3"); - - assertDataIdentical(p_select_star_ptnfilter,p_select_named_ptnfilter,10); - - ArrayList select_star_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_nonptnfilter","select * from "+PTNED_TABLE+" where j = 28"); - ArrayList select_named_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_named_nonptnfilter","select j,s,i from "+PTNED_TABLE+" where j = 28"); - - assertDataIdentical(select_star_nonptnfilter,select_named_nonptnfilter,1); - - // non-partitioned - // select * from tbl; - // select i,j,s from tbl; - // select * from tbl where i = 3; - // select i,j,s from tbl where i = 3; - - // select j,s,i from tbl; - // select j,s,i from tbl where i = 3; - - ArrayList select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_nofilter","select * from "+UNPTNED_TABLE); //i,j,s select * order is diff for unptn - ArrayList select_ijs_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_ijs_nofilter","select i,j,s from "+UNPTNED_TABLE); - - assertDataIdentical(select_star_nofilter,select_ijs_nofilter,50); - - ArrayList select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_ptnfilter","select * from "+UNPTNED_TABLE+" where i = 3"); //i,j,s - ArrayList select_ijs_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_ijs_ptnfilter","select i,j,s from "+UNPTNED_TABLE+" where i = 3"); - - assertDataIdentical(select_star_ptnfilter,select_ijs_ptnfilter,10); - - ArrayList select_jsi_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_jsi_nofilter","select j,s,i from "+UNPTNED_TABLE); - assertDataIdentical(p_select_named_nofilter,select_jsi_nofilter,50,true); - - ArrayList select_jsi_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_jsi_ptnfilter","select j,s,i from "+UNPTNED_TABLE+" where i = 3"); - assertDataIdentical(p_select_named_ptnfilter,select_jsi_ptnfilter,10,true); - - } - - private void assertDataIdentical(ArrayList result1, - ArrayList result2, int numRecords) { - assertDataIdentical(result1,result2,numRecords,false); - } - - private void assertDataIdentical(ArrayList result1, - ArrayList result2, int numRecords,boolean doSort) { - assertEquals(numRecords, result1.size()); - 
assertEquals(numRecords, result2.size()); - Collections.sort(result1); - Collections.sort(result2); - for (int i = 0; i < numRecords; i++){ - assertEquals(result1.get(i),result2.get(i)); - } - } - - - private void cleanup() throws IOException, CommandNeedRetryException { - MiniCluster.deleteFile(cluster, basicFile); - HCatDataCheckUtil.dropTable(driver,PTNED_TABLE); - HCatDataCheckUtil.dropTable(driver,UNPTNED_TABLE); - } - -} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken deleted file mode 100644 index 8b3e089..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken +++ /dev/null @@ -1,429 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.mapreduce.TestHCatEximInputFormat.TestImport.EmpDetails; - -/** - * - * TestHCatEximInputFormat. tests primarily HCatEximInputFormat but - * also HCatEximOutputFormat. 
- * - */ -public class TestHCatEximInputFormat extends TestCase { - - public static class TestExport extends - org.apache.hadoop.mapreduce.Mapper { - - private HCatSchema recordSchema; - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - recordSchema = HCatEximOutputFormat.getTableSchema(context); - } - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - HCatRecord record = new DefaultHCatRecord(recordSchema.size()); - record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); - record.setString("emp_name", recordSchema, cols[1]); - record.setString("emp_dob", recordSchema, cols[2]); - record.setString("emp_sex", recordSchema, cols[3]); - context.write(key, record); - } - } - - public static class TestImport extends - org.apache.hadoop.mapreduce.Mapper< - org.apache.hadoop.io.LongWritable, HCatRecord, - org.apache.hadoop.io.Text, - org.apache.hadoop.io.Text> { - - private HCatSchema recordSchema; - - public static class EmpDetails { - public String emp_name; - public String emp_dob; - public String emp_sex; - public String emp_country; - public String emp_state; - } - - public static Map empRecords = new TreeMap(); - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - try { - recordSchema = HCatBaseInputFormat.getOutputSchema(context); - } catch (Exception e) { - throw new IOException("Error getting outputschema from job configuration", e); - } - System.out.println("RecordSchema : " + recordSchema.toString()); - } - - @Override - public void map(LongWritable key, HCatRecord value, Context context) - throws IOException, InterruptedException { - EmpDetails empDetails = new EmpDetails(); - Integer emp_id = value.getInteger("emp_id", recordSchema); - String emp_name = value.getString("emp_name", recordSchema); - empDetails.emp_name = emp_name; - if (recordSchema.getPosition("emp_dob") != null) { - empDetails.emp_dob = value.getString("emp_dob", recordSchema); - } - if (recordSchema.getPosition("emp_sex") != null) { - empDetails.emp_sex = value.getString("emp_sex", recordSchema); - } - if (recordSchema.getPosition("emp_country") != null) { - empDetails.emp_country = value.getString("emp_country", recordSchema); - } - if (recordSchema.getPosition("emp_state") != null) { - empDetails.emp_state = value.getString("emp_state", recordSchema); - } - empRecords.put(emp_id, empDetails); - } - } - - private static final String dbName = "hcatEximOutputFormatTestDB"; - private static final String tblName = "hcatEximOutputFormatTestTable"; - Configuration conf; - Job job; - List columns; - HCatSchema schema; - FileSystem fs; - Path inputLocation; - Path outputLocation; - private HCatSchema partSchema; - - - @Override - protected void setUp() throws Exception { - System.out.println("Setup started"); - super.setUp(); - conf = new Configuration(); - job = new Job(conf, "test eximinputformat"); - columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - 
Constants.STRING_TYPE_NAME, ""))); - schema = new HCatSchema(columns); - - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - inputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); - outputLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); - - job.setJarByClass(this.getClass()); - job.setNumReduceTasks(0); - System.out.println("Setup done"); - } - - private void setupMRExport(String[] records) throws IOException { - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - FSDataOutputStream ds = fs.create(outputLocation, true); - for (String record : records) { - ds.writeBytes(record); - } - ds.close(); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatEximOutputFormat.class); - TextInputFormat.setInputPaths(job, outputLocation); - job.setMapperClass(TestExport.class); - } - - private void setupMRImport() throws IOException { - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - job.setInputFormatClass(HCatEximInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputLocation); - job.setMapperClass(TestImport.class); - TestImport.empRecords.clear(); - } - - - @Override - protected void tearDown() throws Exception { - System.out.println("Teardown started"); - super.tearDown(); - // fs.delete(inputLocation, true); - // fs.delete(outputLocation, true); - System.out.println("Teardown done"); - } - - - private void runNonPartExport() throws IOException, InterruptedException, ClassNotFoundException { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - setupMRExport(new String[] { - "237,Krishna,01/01/1990,M,IN,TN\n", - "238,Kalpana,01/01/2000,F,IN,KA\n", - "239,Satya,01/01/2001,M,US,TN\n", - "240,Kavya,01/01/2002,F,US,KA\n" - - }); - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - inputLocation.toString(), - null, - null, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - } - - private void runPartExport(String record, String country, String state) throws IOException, InterruptedException, ClassNotFoundException { - setupMRExport(new String[] {record}); - List partValues = new ArrayList(2); - partValues.add(country); - partValues.add(state); - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - inputLocation.toString(), - partSchema , - partValues , - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - } - - public void testNonPart() throws Exception { - try { - runNonPartExport(); - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", null, null); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", null, null); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", null, null); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", null, null); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testNonPartProjection() throws Exception { - try { - - runNonPartExport(); - setUp(); - setupMRImport(); - 
HCatEximInputFormat.setInput(job, "tmp/exports", null); - - List readColumns = new ArrayList(); - readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - - HCatEximInputFormat.setOutputSchema(job, new HCatSchema(readColumns)); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", null, null, null, null); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testPart() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testPartWithPartCols() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(columns); - colsPlusPartKeys.addAll(partKeys); - - HCatBaseInputFormat.setOutputSchema(job, new HCatSchema(colsPlusPartKeys)); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - 
assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - - public void testPartSelection() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - Map filter = new TreeMap(); - filter.put("emp_state", "ka"); - HCatEximInputFormat.setInput(job, "tmp/exports", filter); - job.waitForCompletion(true); - - assertEquals(2, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - - private void assertEmpDetail(EmpDetails empDetails, String name, String dob, String mf, String country, String state) { - assertNotNull(empDetails); - assertEquals(name, empDetails.emp_name); - assertEquals(dob, empDetails.emp_dob); - assertEquals(mf, empDetails.emp_sex); - assertEquals(country, empDetails.emp_country); - assertEquals(state, empDetails.emp_state); - } - -} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken deleted file mode 100644 index bf4fb48..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken +++ /dev/null @@ -1,261 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** - * - * TestHCatEximOutputFormat. Some basic testing here. More testing done via - * TestHCatEximInputFormat - * - */ -public class TestHCatEximOutputFormat extends TestCase { - - public static class TestMap extends - Mapper { - - private HCatSchema recordSchema; - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - recordSchema = HCatEximOutputFormat.getTableSchema(context); - System.out.println("TestMap/setup called"); - } - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - HCatRecord record = new DefaultHCatRecord(recordSchema.size()); - System.out.println("TestMap/map called. Cols[0]:" + cols[0]); - System.out.println("TestMap/map called. Cols[1]:" + cols[1]); - System.out.println("TestMap/map called. Cols[2]:" + cols[2]); - System.out.println("TestMap/map called. 
Cols[3]:" + cols[3]); - record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); - record.setString("emp_name", recordSchema, cols[1]); - record.setString("emp_dob", recordSchema, cols[2]); - record.setString("emp_sex", recordSchema, cols[3]); - context.write(key, record); - } - } - - - private static final String dbName = "hcatEximOutputFormatTestDB"; - private static final String tblName = "hcatEximOutputFormatTestTable"; - Configuration conf; - Job job; - List columns; - HCatSchema schema; - FileSystem fs; - Path outputLocation; - Path dataLocation; - - public void testNonPart() throws Exception { - try { - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - outputLocation.toString(), - null, - null, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - - Path metadataPath = new Path(outputLocation, "_metadata"); - Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); - Table table = rv.getKey(); - List partitions = rv.getValue(); - - assertEquals(dbName, table.getDbName()); - assertEquals(tblName, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(0, table.getPartitionKeys().size()); - - assertEquals(0, partitions.size()); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - - } - - public void testPart() throws Exception { - try { - List partKeys = new ArrayList(); - partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_country", - Constants.STRING_TYPE_NAME, ""))); - partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_state", - Constants.STRING_TYPE_NAME, ""))); - HCatSchema partitionSchema = new HCatSchema(partKeys); - - List partitionVals = new ArrayList(); - partitionVals.add("IN"); - partitionVals.add("TN"); - - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - outputLocation.toString(), - partitionSchema, - partitionVals, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - Path metadataPath = new Path(outputLocation, "_metadata"); - Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); - Table table = rv.getKey(); - List partitions = rv.getValue(); - - assertEquals(dbName, table.getDbName()); - assertEquals(tblName, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - 
assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(2, table.getPartitionKeys().size()); - List partSchema = table.getPartitionKeys(); - assertEquals("emp_country", partSchema.get(0).getName()); - assertEquals("emp_state", partSchema.get(1).getName()); - - assertEquals(1, partitions.size()); - Partition partition = partitions.get(0); - assertEquals("IN", partition.getValues().get(0)); - assertEquals("TN", partition.getValues().get(1)); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - @Override - protected void setUp() throws Exception { - System.out.println("Setup started"); - super.setUp(); - conf = new Configuration(); - job = new Job(conf, "test eximoutputformat"); - columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - schema = new HCatSchema(columns); - - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - outputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - dataLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); - if (fs.exists(dataLocation)) { - fs.delete(dataLocation, true); - } - FSDataOutputStream ds = fs.create(dataLocation, true); - ds.writeBytes("237,Krishna,01/01/1990,M,IN,TN\n"); - ds.writeBytes("238,Kalpana,01/01/2000,F,IN,KA\n"); - ds.writeBytes("239,Satya,01/01/2001,M,US,TN\n"); - ds.writeBytes("240,Kavya,01/01/2002,F,US,KA\n"); - ds.close(); - - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatEximOutputFormat.class); - TextInputFormat.setInputPaths(job, dataLocation); - job.setJarByClass(this.getClass()); - job.setMapperClass(TestMap.class); - job.setNumReduceTasks(0); - System.out.println("Setup done"); - } - - @Override - protected void tearDown() throws Exception { - System.out.println("Teardown started"); - super.tearDown(); - fs.delete(dataLocation, true); - fs.delete(outputLocation, true); - System.out.println("Teardown done"); - } -} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken deleted file mode 100644 index 368b3ff..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken +++ /dev/null @@ -1,294 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.*; - -import junit.framework.Assert; -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatDataCheckUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hadoop.hive.shims.ShimLoader; - - -public class TestRCFileInputStorageDriver extends TestCase{ - private static final Configuration conf = new Configuration(); - private static final Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); - private static final Path file = new Path(dir, "test_rcfile"); - private final HCatHadoopShims shim = ShimLoader.getHadoopShims().getHCatShim(); - - // Generate sample records to compare against - private byte[][][] getRecords() throws UnsupportedEncodingException { - byte[][] record_1 = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), - "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8")}; - byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"), - "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8")}; - return new byte[][][]{record_1, record_2}; - } - - // Write sample records to file for individual tests - private BytesRefArrayWritable[] initTestEnvironment() throws IOException { - FileSystem fs = FileSystem.getLocal(conf); - fs.delete(file, true); - - byte [][][] records = getRecords(); - RCFileOutputFormat.setColumnNumber(conf, 8); - RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec()); - - BytesRefArrayWritable bytes = 
writeBytesToFile(records[0], writer); - BytesRefArrayWritable bytes2 = writeBytesToFile(records[1], writer); - - writer.close(); - return new BytesRefArrayWritable[]{bytes,bytes2}; - } - - private BytesRefArrayWritable writeBytesToFile(byte[][] record, RCFile.Writer writer) throws IOException { - BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length); - for (int i = 0; i < record.length; i++) { - BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length); - bytes.set(i, cu); - } - writer.append(bytes); - return bytes; - } - - public void testConvertValueToTuple() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - HCatSchema schema = buildHiveSchema(); - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, schema); - sd.setOutputSchema(jc, schema); - sd.initialize(jc, getProps()); - - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getExpectedRecords(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertEquals(bytesArr[j], w); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(8, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - } - - public void testPruning() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, buildHiveSchema()); - sd.setOutputSchema(jc, buildPrunedSchema()); - - sd.initialize(jc, getProps()); - conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getPrunedRecords(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertFalse(bytesArr[j].equals(w)); - Assert.assertEquals(w.size(), 8); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(5, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - assertFalse(rr.nextKeyValue()); - } - - public void testReorderdCols() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, buildHiveSchema()); - sd.setOutputSchema(jc, buildReorderedSchema()); - - sd.initialize(jc, getProps()); - Map map = new HashMap(1); - map.put("part1", "first-part"); - sd.setPartitionValues(jc, map); - 
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getReorderedCols(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertFalse(bytesArr[j].equals(w)); - Assert.assertEquals(w.size(), 8); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(7, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - assertFalse(rr.nextKeyValue()); - } - private HCatRecord[] getExpectedRecords(){ - List rec_1 = new ArrayList(8); - Collections.addAll(rec_1, new Byte("123"), - new Short("456"), - new Integer(789), - new Long(1000L), - new Double(5.3D), - new String("hcatalog and hadoop"), - null, - null); - - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - Collections.addAll(rec_2, new Byte("100"), - new Short("200"), - new Integer(123), - new Long(1000L), - new Double(5.3D), - new String("hcatalog and hadoop"), - null, - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - } - - private HCatRecord[] getPrunedRecords(){ - List rec_1 = new ArrayList(8); - Collections.addAll(rec_1, new Byte("123"), - new Integer(789), - new Double(5.3D), - new String("hcatalog and hadoop"), - null); - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - Collections.addAll(rec_2, new Byte("100"), - new Integer(123), - new Double(5.3D), - new String("hcatalog and hadoop"), - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - } - - private HCatSchema buildHiveSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("asmallint", "smallint", ""), - new FieldSchema("aint", "int", ""), - new FieldSchema("along", "bigint", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("anullint", "int", ""), - new FieldSchema("anullstring", "string", ""))); - } - - private HCatSchema buildPrunedSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("aint", "int", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("anullint", "int", ""))); - } - - private HCatSchema buildReorderedSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("aint", "int", ""), - new FieldSchema("part1", "string", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("newCol", "tinyint", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("anullint", "int", ""))); - } - - private HCatRecord[] getReorderedCols(){ - List rec_1 = new ArrayList(7); - Collections.addAll(rec_1, new Integer(789), - new String("first-part"), - new Double(5.3D), - null, // new column - new String("hcatalog and hadoop"), - new Byte("123"), - null); - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(7); - Collections.addAll(rec_2, new Integer(123), - new 
String("first-part"), - new Double(5.3D), - null, - new String("hcatalog and hadoop"), - new Byte("100"), - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - - } - private Properties getProps(){ - Properties props = new Properties(); - props.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); - props.setProperty(Constants.SERIALIZATION_FORMAT, "9"); - return props; - } -} diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken deleted file mode 100644 index 1042501..0000000 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputStorageDriver; -import org.apache.hcatalog.mapreduce.HCatOutputStorageDriver; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.hadoop.hive.shims.ShimLoader; - -public class TestRCFileOutputStorageDriver extends TestCase { - - public void testConversion() throws IOException { - Configuration conf = new Configuration(); - JobContext jc = ShimLoader.getHadoopShims().getHCatShim().createJobContext(conf, new JobID()); - String jobString = HCatUtil.serialize(OutputJobInfo.create(null,null,null)); - jc.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_INFO,jobString); - - HCatSchema schema = buildHiveSchema(); - HCatInputStorageDriver isd = new RCFileInputDriver(); - - isd.setOriginalSchema(jc, schema); - isd.setOutputSchema(jc, schema); - isd.initialize(jc, new Properties()); - - byte[][] byteArray = buildBytesArray(); - - BytesRefArrayWritable bytesWritable = new BytesRefArrayWritable(byteArray.length); - for (int i = 0; i < byteArray.length; i++) { - BytesRefWritable cu = new 
BytesRefWritable(byteArray[i], 0, byteArray[i].length); - bytesWritable.set(i, cu); - } - - //Convert byte array to HCatRecord using isd, convert hcatrecord back to byte array - //using osd, compare the two arrays - HCatRecord record = isd.convertToHCatRecord(null, bytesWritable); - - HCatOutputStorageDriver osd = new RCFileOutputDriver(); - - osd.setSchema(jc, schema); - osd.initialize(jc, new Properties()); - - BytesRefArrayWritable bytesWritableOutput = (BytesRefArrayWritable) osd.convertValue(record); - - assertTrue(bytesWritableOutput.compareTo(bytesWritable) == 0); - } - - private byte[][] buildBytesArray() throws UnsupportedEncodingException { - byte[][] bytes = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), - "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcat and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8") }; - return bytes; - } - - private HCatSchema buildHiveSchema() throws HCatException{ - - List fields = new ArrayList(8); - fields.add(new FieldSchema("atinyint", "tinyint", "")); - fields.add(new FieldSchema("asmallint", "smallint", "")); - fields.add(new FieldSchema("aint", "int", "")); - fields.add(new FieldSchema("along", "bigint", "")); - fields.add(new FieldSchema("adouble", "double", "")); - fields.add(new FieldSchema("astring", "string", "")); - fields.add(new FieldSchema("anullint", "int", "")); - fields.add(new FieldSchema("anullstring", "string", "")); - - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(fields)); - } -} diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken deleted file mode 100644 index 33824b8..0000000 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken +++ /dev/null @@ -1,129 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatBaseInputFormat; -import org.apache.hcatalog.mapreduce.HCatEximInputFormat; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.Job; -import org.apache.pig.Expression; -import org.apache.pig.LoadFunc; -import org.apache.pig.ResourceSchema; -import org.apache.pig.impl.util.UDFContext; - -/** - * Pig {@link LoadFunc} to read data/metadata from hcatalog exported location - */ - -public class HCatEximLoader extends HCatBaseLoader { - - private static final Log LOG = LogFactory.getLog(HCatEximLoader.class); - - private HCatSchema tableSchema; - private HCatSchema partitionSchema; - private HCatEximInputFormat inputFormat; - - public HCatEximLoader() { - LOG.debug("HCatEximLoader ctored"); - } - - @Override - public ResourceSchema getSchema(String location, Job job) throws IOException { - LOG.debug("getSchema with location :" + location); - if (tableSchema == null) { - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - } - LOG.debug("getSchema got schema :" + tableSchema.toString()); - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(tableSchema.getFields()); - colsPlusPartKeys.addAll(partitionSchema.getFields()); - outputSchema = new HCatSchema(colsPlusPartKeys); - return PigHCatUtil.getResourceSchema(outputSchema); - } - - @Override - public String[] getPartitionKeys(String location, Job job) throws IOException { - LOG.warn("getPartitionKeys with location :" + location); - /* - if (tableSchema == null) { - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - } - return partitionSchema.getFieldNames().toArray(new String[0]); - */ - return null; - } - - @Override - public void setPartitionFilter(Expression partitionFilter) throws IOException { - LOG.debug("setPartitionFilter with filter :" + partitionFilter.toString()); - } - - @Override - public void setLocation(String location, Job job) throws IOException { - LOG.debug("setLocation with location :" + location); - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(tableSchema.getFields()); - colsPlusPartKeys.addAll(partitionSchema.getFields()); - outputSchema = new HCatSchema(colsPlusPartKeys); - UDFContext udfContext = UDFContext.getUDFContext(); - Properties props = udfContext.getUDFProperties(this.getClass(), - new String[] {signature}); - RequiredFieldList requiredFieldsInfo = - (RequiredFieldList) props.get(PRUNE_PROJECTION_INFO); - if (requiredFieldsInfo != null) { - ArrayList fcols = new ArrayList(); - for (RequiredField rf : requiredFieldsInfo.getFields()) { - fcols.add(tableSchema.getFields().get(rf.getIndex())); - } - outputSchema = new HCatSchema(fcols); - try { - HCatBaseInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } - } - - - @Override - public InputFormat getInputFormat() throws IOException { - if (inputFormat == null) { - inputFormat = new 
HCatEximInputFormat(); - } - return inputFormat; - } - -} diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken deleted file mode 100644 index 3e21cc7..0000000 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatEximOutputCommitter; -import org.apache.hcatalog.mapreduce.HCatEximOutputFormat; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.pig.ResourceSchema; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.util.ObjectSerializer; -import org.apache.pig.impl.util.UDFContext; - -/** - * HCatEximStorer. 
- * - */ - -public class HCatEximStorer extends HCatBaseStorer { - - private static final Log LOG = LogFactory.getLog(HCatEximStorer.class); - - private final String outputLocation; - - public HCatEximStorer(String outputLocation) throws Exception { - this(outputLocation, null, null); - } - - public HCatEximStorer(String outputLocation, String partitionSpec) throws Exception { - this(outputLocation, partitionSpec, null); - } - - public HCatEximStorer(String outputLocation, String partitionSpec, String schema) - throws Exception { - super(partitionSpec, schema); - this.outputLocation = outputLocation; - LOG.debug("HCatEximStorer called"); - } - - @Override - public OutputFormat getOutputFormat() throws IOException { - LOG.debug("getOutputFormat called"); - return new HCatEximOutputFormat(); - } - - @Override - public void setStoreLocation(String location, Job job) throws IOException { - LOG.debug("setStoreLocation called with :" + location); - String[] userStr = location.split("\\."); - String dbname = MetaStoreUtils.DEFAULT_DATABASE_NAME; - String tablename = null; - if (userStr.length == 2) { - dbname = userStr[0]; - tablename = userStr[1]; - } else { - tablename = userStr[0]; - } - Properties p = UDFContext.getUDFContext() - .getUDFProperties(this.getClass(), new String[] {sign}); - Configuration config = job.getConfiguration(); - if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { - Schema schema = (Schema) ObjectSerializer.deserialize(p.getProperty(PIG_SCHEMA)); - if (schema != null) { - pigSchema = schema; - } - if (pigSchema == null) { - throw new FrontendException("Schema for data cannot be determined.", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - HCatSchema hcatTblSchema = new HCatSchema(new ArrayList()); - try { - doSchemaValidations(pigSchema, hcatTblSchema); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - - List hcatFields = new ArrayList(); - List partVals = new ArrayList(); - for (String key : partitionKeys) { - hcatFields.add(new HCatFieldSchema(key, HCatFieldSchema.Type.STRING, "")); - partVals.add(partitions.get(key)); - } - - HCatSchema outputSchema = convertPigSchemaToHCatSchema(pigSchema, - hcatTblSchema); - LOG.debug("Pig Schema '" + pigSchema.toString() + "' was converted to HCatSchema '" - + outputSchema); - HCatEximOutputFormat.setOutput(job, - dbname, tablename, - outputLocation, - new HCatSchema(hcatFields), - partVals, - outputSchema); - p.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(outputSchema)); - p.setProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO, - config.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - if (config.get(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { - p.setProperty(HCatConstants.HCAT_KEY_HIVE_CONF, - config.get(HCatConstants.HCAT_KEY_HIVE_CONF)); - } - } else { - config.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - p.getProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - if (p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { - config.set(HCatConstants.HCAT_KEY_HIVE_CONF, - p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF)); - } - } - } - - @Override - public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { - if( job.getConfiguration().get("mapred.job.tracker", "").equalsIgnoreCase("local") ) { - //In local mode, mapreduce will not call OutputCommitter.cleanupJob. - //Calling it from here so that the partition publish happens. - //This call needs to be removed after MAPREDUCE-1447 is fixed. 
- new HCatEximOutputCommitter(job,null).cleanupJob(job); - } - } -} diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken deleted file mode 100644 index 238edb2..0000000 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken +++ /dev/null @@ -1,352 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import junit.framework.TestCase; - -import org.apache.hcatalog.MiniCluster; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecException; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.util.UDFContext; - -/** - * - * TestHCatEximLoader. 
Assumes Exim storer is working well - * - */ -public class TestHCatEximLoader extends TestCase { - - private static final String NONPART_TABLE = "junit_unparted"; - private static final String PARTITIONED_TABLE = "junit_parted"; - private static MiniCluster cluster = MiniCluster.buildCluster(); - - private static final String dataLocation = "/tmp/data"; - private static String fqdataLocation; - private static final String exportLocation = "/tmp/export"; - private static String fqexportLocation; - - private static Properties props; - - private void cleanup() throws IOException { - MiniCluster.deleteFile(cluster, dataLocation); - MiniCluster.deleteFile(cluster, exportLocation); - } - - @Override - protected void setUp() throws Exception { - props = new Properties(); - props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() - + ", fs.default.name : " + props.getProperty("fs.default.name")); - fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation; - fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation; - System.out.println("FQ Data Location :" + fqdataLocation); - System.out.println("FQ Export Location :" + fqexportLocation); - cleanup(); - } - - @Override - protected void tearDown() throws Exception { - cleanup(); - } - - private void populateDataFile() throws IOException { - MiniCluster.deleteFile(cluster, dataLocation); - String[] input = new String[] { - "237,Krishna,01/01/1990,M,IN,TN", - "238,Kalpana,01/01/2000,F,IN,KA", - "239,Satya,01/01/2001,M,US,TN", - "240,Kavya,01/01/2002,F,US,KA" - }; - MiniCluster.createInputFile(cluster, dataLocation, input); - } - - private static class EmpDetail { - String name; - String dob; - String mf; - String country; - String state; - } - - private void assertEmpDetail(Tuple t, Map eds) throws ExecException { - assertNotNull(t); - assertEquals(6, t.size()); - - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - assertTrue(t.get(3).getClass() == String.class); - assertTrue(t.get(4).getClass() == String.class); - assertTrue(t.get(5).getClass() == String.class); - - EmpDetail ed = eds.remove(t.get(0)); - assertNotNull(ed); - - assertEquals(ed.name, t.get(1)); - assertEquals(ed.dob, t.get(2)); - assertEquals(ed.mf, t.get(3)); - assertEquals(ed.country, t.get(4)); - assertEquals(ed.state, t.get(5)); - } - - private void addEmpDetail(Map empDetails, int id, String name, - String dob, String mf, String country, String state) { - EmpDetail ed = new EmpDetail(); - ed.name = name; - ed.dob = dob; - ed.mf = mf; - ed.country = country; - ed.state = state; - empDetails.put(id, ed); - } - - - - private void assertEmpDetail(Tuple t, Integer id, String name, String dob, String mf) - throws ExecException { - assertNotNull(t); - assertEquals(4, t.size()); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - assertTrue(t.get(3).getClass() == String.class); - - assertEquals(id, t.get(0)); - assertEquals(name, t.get(1)); - assertEquals(dob, t.get(2)); - assertEquals(mf, t.get(3)); - } - - private void assertEmpDetail(Tuple t, String mf, String name) - throws ExecException { - assertNotNull(t); - assertEquals(2, t.size()); - assertTrue(t.get(0).getClass() == String.class); - 
assertTrue(t.get(1).getClass() == String.class); - - assertEquals(mf, t.get(0)); - assertEquals(name, t.get(1)); - } - - - - public void testLoadNonPartTable() throws Exception { - populateDataFile(); - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation - + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader();"); - Iterator XIter = server.openIterator("A"); - assertTrue(XIter.hasNext()); - Tuple t = XIter.next(); - assertEmpDetail(t, 237, "Krishna", "01/01/1990", "M"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 238, "Kalpana", "01/01/2000", "F"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 239, "Satya", "01/01/2001", "M"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 240, "Kavya", "01/01/2002", "F"); - assertFalse(XIter.hasNext()); - } - } - - public void testLoadNonPartProjection() throws Exception { - populateDataFile(); - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation - + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader();"); - server.registerQuery("B = foreach A generate emp_sex, emp_name;"); - - Iterator XIter = server.openIterator("B"); - assertTrue(XIter.hasNext()); - Tuple t = XIter.next(); - assertEmpDetail(t, "M", "Krishna"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "F", "Kalpana"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "M", "Satya"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "F", "Kavya"); - assertFalse(XIter.hasNext()); - } - } - - - public void testLoadMultiPartTable() throws Exception { - { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation + - "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);" - ); - server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); - server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); - server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); - server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); - 
server.registerQuery("store INTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=tn');"); - server.registerQuery("store INKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=ka');"); - server.registerQuery("store USTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=tn');"); - server.registerQuery("store USKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=ka');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader() " - //+ "as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"); - + ";"); - - Iterator XIter = server.openIterator("A"); - - Map empDetails = new TreeMap(); - addEmpDetail(empDetails, 237, "Krishna", "01/01/1990", "M", "in", "tn"); - addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka"); - addEmpDetail(empDetails, 239, "Satya", "01/01/2001", "M", "us", "tn"); - addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka"); - - while(XIter.hasNext()) { - Tuple t = XIter.next(); - assertNotSame(0, empDetails.size()); - assertEmpDetail(t, empDetails); - } - assertEquals(0, empDetails.size()); - } - } - - public void testLoadMultiPartFilter() throws Exception { - { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation + - "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);" - ); - server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); - server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); - server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); - server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); - server.registerQuery("store INTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=tn');"); - server.registerQuery("store INKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=ka');"); - server.registerQuery("store USTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=tn');"); - server.registerQuery("store USKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=ka');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader() " - + ";"); - server.registerQuery("B = filter A by emp_state == 'ka';"); - - Iterator XIter = 
server.openIterator("B"); - - Map empDetails = new TreeMap(); - addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka"); - addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka"); - - while(XIter.hasNext()) { - Tuple t = XIter.next(); - assertNotSame(0, empDetails.size()); - assertEmpDetail(t, empDetails); - } - assertEquals(0, empDetails.size()); - } - } - - -} diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken deleted file mode 100644 index fce1e70..0000000 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - --->There are two pieces of code that sets directory permissions. --->One that sets the UMask which only woks for dfs filesystem. --->And the other change the permission of directories after they are created. --->I removed that since it is not secure and just add more load on the namenode. --->We should push this test to e2e to verify what actually runs in production. 
- -package org.apache.hcatalog.pig; - -import java.io.IOException; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hcatalog.ExitException; -import org.apache.hcatalog.NoExitSecurityManager; -import org.apache.hcatalog.cli.HCatCli; -import org.apache.hcatalog.pig.HCatStorer; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.impl.util.UDFContext; -import org.apache.thrift.TException; - -public class TestPermsInheritance extends TestCase { - - @Override - protected void setUp() throws Exception { - super.setUp(); - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - msc = new HiveMetaStoreClient(conf); - msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testNoPartTbl", true,true); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testPartTbl", true,true); - pig = new PigServer(ExecType.LOCAL, conf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - } - - private HiveMetaStoreClient msc; - private SecurityManager securityManager; - private PigServer pig; - - @Override - protected void tearDown() throws Exception { - super.tearDown(); - System.setSecurityManager(securityManager); - } - - private final HiveConf conf = new HiveConf(this.getClass()); - - public void testNoPartTbl() throws IOException, MetaException, UnknownTableException, TException, NoSuchObjectException, HiveException{ - - try{ - HCatCli.main(new String[]{"-e","create table testNoPartTbl (line string) stored as RCFILE", "-p","rwx-wx---"}); - } - catch(Exception e){ - assertTrue(e instanceof ExitException); - assertEquals(((ExitException)e).getStatus(), 0); - } - Warehouse wh = new Warehouse(conf); - Path dfsPath = wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testNoPartTbl"); - FileSystem fs = dfsPath.getFileSystem(conf); - assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx---")); - - pig.setBatchOn(); - pig.registerQuery("A = load 'build.xml' as (line:chararray);"); - pig.registerQuery("store A into 'testNoPartTbl' using "+HCatStorer.class.getName()+"();"); - pig.executeBatch(); - FileStatus[] status = fs.listStatus(dfsPath,hiddenFileFilter); - - assertEquals(status.length, 1); - assertEquals(FsPermission.valueOf("drwx-wx---"),status[0].getPermission()); - - try{ - HCatCli.main(new String[]{"-e","create table testPartTbl (line string) partitioned by (a string) stored as RCFILE", "-p","rwx-wx--x"}); - } - catch(Exception e){ - assertTrue(e instanceof ExitException); - assertEquals(((ExitException)e).getStatus(), 0); - } - - dfsPath = 
wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testPartTbl");
-    assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx--x"));
-
-    pig.setBatchOn();
-    pig.registerQuery("A = load 'build.xml' as (line:chararray);");
-    pig.registerQuery("store A into 'testPartTbl' using "+HCatStorer.class.getName()+"('a=part');");
-    pig.executeBatch();
-
-    Path partPath = new Path(dfsPath,"a=part");
-    assertEquals(FsPermission.valueOf("drwx-wx--x"),fs.getFileStatus(partPath).getPermission());
-    status = fs.listStatus(partPath,hiddenFileFilter);
-    assertEquals(status.length, 1);
-    assertEquals(FsPermission.valueOf("drwx-wx--x"),status[0].getPermission());
-  }
-
-  private static final PathFilter hiddenFileFilter = new PathFilter(){
-    public boolean accept(Path p){
-      String name = p.getName();
-      return !name.startsWith("_") && !name.startsWith(".");
-    }
-  };
-}
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken
deleted file mode 100644
index fdf3a98..0000000
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken
+++ /dev/null
@@ -1,272 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hcatalog.pig;
-
-import java.io.BufferedInputStream;
-import java.io.DataInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.Iterator;
-import java.util.Map;
-
-import junit.framework.TestCase;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.cli.CliSessionState;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.hadoop.hive.ql.CommandNeedRetryException;
-import org.apache.hadoop.hive.ql.Driver;
-import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
-import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
-import org.apache.hcatalog.common.HCatConstants;
-import org.apache.hcatalog.pig.HCatLoader;
-import org.apache.pig.ExecType;
-import org.apache.pig.PigServer;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.util.UDFContext;
-import org.apache.thrift.TException;
-
-public class TestPigStorageDriver extends TestCase {
-
-  private HiveConf hcatConf;
-  private Driver hcatDriver;
-  private HiveMetaStoreClient msc;
-  private static String tblLocation = "/tmp/test_pig/data";
-  private static String anyExistingFileInCurDir = "ivy.xml";
-  private static String warehouseDir = "/tmp/hcat_junit_warehouse";
-
-  @Override
-  protected void setUp() throws Exception {
-
-    hcatConf = new HiveConf(this.getClass());
-    hcatConf.set(ConfVars.PREEXECHOOKS.varname, "");
-    hcatConf.set(ConfVars.POSTEXECHOOKS.varname, "");
-    hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
-    hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
-    hcatDriver = new Driver(hcatConf);
-    msc = new HiveMetaStoreClient(hcatConf);
-    SessionState.start(new CliSessionState(hcatConf));
-    super.setUp();
-  }
-
-  @Override
-  protected void tearDown() throws Exception {
-    super.tearDown();
-  }
-
-  public void testPigStorageDriver() throws IOException, CommandNeedRetryException{
-
-    String fsLoc = hcatConf.get("fs.default.name");
-    Path tblPath = new Path(fsLoc, tblLocation);
-    String tblName = "junit_pigstorage";
-    tblPath.getFileSystem(hcatConf).copyFromLocalFile(new Path(anyExistingFileInCurDir),tblPath);
-
-    hcatDriver.run("drop table " + tblName);
-    CommandProcessorResponse resp;
-    String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE";
-
-    resp = hcatDriver.run(createTable);
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    resp = hcatDriver.run("alter table " + tblName + " add partition (b='2010-10-10') location '"+new Path(fsLoc, "/tmp/test_pig")+"'");
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    resp = hcatDriver.run("alter table " + tblName + " partition (b='2010-10-10') set fileformat TEXTFILE");
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    resp = hcatDriver.run("desc extended " + tblName + " partition (b='2010-10-10')");
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties());
-    UDFContext.getUDFContext().setClientSystemProps();
-    server.registerQuery(" a = load '" + tblName + "' using "+HCatLoader.class.getName()+";");
-    Iterator itr = server.openIterator("a");
-    boolean result = compareWithFile(itr, anyExistingFileInCurDir, 2, "2010-10-10", null);
-    assertTrue(result);
-
-    server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);");
-    server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')");
-
-    server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);");
-    itr = server.openIterator("a");
-    result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", null);
-    assertTrue(result);
-
-    // Test multi-store
-    server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);");
-    server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-01');");
-    server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-02');");
-
-    server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-01' using PigStorage() as (a:chararray);");
-    itr = server.openIterator("a");
-    result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-11-01", null);
-    assertTrue(result);
-
-    server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-02' using PigStorage() as (a:chararray);");
-    itr = server.openIterator("a");
-    result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-11-02", null);
-    assertTrue(result);
-
-    hcatDriver.run("drop table " + tblName);
-  }
-
-  private boolean compareWithFile(Iterator itr, String factFile, int numColumn, String key, String valueSuffix) throws IOException {
-    DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(factFile))));
-    while(itr.hasNext()){
-      Tuple t = itr.next();
-      assertEquals(numColumn, t.size());
-      if(t.get(0) != null) {
-        // If underlying data-field is empty. PigStorage inserts null instead
-        // of empty String objects.
-        assertTrue(t.get(0) instanceof String);
-        String expected = stream.readLine();
-        if (valueSuffix!=null)
-          expected += valueSuffix;
-        assertEquals(expected, t.get(0));
-      }
-      else{
-        assertTrue(stream.readLine().isEmpty());
-      }
-
-      if (numColumn>1) {
-        // The second column must be key
-        assertTrue(t.get(1) instanceof String);
-        assertEquals(key, t.get(1));
-      }
-    }
-    assertEquals(0,stream.available());
-    stream.close();
-    return true;
-  }
-
-  public void testDelim() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{
-
-    hcatDriver.run("drop table junit_pigstorage_delim");
-
-    CommandProcessorResponse resp;
-    String createTable = "create table junit_pigstorage_delim (a0 string, a1 string) partitioned by (b string) stored as RCFILE";
-
-    resp = hcatDriver.run(createTable);
-
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    resp = hcatDriver.run("alter table junit_pigstorage_delim add partition (b='2010-10-10')");
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    resp = hcatDriver.run("alter table junit_pigstorage_delim partition (b='2010-10-10') set fileformat TEXTFILE");
-
-    Partition part = msc.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", "b=2010-10-10");
-    Map partParms = part.getParameters();
-    partParms.put(HCatConstants.HCAT_PIG_LOADER_ARGS, "control-A");
-    partParms.put(HCatConstants.HCAT_PIG_STORER_ARGS, "control-A");
-
-    msc.alter_partition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", part);
-
-    PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties());
-    UDFContext.getUDFContext().setClientSystemProps();
-    server.registerQuery(" a = load 'junit_pigstorage_delim' using "+HCatLoader.class.getName()+";");
-    try{
-      server.openIterator("a");
-    }catch(FrontendException fe){}
-
-    resp = hcatDriver.run("alter table junit_pigstorage_delim set fileformat TEXTFILE");
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-    resp = hcatDriver.run("alter table junit_pigstorage_delim set TBLPROPERTIES ('hcat.pig.loader.args'=':', 'hcat.pig.storer.args'=':')");
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    File inputFile = File.createTempFile("hcat_test", "");
-    PrintWriter p = new PrintWriter(new FileWriter(inputFile));
-    p.println("1\t2");
-    p.println("3\t4");
-    p.close();
-    server.registerQuery("a = load '"+inputFile.toString()+"' using PigStorage() as (a0:chararray, a1:chararray);");
-    server.store("a", "junit_pigstorage_delim", HCatStorer.class.getName() + "('b=2010-10-11')");
-
-    server.registerQuery("a = load '/tmp/hcat_junit_warehouse/junit_pigstorage_delim/b=2010-10-11' using PigStorage() as (a:chararray);");
-    Iterator itr = server.openIterator("a");
-
-    assertTrue(itr.hasNext());
-    Tuple t = itr.next();
-    assertTrue(t.get(0).equals("1:2"));
-
-    assertTrue(itr.hasNext());
-    t = itr.next();
-    assertTrue(t.get(0).equals("3:4"));
-
-    assertFalse(itr.hasNext());
-    inputFile.delete();
-  }
-
-  public void testMultiConstructArgs() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{
-
-    String fsLoc = hcatConf.get("fs.default.name");
-    Path tblPath = new Path(fsLoc, tblLocation);
-    String tblName = "junit_pigstorage_constructs";
-    tblPath.getFileSystem(hcatConf).copyFromLocalFile(new Path(anyExistingFileInCurDir),tblPath);
-
-    hcatDriver.run("drop table junit_pigstorage_constructs");
-
-    CommandProcessorResponse resp;
-    String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE";
-
-    resp = hcatDriver.run(createTable);
-
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    resp = hcatDriver.run("alter table " + tblName + " set TBLPROPERTIES ('hcat.pig.storer'='org.apache.hcatalog.pig.MyPigStorage', 'hcat.pig.storer.args'=':#hello', 'hcat.pig.args.delimiter'='#')");
-    assertEquals(0, resp.getResponseCode());
-    assertNull(resp.getErrorMessage());
-
-    PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties());
-    UDFContext.getUDFContext().setClientSystemProps();
-
-    server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);");
-    server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')");
-
-    server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);");
-    Iterator itr = server.openIterator("a");
-    boolean result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", ":hello");
-    assertTrue(result);
-  }
-}