Index: ql/src/test/results/clientpositive/input3_limit.q.out =================================================================== --- ql/src/test/results/clientpositive/input3_limit.q.out (revision 740371) +++ ql/src/test/results/clientpositive/input3_limit.q.out (working copy) @@ -1,32 +1,68 @@ ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF T1 a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB T2)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF a key)) (TOK_SELEXPR (TOK_COLREF a value))) (TOK_LIMIT 20))) + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF T1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_DISTRIBUTEBY (TOK_COLREF key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_COLREF key)) (TOK_TABSORTCOLNAMEASC (TOK_COLREF value))))) T)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB T2)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 20))) STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: - a + t:t1 Select Operator expressions: expr: key type: string expr: value type: string - Limit - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: 0 - type: string - expr: 1 - type: string + Reduce Output Operator + key expressions: + expr: 0 + type: string + expr: 1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + value expressions: + expr: 0 + type: string + expr: 1 + type: string Reduce Operator Tree: Extract + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: string + Limit + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + /data/users/athusoo/commits/hive_trunk_ws1/build/ql/tmp/156027535/510873534.10001 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: 0 + type: string + expr: 1 + type: string + Reduce Operator Tree: + Extract Limit File Output Operator compressed: false @@ -47,23 +83,23 @@ name: t2 -128 val_128 -150 val_150 -165 val_165 -193 val_193 -213 val_213 -224 val_224 -238 val_238 -255 val_255 -265 val_265 -27 val_27 -273 val_273 -278 val_278 -311 val_311 -369 val_369 -401 val_401 -409 val_409 -484 val_484 -66 val_66 -86 val_86 -98 val_98 +0 val_0 +0 val_0 +0 val_0 +0 val_1 +0 val_1 +1 val_2 +10 val_10 +10 val_11 +100 val_100 +100 val_100 +100 val_101 +100 val_101 +101 val_102 +102 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_105 +104 val_105 Index: ql/src/test/results/clientpositive/sample3.q.out =================================================================== --- ql/src/test/results/clientpositive/sample3.q.out (revision 740371) +++ ql/src/test/results/clientpositive/sample3.q.out (working copy) @@ -33,210 +33,210 @@ limit: -1 -86 -27 -484 -150 +100 +100 +100 +100 +105 +105 +105 +114 +114 +114 +114 +119 +119 +119 +119 +119 +119 +123 +123 128 -213 -146 -281 -277 -209 -394 -466 -399 -489 -439 -367 -475 -155 128 -489 -72 -277 -399 -169 -498 -286 -187 -54 -213 +128 +128 +128 +132 +132 137 -489 -353 -272 -466 -411 -119 -100 -191 -308 -95 -196 -457 -187 -470 137 -169 -77 -72 -90 -277 -272 -452 -402 -317 -95 -498 -321 -119 -489 -218 -209 -367 -344 -263 -191 -128 -2 -321 -335 -466 -105 -114 -90 +137 +146 
+146 +150 +155 164 164 -187 -119 -439 +164 +164 169 -443 -277 +169 +169 +169 178 -317 -493 -353 -407 +178 +178 18 -100 -498 -146 -362 18 -281 -344 -448 +182 +187 +187 +187 +191 +191 +191 +196 +196 +196 +196 +2 +2 +204 +209 +209 +209 +209 +213 +213 +213 +218 +22 222 -90 -169 -281 +222 +227 +231 +240 245 -425 +245 +254 +259 +259 +263 +268 +27 272 -231 -448 -31 -443 -371 -402 -128 -240 -286 -335 -367 -59 -349 -123 -402 -128 -63 -344 +272 +272 277 -114 -209 +277 +277 +277 +277 +277 281 -114 -254 -196 -86 -222 -411 281 -68 -119 -376 -213 -132 -191 -349 -475 -399 -77 -114 -443 -484 -407 -367 -2 -349 -164 -326 -308 -119 -358 -105 -416 -77 -461 -40 -385 -259 +281 +281 +281 +286 +286 +295 +295 303 -245 303 -277 -132 -443 -399 +308 +308 +308 +31 317 +317 +317 +321 +321 +326 330 -209 -489 -295 -105 -204 -439 -457 -470 -196 -119 -268 -295 -407 -439 335 -164 +335 +335 +344 +344 +344 349 -371 -123 -182 +349 +349 +349 353 +353 +353 +358 +362 +367 +367 +367 +367 371 +371 +371 +371 +376 385 +385 +394 +399 +399 +399 +399 40 -259 -178 -100 -22 +40 402 -137 -196 -100 +402 +402 +402 407 -371 -308 -178 +407 +407 +407 +411 +411 +416 +425 +439 +439 +439 +439 +443 +443 +443 +443 +448 +448 +452 457 -227 +457 +457 +461 +466 +466 +466 +470 +470 +475 +475 +484 +484 +489 +489 +489 +489 +489 +493 +498 +498 +498 +54 +59 +63 +68 +72 +72 +77 +77 +77 +86 +86 +90 +90 +90 +95 +95 Index: ql/src/test/results/clientpositive/sample5.q.out =================================================================== --- ql/src/test/results/clientpositive/sample5.q.out (revision 740371) +++ ql/src/test/results/clientpositive/sample5.q.out (working copy) @@ -28,7 +28,7 @@ type: string File Output Operator compressed: false - directory: /tmp/hive-zshao/211838955/159322479.10000.insclause-0 + directory: /data/users/athusoo/commits/hive_trunk_ws1/ql/../build/ql/tmp/379123596/110097939.10000.insclause-0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat @@ -41,14 +41,14 @@ serialization.lib org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - location file:/data/users/zshao/sync/apache-trunk-HIVE-104/build/ql/test/data/warehouse/dest1 + location file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/test/data/warehouse/dest1 serde: org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe name: dest1 Needs Tagging: false Path -> Alias: - file:/data/users/zshao/sync/apache-trunk-HIVE-104/build/ql/test/data/warehouse/srcbucket + file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/test/data/warehouse/srcbucket Path -> Partition: - file:/data/users/zshao/sync/apache-trunk-HIVE-104/build/ql/test/data/warehouse/srcbucket + file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/test/data/warehouse/srcbucket Partition input format: org.apache.hadoop.mapred.TextInputFormat @@ -63,7 +63,7 @@ serialization.lib org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - location file:/data/users/zshao/sync/apache-trunk-HIVE-104/build/ql/test/data/warehouse/srcbucket + location file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/test/data/warehouse/srcbucket serde: org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe name: srcbucket @@ -71,7 +71,7 @@ Move Operator tables: replace: true - source: 
/tmp/hive-zshao/211838955/159322479.10000.insclause-0 + source: /data/users/athusoo/commits/hive_trunk_ws1/ql/../build/ql/tmp/379123596/110097939.10000.insclause-0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat @@ -84,215 +84,215 @@ serialization.lib org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - location file:/data/users/zshao/sync/apache-trunk-HIVE-104/build/ql/test/data/warehouse/dest1 + location file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/test/data/warehouse/dest1 serde: org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe name: dest1 -86 val_86 +2 val_2 +2 val_3 +18 val_18 +18 val_18 +22 val_23 27 val_27 -484 val_484 -150 val_150 -128 val_128 -213 val_213 -146 val_146 -281 val_281 -277 val_277 -209 val_209 -394 val_394 -466 val_466 -399 val_399 -489 val_489 -439 val_439 -367 val_367 -475 val_475 -155 val_155 -128 val_128 -489 val_489 +31 val_32 +40 val_41 +40 val_41 +54 val_54 +59 val_60 +63 val_64 +68 val_69 72 val_72 -277 val_277 -399 val_399 -169 val_169 -498 val_498 -286 val_286 -187 val_187 -54 val_54 -213 val_213 -137 val_137 -489 val_489 -353 val_353 -272 val_272 -466 val_466 -411 val_411 -119 val_119 -100 val_100 -191 val_191 -308 val_308 -95 val_95 -196 val_196 -457 val_457 -187 val_187 -470 val_470 -137 val_137 -169 val_169 +72 val_72 77 val_77 -72 val_72 +77 val_78 +77 val_78 +86 val_87 +86 val_86 90 val_90 -277 val_277 -272 val_272 -452 val_452 -402 val_402 -317 val_317 +90 val_90 +90 val_90 95 val_95 -498 val_498 -321 val_321 +95 val_95 +100 val_100 +100 val_100 +100 val_101 +100 val_101 +105 val_106 +105 val_106 +105 val_105 +114 val_114 +114 val_115 +114 val_115 +114 val_115 +119 val_120 +119 val_120 +119 val_120 119 val_119 -489 val_489 -218 val_218 -209 val_209 -367 val_367 -344 val_344 -263 val_263 -191 val_191 +119 val_119 +119 val_119 +123 val_124 +123 val_124 +128 val_129 +128 val_129 128 val_128 -2 val_2 -321 val_321 -335 val_335 -466 val_466 -105 val_105 -114 val_114 -90 val_90 +128 val_128 +128 val_128 +132 val_133 +132 val_133 +137 val_138 +137 val_137 +137 val_137 +146 val_146 +146 val_146 +150 val_150 +155 val_155 164 val_164 164 val_164 -187 val_187 -119 val_119 -439 val_439 +164 val_165 +164 val_165 169 val_169 -443 val_443 -277 val_277 +169 val_169 +169 val_169 +169 val_169 178 val_178 -317 val_317 -493 val_493 -353 val_353 -407 val_407 -18 val_18 -100 val_100 -498 val_498 -146 val_146 -362 val_362 -18 val_18 -281 val_281 -344 val_344 -448 val_448 +178 val_179 +178 val_179 +182 val_183 +187 val_187 +187 val_187 +187 val_187 +191 val_191 +191 val_191 +191 val_192 +196 val_197 +196 val_197 +196 val_197 +196 val_196 +204 val_205 +209 val_210 +209 val_210 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_214 +218 val_218 222 val_222 -90 val_90 -169 val_169 -281 val_282 +222 val_223 +227 val_228 +231 val_232 +240 val_241 245 val_246 -425 val_426 +245 val_246 +254 val_255 +259 val_260 +259 val_260 +263 val_263 +268 val_269 272 val_273 -231 val_232 -448 val_449 -31 val_32 -443 val_444 -371 val_372 -402 val_403 -128 val_129 -240 val_241 -286 val_287 -335 val_336 -367 val_368 -59 val_60 -349 val_350 -123 val_124 -402 val_403 -128 val_129 -63 val_64 -344 val_345 +272 val_272 +272 val_272 +277 val_277 +277 val_277 +277 val_277 +277 val_277 277 val_278 -114 val_115 -209 val_210 +277 val_278 281 val_282 -114 val_115 
-254 val_255 -196 val_197 -86 val_87 -222 val_223 -411 val_412 281 val_282 -68 val_69 -119 val_120 -376 val_377 -213 val_214 -132 val_133 -191 val_192 -349 val_350 -475 val_476 -399 val_400 -77 val_78 -114 val_115 -443 val_444 -484 val_485 -407 val_408 -367 val_368 -2 val_3 -349 val_350 -164 val_165 -326 val_327 -308 val_309 -119 val_120 -358 val_359 -105 val_106 -416 val_417 -77 val_78 -461 val_462 -40 val_41 -385 val_386 -259 val_260 +281 val_282 +281 val_281 +281 val_281 +286 val_286 +286 val_287 +295 val_296 +295 val_296 303 val_304 -245 val_246 303 val_304 -277 val_278 -132 val_133 -443 val_444 -399 val_400 +308 val_309 +308 val_309 +308 val_308 +317 val_317 +317 val_317 317 val_318 +321 val_321 +321 val_321 +326 val_327 330 val_331 -209 val_210 -489 val_490 -295 val_296 -105 val_106 -204 val_205 -439 val_440 -457 val_458 -470 val_471 -196 val_197 -119 val_120 -268 val_269 -295 val_296 -407 val_408 -439 val_440 335 val_336 -164 val_165 +335 val_336 +335 val_335 +344 val_344 +344 val_344 +344 val_345 349 val_350 -371 val_372 -123 val_124 -182 val_183 +349 val_350 +349 val_350 +349 val_350 353 val_354 +353 val_353 +353 val_353 +358 val_359 +362 val_362 +367 val_367 +367 val_367 +367 val_368 +367 val_368 371 val_372 +371 val_372 +371 val_372 +371 val_372 +376 val_377 385 val_386 -40 val_41 -259 val_260 -178 val_179 -100 val_101 -22 val_23 +385 val_386 +394 val_394 +399 val_399 +399 val_399 +399 val_400 +399 val_400 402 val_403 -137 val_138 -196 val_197 -100 val_101 +402 val_403 +402 val_403 +402 val_402 +407 val_407 407 val_408 -371 val_372 -308 val_309 -178 val_179 +407 val_408 +407 val_408 +411 val_412 +411 val_411 +416 val_417 +425 val_426 +439 val_440 +439 val_440 +439 val_439 +439 val_439 +443 val_443 +443 val_444 +443 val_444 +443 val_444 +448 val_449 +448 val_448 +452 val_452 +457 val_457 457 val_458 -227 val_228 +457 val_458 +461 val_462 +466 val_466 +466 val_466 +466 val_466 +470 val_470 +470 val_471 +475 val_476 +475 val_475 +484 val_484 +484 val_485 +489 val_490 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +493 val_493 +498 val_498 +498 val_498 +498 val_498 Index: ql/src/test/queries/clientpositive/input3_limit.q =================================================================== --- ql/src/test/queries/clientpositive/input3_limit.q (revision 740371) +++ ql/src/test/queries/clientpositive/input3_limit.q (working copy) @@ -7,12 +7,11 @@ CREATE TABLE T2(key STRING, value STRING); EXPLAIN -INSERT OVERWRITE TABLE T2 SELECT a.key, a.value from T1 a LIMIT 20; +INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20; +INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20; -INSERT OVERWRITE TABLE T2 SELECT a.key, a.value from T1 a LIMIT 20; +SELECT * FROM T2; -SELECT * FROM (SELECT * FROM T2 DISTRIBUTE BY key SORT BY key, value) T; - DROP TABLE T1; DROP TABLE T2; Index: ql/src/test/queries/clientpositive/sample3.q =================================================================== --- ql/src/test/queries/clientpositive/sample3.q (revision 740371) +++ ql/src/test/queries/clientpositive/sample3.q (working copy) @@ -4,5 +4,5 @@ FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s; SELECT s.key -FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s; +FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s SORT BY key; Index: ql/src/test/queries/clientpositive/sample5.q =================================================================== --- 
ql/src/test/queries/clientpositive/sample5.q (revision 740371)
+++ ql/src/test/queries/clientpositive/sample5.q (working copy)
@@ -8,4 +8,4 @@
 INSERT OVERWRITE TABLE dest1 SELECT s.* -- here's another test
 FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s;

-SELECT dest1.* FROM dest1;
+SELECT dest1.* FROM dest1 SORT BY key;
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 740371)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy)
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -46,142 +47,141 @@
  */
 public class Partition {

-    @SuppressWarnings("nls")
-    static final private Log LOG = LogFactory.getLog("hive.ql.metadata.Partition");
+  @SuppressWarnings("nls")
+  static final private Log LOG = LogFactory.getLog("hive.ql.metadata.Partition");

-    private Table table;
-    private org.apache.hadoop.hive.metastore.api.Partition tPartition;
-    /**
-     * @return the tPartition
-     */
-    public org.apache.hadoop.hive.metastore.api.Partition getTPartition() {
-      return tPartition;
-    }
+  private Table table;
+  private org.apache.hadoop.hive.metastore.api.Partition tPartition;
+  /**
+   * @return the tPartition
+   */
+  public org.apache.hadoop.hive.metastore.api.Partition getTPartition() {
+    return tPartition;
+  }

-    private LinkedHashMap<String, String> spec;
-
-    /**
-     * @return
-     * @see org.apache.hadoop.hive.metastore.api.Partition#getValues()
-     */
-    public List<String> getValues() {
-      return tPartition.getValues();
-    }
+  private LinkedHashMap<String, String> spec;

-    private Path partPath;
-    private URI partURI;
+  /**
+   * @return
+   * @see org.apache.hadoop.hive.metastore.api.Partition#getValues()
+   */
+  public List<String> getValues() {
+    return tPartition.getValues();
+  }

-    public Partition(Table tbl, org.apache.hadoop.hive.metastore.api.Partition tp) throws HiveException {
-      initialize(tbl, tp);
+  private Path partPath;
+  private URI partURI;
+
+  public Partition(Table tbl, org.apache.hadoop.hive.metastore.api.Partition tp) throws HiveException {
+    initialize(tbl, tp);
+  }
+
+  /**
+   * Create partition object with the given info.
+   * @param tbl Table the partition will be in.
+   * @param partSpec Partition specifications.
+   * @param location Location of the partition, relative to the table.
+   * @throws HiveException Thrown if we could not create the partition.
+   */
+  public Partition(Table tbl, Map<String, String> partSpec,
+      Path location) throws HiveException {
+
+    List<String> pvals = new ArrayList<String>();
+    for (FieldSchema field : tbl.getPartCols()) {
+      pvals.add(partSpec.get(field.getName()));
     }
-    /**
-     * Create partition object with the given info.
-     * @param tbl Table the partition will be in.
-     * @param partSpec Partition specifications.
-     * @param location Location of the partition, relative to the table.
-     * @throws HiveException Thrown if we could not create the partition.
-     */
-    public Partition(Table tbl, Map<String, String> partSpec,
-        Path location) throws HiveException {
-
-      List<String> pvals = new ArrayList<String>();
-      for (FieldSchema field : tbl.getPartCols()) {
-        pvals.add(partSpec.get(field.getName()));
-      }
-
-      org.apache.hadoop.hive.metastore.api.Partition tpart =
-        new org.apache.hadoop.hive.metastore.api.Partition();
-      tpart.setDbName(tbl.getDbName());
-      tpart.setTableName(tbl.getName());
-      tpart.setValues(pvals);
-
-      StorageDescriptor sd = new StorageDescriptor();
+    org.apache.hadoop.hive.metastore.api.Partition tpart =
+      new org.apache.hadoop.hive.metastore.api.Partition();
+    tpart.setDbName(tbl.getDbName());
+    tpart.setTableName(tbl.getName());
+    tpart.setValues(pvals);
+
+    StorageDescriptor sd = new StorageDescriptor();
+    try {
+      //replace with THRIFT-138
+      TMemoryBuffer buffer = new TMemoryBuffer(1024);
+      TBinaryProtocol prot = new TBinaryProtocol(buffer);
+      tbl.getTTable().getSd().write(prot);
+
+      sd.read(prot);
+    } catch (TException e) {
+      LOG.error("Could not create a copy of StorageDescription");
+      throw new HiveException("Could not create a copy of StorageDescription");
+    }
+
+    tpart.setSd(sd);
+    tpart.getSd().setLocation(location.toString());
+
+    initialize(tbl, tpart);
+  }
+
+  /**
+   * Initializes this object with the given variables
+   * @param tbl Table the partition belongs to
+   * @param tp Thrift Partition object
+   * @throws HiveException Thrown if we cannot initialize the partition
+   */
+  private void initialize(Table tbl,
+      org.apache.hadoop.hive.metastore.api.Partition tp)
+      throws HiveException {
+
+    this.table = tbl;
+    this.tPartition = tp;
+    this.partName = "";
+
+    if(tbl.isPartitioned()) {
       try {
-        //replace with THRIFT-138
-        TMemoryBuffer buffer = new TMemoryBuffer(1024);
-        TBinaryProtocol prot = new TBinaryProtocol(buffer);
-        tbl.getTTable().getSd().write(prot);
-
-        sd.read(prot);
-      } catch (TException e) {
-        LOG.error("Could not create a copy of StorageDescription");
-        throw new HiveException("Could not create a copy of StorageDescription");
-      }
-
-      tpart.setSd(sd);
-      tpart.getSd().setLocation(location.toString());
-
-      initialize(tbl, tpart);
-    }
-
-    /**
-     * Initializes this object with the given variables
-     * @param tbl Table the partition belongs to
-     * @param tp Thrift Partition object
-     * @throws HiveException Thrown if we cannot initialize the partition
-     */
-    private void initialize(Table tbl,
-        org.apache.hadoop.hive.metastore.api.Partition tp)
-        throws HiveException {
-
-      this.table = tbl;
-      this.tPartition = tp;
-      this.partName = "";
-
-      if(tbl.isPartitioned()) {
-        try {
-          this.partName = Warehouse.makePartName(tbl.getPartCols(),
-              tp.getValues());
-        } catch (MetaException e) {
-          throw new HiveException("Invalid partition for table " + tbl.getName(),
-              e);
-        }
-        this.partPath = new Path(tp.getSd().getLocation());
-      } else {
-        // We are in the HACK territory.
-        // SemanticAnalyzer expects a single partition whose schema
-        // is same as the table partition.
+        this.partName = Warehouse.makePartName(tbl.getPartCols(),
+            tp.getValues());
+      } catch (MetaException e) {
+        throw new HiveException("Invalid partition for table " + tbl.getName(),
+            e);
      }
+      this.partPath = new Path(tp.getSd().getLocation());
+    } else {
+      // We are in the HACK territory.
+      // SemanticAnalyzer expects a single partition whose schema
+      // is same as the table partition.
+      this.partPath = table.getPath();
     }

-
-
-    public String getName() {
-      return partName;
-    }
-    public Table getTable() {
-      return (this.table);
-    }
+    this.spec = new LinkedHashMap<String, String>(tbl.createSpec(tp));
+    this.partURI = partPath.toUri();
+  }

-    public Path [] getPath() {
-      Path [] ret = new Path [1];
-      ret[0] = this.partPath;
-      return(ret);
-    }
+  public String getName() {
+    return partName;
+  }

-    public Path getPartitionPath() {
-      return this.partPath;
-    }
+  public Table getTable() {
+    return (this.table);
+  }

-    final public URI getDataLocation() {
-      return this.partURI;
-    }
+  public Path [] getPath() {
+    Path [] ret = new Path [1];
+    ret[0] = this.partPath;
+    return(ret);
+  }

-    /**
-     * The number of buckets is a property of the partition. However - internally we are just
-     * storing it as a property of the table as a short term measure.
-     */
-    public int getBucketCount() {
-      return this.table.getNumBuckets();
-      /*
+  public Path getPartitionPath() {
+    return this.partPath;
+  }
+
+  final public URI getDataLocation() {
+    return this.partURI;
+  }
+
+  /**
+   * The number of buckets is a property of the partition. However - internally we are just
+   * storing it as a property of the table as a short term measure.
+   */
+  public int getBucketCount() {
+    return this.table.getNumBuckets();
+    /*
      TODO: Keeping this code around for later use when we will support sampling on tables which are not created with CLUSTERED INTO clause
-
+
      // read from table meta data
      int numBuckets = this.table.getNumBuckets();
      if (numBuckets == -1) {
@@ -198,147 +198,148 @@
        }
      }
      return numBuckets;
-      */
+    */
+  }
+
+  public List<String> getBucketCols() {
+    return this.table.getBucketCols();
+  }
+
+  /**
+   * mapping from bucket number to bucket path
+   */
+  //TODO: add test case and clean it up
+  @SuppressWarnings("nls")
+  public Path getBucketPath(int bucketNum) {
+    try {
+      FileSystem fs = FileSystem.get(this.table.getDataLocation(), Hive.get().getConf());
+      String pathPattern = this.partPath.toString();
+      if (getBucketCount() > 0) {
+        pathPattern = pathPattern + "/*";
+      }
+      LOG.info("Path pattern = " + pathPattern);
+      FileStatus srcs[] = fs.globStatus(new Path(pathPattern));
+      Arrays.sort(srcs);
+      for (FileStatus src: srcs) {
+        LOG.info("Got file: " + src.getPath());
+      }
+      return srcs[bucketNum].getPath();
    }
-
-    public List<String> getBucketCols() {
-      return this.table.getBucketCols();
+    catch (Exception e) {
+      throw new RuntimeException("Cannot get bucket path for bucket " + bucketNum, e);
    }
+    // return new Path(this.partPath, String.format("part-%1$05d", bucketNum));
+  }

-    /**
-     * mapping from bucket number to bucket path
-     */
-    //TODO: add test case and clean it up
-    @SuppressWarnings("nls")
-    public Path getBucketPath(int bucketNum) {
-      try {
-        FileSystem fs = FileSystem.get(this.table.getDataLocation(), Hive.get().getConf());
-        String pathPattern = this.partPath.toString();
-        if (getBucketCount() > 0) {
-          pathPattern = pathPattern + "/*";
-        }
-        LOG.info("Path pattern = " + pathPattern);
-        FileStatus srcs[] = fs.globStatus(new Path(pathPattern));
-        for (FileStatus src: srcs) {
-          LOG.info("Got file: " + src.getPath());
-        }
-        return srcs[bucketNum].getPath();
-      }
-      catch (Exception e) {
-        throw new RuntimeException("Cannot get bucket path for bucket " + bucketNum, e);
-      }
-      // return new Path(this.partPath, String.format("part-%1$05d", bucketNum));
-    }
+  /**
+   * mapping from a Path to the bucket number if any
+   */
+  private static Pattern bpattern = Pattern.compile("part-([0-9][0-9][0-9][0-9][0-9])");

-    /**
-     * mapping from a Path to the bucket number if any
-     */
-    private static Pattern bpattern = Pattern.compile("part-([0-9][0-9][0-9][0-9][0-9])");
-
-    private String partName;
-    @SuppressWarnings("nls")
-    public static int getBucketNum(Path p) {
-      Matcher m = bpattern.matcher(p.getName());
-      if(m.find()) {
-        String bnum_str = m.group(1);
-        try {
-          return (Integer.parseInt(bnum_str));
-        } catch (NumberFormatException e) {
-          throw new RuntimeException("Unexpected error parsing: "+p.getName()+","+bnum_str);
-        }
-      }
-      return 0;
+  private String partName;
+  @SuppressWarnings("nls")
+  public static int getBucketNum(Path p) {
+    Matcher m = bpattern.matcher(p.getName());
+    if(m.find()) {
+      String bnum_str = m.group(1);
+      try {
+        return (Integer.parseInt(bnum_str));
+      } catch (NumberFormatException e) {
+        throw new RuntimeException("Unexpected error parsing: "+p.getName()+","+bnum_str);
+      }
    }
+    return 0;
+  }

-    @SuppressWarnings("nls")
-    public Path [] getPath(Sample s) throws HiveException {
-      if(s == null) {
-        return getPath();
-      } else {
-        int bcount = this.getBucketCount();
-        if(bcount == 0) {
-          return getPath();
-        }
+  @SuppressWarnings("nls")
+  public Path [] getPath(Sample s) throws HiveException {
+    if(s == null) {
+      return getPath();
+    } else {
+      int bcount = this.getBucketCount();
+      if(bcount == 0) {
+        return getPath();
+      }

-        Dimension d = s.getSampleDimension();
-        if(!d.getDimensionId().equals(this.table.getBucketingDimensionId())) {
-          // if the bucket dimension is not the same as the sampling dimension
-          // we must scan all the data
-          return getPath();
-        }
+      Dimension d = s.getSampleDimension();
+      if(!d.getDimensionId().equals(this.table.getBucketingDimensionId())) {
+        // if the bucket dimension is not the same as the sampling dimension
+        // we must scan all the data
+        return getPath();
+      }

-        int scount = s.getSampleFraction();
-        ArrayList<Path> ret = new ArrayList<Path> ();
+      int scount = s.getSampleFraction();
+      ArrayList<Path> ret = new ArrayList<Path> ();

-        if(bcount == scount) {
-          ret.add(getBucketPath(s.getSampleNum()-1));
-        } else if (bcount < scount) {
-          if((scount/bcount)*bcount != scount) {
-            throw new HiveException("Sample Count"+scount+" is not a multiple of bucket count " +
-                bcount + " for table " + this.table.getName());
-          }
-          // undersampling a bucket
-          ret.add(getBucketPath((s.getSampleNum()-1)%bcount));
-        } else if (bcount > scount) {
-          if((bcount/scount)*scount != bcount) {
-            throw new HiveException("Sample Count"+scount+" is not a divisor of bucket count " +
-                bcount + " for table " + this.table.getName());
-          }
-          // sampling multiple buckets
-          for(int i=0; i<bcount/scount; i++) {
-            ret.add(getBucketPath(i*scount + (s.getSampleNum()-1)));
-          }
-        }
-        return(ret.toArray(new Path[ret.size()]));
-      }
-    }
+      if(bcount == scount) {
+        ret.add(getBucketPath(s.getSampleNum()-1));
+      } else if (bcount < scount) {
+        if((scount/bcount)*bcount != scount) {
+          throw new HiveException("Sample Count"+scount+" is not a multiple of bucket count " +
+              bcount + " for table " + this.table.getName());
+        }
+        // undersampling a bucket
+        ret.add(getBucketPath((s.getSampleNum()-1)%bcount));
+      } else if (bcount > scount) {
+        if((bcount/scount)*scount != bcount) {
+          throw new HiveException("Sample Count"+scount+" is not a divisor of bucket count " +
+              bcount + " for table " + this.table.getName());
+        }
+        // sampling multiple buckets
+        for(int i=0; i<bcount/scount; i++) {
+          ret.add(getBucketPath(i*scount + (s.getSampleNum()-1)));
+        }
+      }
+      return(ret.toArray(new Path[ret.size()]));
+    }
+  }

-    public LinkedHashMap<String, String> getSpec() {
-      return this.spec;
-    }

-    /**
-     * Replaces files in the partition with new data set specified by srcf. Works by moving files
+   *
+   * @param srcf Files to be moved. Leaf Directories or Globbed File Paths
+   */
+  @SuppressWarnings("nls")
+  protected void replaceFiles(Path srcf) throws HiveException {
+    FileSystem fs;
+    try {
+      fs = FileSystem.get(table.getDataLocation(), Hive.get().getConf());
+      Hive.get().replaceFiles(srcf, partPath, fs);
+    } catch (IOException e) {
+      throw new HiveException("addFiles: filesystem error in check phase", e);
    }
+  }

-    /**
-     * Inserts files specified into the partition. Works by moving files
-     *
-     * @param srcf Files to be moved. Leaf Directories or Globbed File Paths
-     */
-    @SuppressWarnings("nls")
-    protected void copyFiles(Path srcf) throws HiveException {
-      FileSystem fs;
-      try {
-        fs = FileSystem.get(table.getDataLocation(), Hive.get().getConf());
-        Hive.get().copyFiles(srcf, partPath, fs);
-      } catch (IOException e) {
-        throw new HiveException("addFiles: filesystem error in check phase", e);
-      }
+  /**
+   * Inserts files specified into the partition. Works by moving files
+   *
+   * @param srcf Files to be moved. Leaf Directories or Globbed File Paths
+   */
+  @SuppressWarnings("nls")
+  protected void copyFiles(Path srcf) throws HiveException {
+    FileSystem fs;
+    try {
+      fs = FileSystem.get(table.getDataLocation(), Hive.get().getConf());
+      Hive.get().copyFiles(srcf, partPath, fs);
+    } catch (IOException e) {
+      throw new HiveException("addFiles: filesystem error in check phase", e);
    }
+  }

-    @SuppressWarnings("nls")
-    @Override
-    public String toString() {
-      String pn = "Invalid Partition";
-      try {
-        pn = Warehouse.makePartName(spec);
-      } catch (MetaException e) {
-        // ignore as we most probably in an exception path already otherwise this error wouldn't occur
-      }
-      return table.toString() + "(" + pn + ")";
+  @SuppressWarnings("nls")
+  @Override
+  public String toString() {
+    String pn = "Invalid Partition";
+    try {
+      pn = Warehouse.makePartName(spec);
+    } catch (MetaException e) {
+      // ignore as we most probably in an exception path already otherwise this error wouldn't occur
    }
+    return table.toString() + "(" + pn + ")";
+  }
 }