diff --git beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java index 1078ca3..c376687 100644 --- beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java @@ -287,7 +287,7 @@ public void doInit(String toVersion) throws HiveMetaException { } } catch (IOException e) { throw new HiveMetaException("Schema initialization FAILED!" + - " Metastore state would be inconsistent !!", e); + " Metastore state would be inconsistent !!", e); } } diff --git common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index e41f87c..cd4beeb 100644 --- common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -116,8 +116,7 @@ public String getAggregator(Configuration conf) { /** * @return List of all supported statistics */ - public static final String[] supportedStats = new String[] - {NUM_FILES,ROW_COUNT,TOTAL_SIZE,RAW_DATA_SIZE}; + public static final String[] supportedStats = {NUM_FILES,ROW_COUNT,TOTAL_SIZE,RAW_DATA_SIZE}; /** * @return List of all statistics that need to be collected during query execution. These are @@ -142,8 +141,8 @@ public String getAggregator(Configuration conf) { public static final String FALSE = "false"; - public static boolean areStatsUptoDate(Map params) { - String statsAcc = params.get(COLUMN_STATS_ACCURATE); - return statsAcc == null ? false : statsAcc.equals(TRUE); + public static boolean areStatsUptoDate(Map params) { + String statsAcc = params.get(COLUMN_STATS_ACCURATE); + return statsAcc == null ? false : statsAcc.equals(TRUE); } } diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index c02cd93..d9ea404 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -800,7 +800,7 @@ " config (hive.exec.orc.block.padding.tolerance)."), HIVEMERGEINPUTFORMATSTRIPELEVEL("hive.merge.input.format.stripe.level", "org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat", - "Input file format to use for ORC stripe level merging (for internal use only)"), + "Input file format to use for ORC stripe level merging (for internal use only)"), HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS( "hive.merge.current.job.has.dynamic.partitions", false, ""), diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java index d2127e1..0ca8ccc 100644 --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java @@ -653,7 +653,7 @@ public EnqueueBean mapReduceStreaming(@FormParam("input") List inputs, verifyParam(inputs, "input"); verifyParam(mapper, "mapper"); verifyParam(reducer, "reducer"); - + Map userArgs = new HashMap(); userArgs.put("user.name", getDoAsUser()); userArgs.put("input", inputs); @@ -680,8 +680,8 @@ public EnqueueBean mapReduceStreaming(@FormParam("input") List inputs, /** * Run a MapReduce Jar job. * Params correspond to the REST api params - * @param usesHcatalog if {@code true}, means the Jar uses HCat and thus needs to access - * metastore, which requires additional steps for WebHCat to perform in a secure cluster. 
+ * @param usesHcatalog if {@code true}, means the Jar uses HCat and thus needs to access + * metastore, which requires additional steps for WebHCat to perform in a secure cluster. * @param callback URL which WebHCat will call when the hive job finishes * @see org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob */ @@ -703,7 +703,7 @@ public EnqueueBean mapReduceJar(@FormParam("jar") String jar, verifyUser(); verifyParam(jar, "jar"); verifyParam(mainClass, "class"); - + Map userArgs = new HashMap(); userArgs.put("user.name", getDoAsUser()); userArgs.put("jar", jar); @@ -729,7 +729,7 @@ public EnqueueBean mapReduceJar(@FormParam("jar") String jar, * Run a Pig job. * Params correspond to the REST api params. If '-useHCatalog' is in the {@code pigArgs, usesHcatalog}, * is interpreted as true. - * @param usesHcatalog if {@code true}, means the Pig script uses HCat and thus needs to access + * @param usesHcatalog if {@code true}, means the Pig script uses HCat and thus needs to access * metastore, which requires additional steps for WebHCat to perform in a secure cluster. * This does nothing to ensure that Pig is installed on target node in the cluster. * @param callback URL which WebHCat will call when the hive job finishes @@ -752,7 +752,7 @@ public EnqueueBean pig(@FormParam("execute") String execute, if (execute == null && srcFile == null) { throw new BadParam("Either execute or file parameter required"); } - + //add all function arguments to a map Map userArgs = new HashMap(); userArgs.put("user.name", getDoAsUser()); @@ -819,7 +819,7 @@ public EnqueueBean sqoop(@FormParam("command") String command, * @param execute SQL statement to run, equivalent to "-e" from hive command line * @param srcFile name of hive script file to run, equivalent to "-f" from hive * command line - * @param hiveArgs additional command line argument passed to the hive command line. + * @param hiveArgs additional command line argument passed to the hive command line. * Please check https://cwiki.apache.org/Hive/languagemanual-cli.html * for detailed explanation of command line arguments * @param otherFiles additional files to be shipped to the launcher, such as the jars @@ -846,7 +846,7 @@ public EnqueueBean hive(@FormParam("execute") String execute, if (execute == null && srcFile == null) { throw new BadParam("Either execute or file parameter required"); } - + //add all function arguments to a map Map userArgs = new HashMap(); userArgs.put("user.name", getDoAsUser()); @@ -903,42 +903,42 @@ public QueueStatusBean deleteJobId(@PathParam("jobid") String jobid) * Example usages: * 1. curl -s 'http://localhost:50111/templeton/v1/jobs?user.name=hsubramaniyan' * Return all the Job IDs submitted by hsubramaniyan - * 2. curl -s + * 2. curl -s * 'http://localhost:50111/templeton/v1/jobs?user.name=hsubramaniyan&showall=true' * Return all the Job IDs that are visible to hsubramaniyan * 3. curl -s * 'http://localhost:50111/templeton/v1/jobs?user.name=hsubramaniyan&jobid=job_201312091733_0003' * Return all the Job IDs for hsubramaniyan after job_201312091733_0003. - * 4. curl -s 'http://localhost:50111/templeton/v1/jobs? + * 4. curl -s 'http://localhost:50111/templeton/v1/jobs? * user.name=hsubramaniyan&jobid=job_201312091733_0003&numrecords=5' - * Return the first 5(atmost) Job IDs submitted by hsubramaniyan after job_201312091733_0003. - * 5. curl -s + * Return the first 5(atmost) Job IDs submitted by hsubramaniyan after job_201312091733_0003. + * 5. 
curl -s * 'http://localhost:50111/templeton/v1/jobs?user.name=hsubramaniyan&numrecords=5' - * Return the first 5(atmost) Job IDs submitted by hsubramaniyan after sorting the Job ID list + * Return the first 5(atmost) Job IDs submitted by hsubramaniyan after sorting the Job ID list * lexicographically. *
*
* Supporting pagination using "jobid" and "numrecords" parameters: * Step 1: Get the start "jobid" = job_xxx_000, "numrecords" = n - * Step 2: Issue a curl command by specifying the user-defined "numrecords" and "jobid" - * Step 3: If list obtained from Step 2 has size equal to "numrecords", retrieve the list's + * Step 2: Issue a curl command by specifying the user-defined "numrecords" and "jobid" + * Step 3: If list obtained from Step 2 has size equal to "numrecords", retrieve the list's * last record and get the Job Id of the last record as job_yyy_k, else quit. * Step 4: set "jobid"=job_yyy_k and go to step 2. - *
+ *
* @param fields If "fields" set to "*", the request will return full details of the job. * If "fields" is missing, will only return the job ID. Currently the value can only * be "*", other values are not allowed and will throw exception. * @param showall If "showall" is set to "true", the request will return all jobs the user * has permission to view, not only the jobs belonging to the user. - * @param jobid If "jobid" is present, the records whose Job Id is lexicographically greater - * than "jobid" are only returned. For example, if "jobid" = "job_201312091733_0001", - * the jobs whose Job ID is greater than "job_201312091733_0001" are returned. The number of + * @param jobid If "jobid" is present, the records whose Job Id is lexicographically greater + * than "jobid" are only returned. For example, if "jobid" = "job_201312091733_0001", + * the jobs whose Job ID is greater than "job_201312091733_0001" are returned. The number of * records returned depends on the value of "numrecords". - * @param numrecords If the "jobid" and "numrecords" parameters are present, the top #numrecords - * records appearing after "jobid" will be returned after sorting the Job Id list - * lexicographically. - * If "jobid" parameter is missing and "numrecords" is present, the top #numrecords will - * be returned after lexicographically sorting the Job Id list. If "jobid" parameter is present + * @param numrecords If the "jobid" and "numrecords" parameters are present, the top #numrecords + * records appearing after "jobid" will be returned after sorting the Job Id list + * lexicographically. + * If "jobid" parameter is missing and "numrecords" is present, the top #numrecords will + * be returned after lexicographically sorting the Job Id list. If "jobid" parameter is present * and "numrecords" is missing, all the records whose Job Id is greater than "jobid" are returned. * @return list of job items based on the filter conditions specified by the user. */ @@ -950,7 +950,7 @@ public QueueStatusBean deleteJobId(@PathParam("jobid") String jobid) @QueryParam("jobid") String jobid, @QueryParam("numrecords") String numrecords) throws NotAuthorizedException, BadParam, IOException, InterruptedException { - + verifyUser(); boolean showDetails = false; @@ -971,9 +971,9 @@ public QueueStatusBean deleteJobId(@PathParam("jobid") String jobid) try { if (numrecords != null) { numRecords = Integer.parseInt(numrecords); - if (numRecords <= 0) { - throw new BadParam("numrecords should be an integer > 0"); - } + if (numRecords <= 0) { + throw new BadParam("numrecords should be an integer > 0"); + } } else { numRecords = -1; @@ -983,18 +983,18 @@ public QueueStatusBean deleteJobId(@PathParam("jobid") String jobid) throw new BadParam("Invalid numrecords format: numrecords should be an integer > 0"); } - // Sort the list lexicographically + // Sort the list lexicographically Collections.sort(list); for (String job : list) { // If numRecords = -1, fetch all records. // Hence skip all the below checks when numRecords = -1. 
if (numRecords != -1) { - // If currRecord >= numRecords, we have already fetched the top #numRecords + // If currRecord >= numRecords, we have already fetched the top #numRecords if (currRecord >= numRecords) { break; - } - // If the current record needs to be returned based on the + } + // If the current record needs to be returned based on the // filter conditions specified by the user, increment the counter else if ((jobid != null && job.compareTo(jobid) > 0) || jobid == null) { currRecord++; @@ -1101,7 +1101,7 @@ public void verifyDdlParam(String param, String name) * value of user.name query param, in kerberos mode it's the kinit'ed user. */ private String getRequestingUser() { - if (theSecurityContext == null) { + if (theSecurityContext == null) { return null; } String userName = null; @@ -1114,7 +1114,7 @@ private String getRequestingUser() { if(userName == null) { return null; } - //map hue/foo.bar@something.com->hue since user group checks + //map hue/foo.bar@something.com->hue since user group checks // and config files are in terms of short name return UserGroupInformation.createRemoteUser(userName).getShortUserName(); } @@ -1161,7 +1161,7 @@ private static String getRequestingHost(String requestingUser, HttpServletReques return unkHost; } } - + private void checkEnableLogPrerequisite(boolean enablelog, String statusdir) throws BadParam { if (enablelog && !TempletonUtils.isset(statusdir)) throw new BadParam("enablelog is only applicable when statusdir is set"); diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java index addd0c2..36b64da 100644 --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java @@ -169,9 +169,9 @@ public String getParent() throws IOException { String childJobIDs = getField("children"); if (childJobIDs != null) { for (String jobid : childJobIDs.split(",")) { - children.add(new JobState(jobid, config)); + children.add(new JobState(jobid, config)); } - } + } return children; } diff --git jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java index 5898a6b..ec5e555 100644 --- jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java +++ jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java @@ -160,7 +160,7 @@ public InputStream getBinaryStream(int columnIndex) throws SQLException { } public InputStream getBinaryStream(String columnName) throws SQLException { - return getBinaryStream(findColumn(columnName)); + return getBinaryStream(findColumn(columnName)); } public Blob getBlob(int i) throws SQLException { diff --git metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index 2627ff0..cd8c5c2 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -65,20 +65,20 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl if (statsObj.getStatsData().isSetBooleanStats()) { BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats(); mColStats.setBooleanStats( - boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, + boolStats.isSetNumTrues() ? 
boolStats.getNumTrues() : null, boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null); } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); mColStats.setLongStats( - longStats.isSetNumNulls() ? longStats.getNumNulls() : null, + longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); mColStats.setDoubleStats( - doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, + doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); @@ -87,20 +87,20 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; mColStats.setDecimalStats( - decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, - decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, + decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( - stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, + stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, - stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, + stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats(); mColStats.setBinaryStats( - binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, + binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, binaryStats.isSetAvgColLen() ? 
binaryStats.getAvgColLen() : null); } @@ -109,9 +109,9 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl public static void setFieldsIntoOldStats( MTableColumnStatistics mStatsObj, MTableColumnStatistics oldStatsObj) { - if (mStatsObj.getAvgColLen() != null) { - oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); - } + if (mStatsObj.getAvgColLen() != null) { + oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); + } if (mStatsObj.getLongHighValue() != null) { oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); } @@ -131,19 +131,19 @@ public static void setFieldsIntoOldStats( oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); } if (mStatsObj.getMaxColLen() != null) { - oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); + oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); } if (mStatsObj.getNumDVs() != null) { - oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); + oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); } if (mStatsObj.getNumFalses() != null) { - oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); + oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); } if (mStatsObj.getNumTrues() != null) { - oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); + oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); } if (mStatsObj.getNumNulls() != null) { - oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); } oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); } @@ -152,13 +152,13 @@ public static void setFieldsIntoOldStats( MPartitionColumnStatistics mStatsObj, MPartitionColumnStatistics oldStatsObj) { if (mStatsObj.getAvgColLen() != null) { oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); - } + } if (mStatsObj.getLongHighValue() != null) { - oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); - } - if (mStatsObj.getDoubleHighValue() != null) { - oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); - } + oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); + } + if (mStatsObj.getDoubleHighValue() != null) { + oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); + } oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); if (mStatsObj.getLongLowValue() != null) { oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); @@ -292,20 +292,20 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( if (statsObj.getStatsData().isSetBooleanStats()) { BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats(); mColStats.setBooleanStats( - boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, + boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null); } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); mColStats.setLongStats( - longStats.isSetNumNulls() ? longStats.getNumNulls() : null, + longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); mColStats.setDoubleStats( - doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, + doubleStats.isSetNumNulls() ? 
doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); @@ -314,20 +314,20 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; mColStats.setDecimalStats( - decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, - decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, + decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); mColStats.setStringStats( - stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, + stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, - stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, + stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats(); mColStats.setBinaryStats( - binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, + binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index a2975cb..7477199 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -348,17 +348,17 @@ public int execute(DriverContext driverContext) { // want to isolate any potential issue it may introduce. ArrayList> dp = db.loadDynamicPartitions( - tbd.getSourcePath(), - tbd.getTable().getTableName(), - tbd.getPartitionSpec(), - tbd.getReplace(), - dpCtx.getNumDPCols(), - tbd.getHoldDDLTime(), - isSkewedStoredAsDirs(tbd)); + tbd.getSourcePath(), + tbd.getTable().getTableName(), + tbd.getPartitionSpec(), + tbd.getReplace(), + dpCtx.getNumDPCols(), + tbd.getHoldDDLTime(), + isSkewedStoredAsDirs(tbd)); if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) { throw new HiveException("This query creates no partitions." + - " To turn off this error, set hive.error.on.empty.partition=false."); + " To turn off this error, set hive.error.on.empty.partition=false."); } // for each partition spec, get the partition @@ -412,13 +412,13 @@ public int execute(DriverContext driverContext) { numBuckets, sortCols); } - dc = new DataContainer(table.getTTable(), partn.getTPartition()); - // add this partition to post-execution hook - if (work.getOutputs() != null) { - work.getOutputs().add(new WriteEntity(partn, + dc = new DataContainer(table.getTTable(), partn.getTPartition()); + // add this partition to post-execution hook + if (work.getOutputs() != null) { + work.getOutputs().add(new WriteEntity(partn, (tbd.getReplace() ? 
WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT))); - } + } } } if (SessionState.get() != null && dc != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java index e917cdf..4b267bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java @@ -45,61 +45,60 @@ public class PTFOperator extends Operator implements Serializable { - private static final long serialVersionUID = 1L; - boolean isMapOperator; - - transient KeyWrapperFactory keyWrapperFactory; - protected transient KeyWrapper currentKeys; - protected transient KeyWrapper newKeys; - /* - * for map-side invocation of PTFs, we cannot utilize the currentkeys null check - * to decide on invoking startPartition in streaming mode. Hence this extra flag. - */ - transient boolean firstMapRow; - transient Configuration hiveConf; - transient PTFInvocation ptfInvocation; - - /* - * 1. Find out if the operator is invoked at Map-Side or Reduce-side - * 2. Get the deserialized QueryDef - * 3. Reconstruct the transient variables in QueryDef - * 4. Create input partition to store rows coming from previous operator - */ - @Override - protected void initializeOp(Configuration jobConf) throws HiveException { - hiveConf = jobConf; - // if the parent is ExtractOperator, this invocation is from reduce-side - isMapOperator = conf.isMapSide(); - - reconstructQueryDef(hiveConf); - - if (isMapOperator) { - PartitionedTableFunctionDef tDef = conf.getStartOfChain(); - outputObjInspector = tDef.getRawInputShape().getOI(); - } else { - outputObjInspector = conf.getFuncDef().getOutputShape().getOI(); - } - - setupKeysWrapper(inputObjInspectors[0]); - - ptfInvocation = setupChain(); - ptfInvocation.initializeStreaming(jobConf, isMapOperator); - firstMapRow = true; - - super.initializeOp(jobConf); - } - - @Override - protected void closeOp(boolean abort) throws HiveException { - super.closeOp(abort); + private static final long serialVersionUID = 1L; + boolean isMapOperator; + + transient KeyWrapperFactory keyWrapperFactory; + protected transient KeyWrapper currentKeys; + protected transient KeyWrapper newKeys; + /* + * for map-side invocation of PTFs, we cannot utilize the currentkeys null check + * to decide on invoking startPartition in streaming mode. Hence this extra flag. + */ + transient boolean firstMapRow; + transient Configuration hiveConf; + transient PTFInvocation ptfInvocation; + + /* + * 1. Find out if the operator is invoked at Map-Side or Reduce-side + * 2. Get the deserialized QueryDef + * 3. Reconstruct the transient variables in QueryDef + * 4. 
Create input partition to store rows coming from previous operator + */ + @Override + protected void initializeOp(Configuration jobConf) throws HiveException { + hiveConf = jobConf; + // if the parent is ExtractOperator, this invocation is from reduce-side + isMapOperator = conf.isMapSide(); + + reconstructQueryDef(hiveConf); + + if (isMapOperator) { + PartitionedTableFunctionDef tDef = conf.getStartOfChain(); + outputObjInspector = tDef.getRawInputShape().getOI(); + } else { + outputObjInspector = conf.getFuncDef().getOutputShape().getOI(); + } + + setupKeysWrapper(inputObjInspectors[0]); + + ptfInvocation = setupChain(); + ptfInvocation.initializeStreaming(jobConf, isMapOperator); + firstMapRow = true; + + super.initializeOp(jobConf); + } + + @Override + protected void closeOp(boolean abort) throws HiveException { + super.closeOp(abort); ptfInvocation.finishPartition(); ptfInvocation.close(); } - @Override - public void processOp(Object row, int tag) throws HiveException - { - if (!isMapOperator ) { + @Override + public void processOp(Object row, int tag) throws HiveException { + if (!isMapOperator ) { /* * checkif current row belongs to the current accumulated Partition: * - If not: @@ -129,51 +128,51 @@ public void processOp(Object row, int tag) throws HiveException } ptfInvocation.processRow(row); - } - - /** - * Initialize the visitor to use the QueryDefDeserializer Use the order - * defined in QueryDefWalker to visit the QueryDef - * - * @param hiveConf - * @throws HiveException - */ - protected void reconstructQueryDef(Configuration hiveConf) throws HiveException { - - PTFDeserializer dS = - new PTFDeserializer(conf, (StructObjectInspector)inputObjInspectors[0], hiveConf); - dS.initializePTFChain(conf.getFuncDef()); - } - - protected void setupKeysWrapper(ObjectInspector inputOI) throws HiveException { - PartitionDef pDef = conf.getStartOfChain().getPartition(); - List exprs = pDef.getExpressions(); - int numExprs = exprs.size(); - ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numExprs]; - ObjectInspector[] keyOIs = new ObjectInspector[numExprs]; - ObjectInspector[] currentKeyOIs = new ObjectInspector[numExprs]; - - for(int i=0; i exprs = pDef.getExpressions(); + int numExprs = exprs.size(); + ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numExprs]; + ObjectInspector[] keyOIs = new ObjectInspector[numExprs]; + ObjectInspector[] currentKeyOIs = new ObjectInspector[numExprs]; + + for(int i=0; i fnDefs = new Stack(); PTFInputDef iDef = conf.getFuncDef(); @@ -197,9 +196,9 @@ private PTFInvocation setupChain() { fnDefs.push((PartitionedTableFunctionDef) iDef); iDef = ((PartitionedTableFunctionDef) iDef).getInput(); } - + PTFInvocation curr = null, first = null; - + while(!fnDefs.isEmpty()) { PartitionedTableFunctionDef currFn = fnDefs.pop(); curr = new PTFInvocation(curr, currFn.getTFunction()); @@ -222,26 +221,26 @@ public static void connectLeadLagFunctionsToPartition(PTFDesc ptfDesc, llFn.setpItr(pItr); } } - + /* * Responsible for the flow of rows through the PTF Chain. - * An Invocation wraps a TableFunction. - * The PTFOp hands the chain each row through the processRow call. + * An Invocation wraps a TableFunction. + * The PTFOp hands the chain each row through the processRow call. * It also notifies the chain of when a Partition starts/finishes. - * + * * There are several combinations depending * whether the TableFunction and its successor support Streaming or Batch mode. 
- * + * * Combination 1: Streaming + Streaming * - Start Partition: invoke startPartition on tabFn. - * - Process Row: invoke process Row on tabFn. + * - Process Row: invoke process Row on tabFn. * Any output rows hand to next tabFn in chain or forward to next Operator. * - Finish Partition: invoke finishPartition on tabFn. * Any output rows hand to next tabFn in chain or forward to next Operator. - * + * * Combination 2: Streaming + Batch * same as Combination 1 - * + * * Combination 3: Batch + Batch * - Start Partition: create or reset the Input Partition for the tabFn * caveat is: if prev is also batch and it is not providing an Output Iterator @@ -251,22 +250,22 @@ public static void connectLeadLagFunctionsToPartition(PTFDesc ptfDesc, * If function gives an Output Partition: set it on next Invocation's Input Partition * If function gives an Output Iterator: iterate and call processRow on next Invocation. * For last Invocation in chain: forward rows to next Operator. - * + * * Combination 3: Batch + Stream * Similar to Combination 3, except Finish Partition behavior slightly different * - Finish Partition : invoke evaluate on tabFn on Input Partition * iterate output rows: hand to next tabFn in chain or forward to next Operator. - * + * */ class PTFInvocation { - + PTFInvocation prev; PTFInvocation next; TableFunctionEvaluator tabFn; PTFPartition inputPart; PTFPartition outputPart; Iterator outputPartRowsItr; - + public PTFInvocation(PTFInvocation prev, TableFunctionEvaluator tabFn) { this.prev = prev; this.tabFn = tabFn; @@ -274,19 +273,19 @@ public PTFInvocation(PTFInvocation prev, TableFunctionEvaluator tabFn) { prev.next = this; } } - + boolean isOutputIterator() { return tabFn.canAcceptInputAsStream() || tabFn.canIterateOutput(); } - + boolean isStreaming() { return tabFn.canAcceptInputAsStream(); } - + void initializeStreaming(Configuration cfg, boolean isMapSide) throws HiveException { PartitionedTableFunctionDef tabDef = tabFn.getTableDef(); PTFInputDef inputDef = tabDef.getInput(); - ObjectInspector inputOI = conf.getStartOfChain() == tabDef ? + ObjectInspector inputOI = conf.getStartOfChain() == tabDef ? inputObjInspectors[0] : inputDef.getOutputShape().getOI(); tabFn.initializeStreaming(cfg, (StructObjectInspector) inputOI, isMapSide); @@ -295,7 +294,7 @@ void initializeStreaming(Configuration cfg, boolean isMapSide) throws HiveExcept next.initializeStreaming(cfg, isMapSide); } } - + void startPartition() throws HiveException { if ( isStreaming() ) { tabFn.startPartition(); @@ -312,7 +311,7 @@ void startPartition() throws HiveException { next.startPartition(); } } - + void processRow(Object row) throws HiveException { if ( isStreaming() ) { handleOutputRows(tabFn.processRow(row)); @@ -320,7 +319,7 @@ void processRow(Object row) throws HiveException { inputPart.append(row); } } - + void handleOutputRows(List outRows) throws HiveException { if ( outRows != null ) { for (Object orow : outRows ) { @@ -332,7 +331,7 @@ void handleOutputRows(List outRows) throws HiveException { } } } - + void finishPartition() throws HiveException { if ( isStreaming() ) { handleOutputRows(tabFn.finishPartition()); @@ -353,7 +352,7 @@ void finishPartition() throws HiveException { } } } - + if ( next != null ) { next.finishPartition(); } else { @@ -364,7 +363,7 @@ void finishPartition() throws HiveException { } } } - + /** * Create a new Partition. 
* A partition has 2 OIs: the OI for the rows being put in and the OI for the rows @@ -388,7 +387,7 @@ void finishPartition() throws HiveException { private void createInputPartition() throws HiveException { PartitionedTableFunctionDef tabDef = tabFn.getTableDef(); PTFInputDef inputDef = tabDef.getInput(); - ObjectInspector inputOI = conf.getStartOfChain() == tabDef ? + ObjectInspector inputOI = conf.getStartOfChain() == tabDef ? inputObjInspectors[0] : inputDef.getOutputShape().getOI(); SerDe serde = conf.isMapSide() ? tabDef.getInput().getOutputShape().getSerde() : @@ -400,7 +399,7 @@ private void createInputPartition() throws HiveException { (StructObjectInspector) inputOI, outputOI); } - + void close() { if ( inputPart != null ) { inputPart.close(); @@ -411,5 +410,5 @@ void close() { } } } - + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionTableFunctionDescription.java ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionTableFunctionDescription.java index bd48531..463a148 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionTableFunctionDescription.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionTableFunctionDescription.java @@ -27,14 +27,13 @@ import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction; @Retention(RetentionPolicy.RUNTIME) -@Target({ElementType.TYPE}) +@Target(ElementType.TYPE) @Documented -public @interface PartitionTableFunctionDescription -{ - Description description (); +public @interface PartitionTableFunctionDescription { + Description description (); - /** - * if true it is not usable in the language. {@link WindowingTableFunction} is the only internal function. - */ - boolean isInternal() default false; + /** + * if true it is not usable in the language. {@link WindowingTableFunction} is the only internal function. 
+ */ + boolean isInternal() default false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 70047a2..9f727c1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -1363,8 +1363,8 @@ public static String getFileExtension(JobConf jc, boolean isCompressed, codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class); codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, jc); } - return (SequenceFile.createWriter(fs, jc, file, keyClass, valClass, compressionType, codec, - progressable)); + return SequenceFile.createWriter(fs, jc, file, keyClass, valClass, compressionType, codec, + progressable); } @@ -3525,7 +3525,7 @@ public static boolean createDirsWithPermission(Configuration conf, Path mkdir, return createDirsWithPermission(conf, mkdir, fsPermission, recursive); } - private static void resetConfAndCloseFS (Configuration conf, boolean unsetUmask, + private static void resetConfAndCloseFS (Configuration conf, boolean unsetUmask, String origUmask, FileSystem fs) throws IOException { if (unsetUmask) { if (origUmask != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java index f61eab4..b2be226 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java @@ -28,39 +28,38 @@ import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction; @Retention(RetentionPolicy.RUNTIME) -@Target({ElementType.TYPE}) +@Target(ElementType.TYPE) @Documented -public @interface WindowFunctionDescription -{ - Description description (); - /** - * controls whether this function can be applied to a Window. - *
- * Ranking function: Rank, Dense_Rank, Percent_Rank and Cume_Dist don't operate on Windows. - * Why? a window specification implies a row specific range i.e. every row gets its own set of rows to process the UDAF on. - * For ranking defining a set of rows for every row makes no sense. - *
- * All other UDAFs can be computed for a Window. - */ - boolean supportsWindow() default true; - /** - * A WindowFunc is implemented as {@link GenericUDAFResolver2}. It returns only one value. - * If this is true then the function must return a List which is taken to be the column for this function in the Output table returned by the - * {@link WindowingTableFunction}. Otherwise the output is assumed to be a single value, the column of the Output will contain the same value - * for all the rows. - */ - boolean pivotResult() default false; +public @interface WindowFunctionDescription { + Description description (); + /** + * controls whether this function can be applied to a Window. + *
+ * Ranking function: Rank, Dense_Rank, Percent_Rank and Cume_Dist don't operate on Windows. + * Why? a window specification implies a row specific range i.e. every row gets its own set of rows to process the UDAF on. + * For ranking defining a set of rows for every row makes no sense. + *
+ * All other UDAFs can be computed for a Window. + */ + boolean supportsWindow() default true; + /** + * A WindowFunc is implemented as {@link GenericUDAFResolver2}. It returns only one value. + * If this is true then the function must return a List which is taken to be the column for this function in the Output table returned by the + * {@link WindowingTableFunction}. Otherwise the output is assumed to be a single value, the column of the Output will contain the same value + * for all the rows. + */ + boolean pivotResult() default false; - /** - * Used in translations process to validate arguments - * @return true if ranking function - */ - boolean rankingFunction() default false; + /** + * Used in translations process to validate arguments + * @return true if ranking function + */ + boolean rankingFunction() default false; - /** - * Using in analytical functions to specify that UDF implies an ordering - * @return true if the function implies order - */ - boolean impliesOrder() default false; + /** + * Using in analytical functions to specify that UDF implies an ordering + * @return true if the function implies order + */ + boolean impliesOrder() default false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java index 9b2d787..efecb05 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java @@ -22,45 +22,39 @@ import org.apache.hive.common.util.AnnotationUtils; @SuppressWarnings("deprecation") -public class WindowFunctionInfo implements CommonFunctionInfo -{ - boolean supportsWindow = true; - boolean pivotResult = false; - boolean impliesOrder = false; - FunctionInfo fInfo; - - WindowFunctionInfo(FunctionInfo fInfo) - { - assert fInfo.isGenericUDAF(); - this.fInfo = fInfo; - Class wfnCls = fInfo.getGenericUDAFResolver().getClass(); - WindowFunctionDescription def = +public class WindowFunctionInfo implements CommonFunctionInfo { + boolean supportsWindow = true; + boolean pivotResult = false; + boolean impliesOrder = false; + FunctionInfo fInfo; + + WindowFunctionInfo(FunctionInfo fInfo) { + assert fInfo.isGenericUDAF(); + this.fInfo = fInfo; + Class wfnCls = fInfo.getGenericUDAFResolver().getClass(); + WindowFunctionDescription def = AnnotationUtils.getAnnotation(wfnCls, WindowFunctionDescription.class); - if ( def != null) - { - supportsWindow = def.supportsWindow(); - pivotResult = def.pivotResult(); - impliesOrder = def.impliesOrder(); - } - } + if ( def != null) { + supportsWindow = def.supportsWindow(); + pivotResult = def.pivotResult(); + impliesOrder = def.impliesOrder(); + } + } - public boolean isSupportsWindow() - { - return supportsWindow; - } + public boolean isSupportsWindow() { + return supportsWindow; + } - public boolean isPivotResult() - { - return pivotResult; - } + public boolean isPivotResult() { + return pivotResult; + } - public boolean isImpliesOrder(){ - return impliesOrder; - } - public FunctionInfo getfInfo() - { - return fInfo; - } + public boolean isImpliesOrder() { + return impliesOrder; + } + public FunctionInfo getfInfo() { + return fInfo; + } @Override public Class getFunctionClass() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mapjoin/MapJoinMemoryExhaustionHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/mapjoin/MapJoinMemoryExhaustionHandler.java index 905e65e..547c2bc 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/mapjoin/MapJoinMemoryExhaustionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mapjoin/MapJoinMemoryExhaustionHandler.java @@ -63,7 +63,7 @@ public MapJoinMemoryExhaustionHandler(LogHelper console, double maxMemoryUsage) if(maxHeapSize == -1) { this.maxHeapSize = 200L * 1024L * 1024L; LOG.warn("MemoryMXBean.getHeapMemoryUsage().getMax() returned -1, " + - "defaulting maxHeapSize to 200MB"); + "defaulting maxHeapSize to 200MB"); } else { this.maxHeapSize = maxHeapSize; } @@ -91,4 +91,4 @@ public void checkMemoryStatus(long tableContainerSize, long numRows) throw new MapJoinMemoryExhaustionException(msg); } } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java index a22c3c2..da36848 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java @@ -32,7 +32,7 @@ @SuppressWarnings("deprecation") public class MapJoinTableContainerSerDe { - + private final MapJoinObjectSerDeContext keyContext; private final MapJoinObjectSerDeContext valueContext; public MapJoinTableContainerSerDe(MapJoinObjectSerDeContext keyContext, @@ -70,7 +70,7 @@ public MapJoinPersistableTableContainer load(ObjectInputStream in) } try { Writable keyContainer = keySerDe.getSerializedClass().newInstance(); - Writable valueContainer = valueSerDe.getSerializedClass().newInstance(); + Writable valueContainer = valueSerDe.getSerializedClass().newInstance(); int numKeys = in.readInt(); for (int keyIndex = 0; keyIndex < numKeys; keyIndex++) { MapJoinKeyObject key = new MapJoinKeyObject(); @@ -89,7 +89,7 @@ public MapJoinPersistableTableContainer load(ObjectInputStream in) public void persist(ObjectOutputStream out, MapJoinPersistableTableContainer tableContainer) throws HiveException { int numKeys = tableContainer.size(); - try { + try { out.writeUTF(tableContainer.getClass().getName()); out.writeObject(tableContainer.getMetaData()); out.writeInt(numKeys); @@ -108,7 +108,7 @@ public void persist(ObjectOutputStream out, MapJoinPersistableTableContainer tab throw new ConcurrentModificationException("TableContainer was modified while persisting: " + tableContainer); } } - + public static void persistDummyTable(ObjectOutputStream out) throws IOException { MapJoinPersistableTableContainer tableContainer = new HashMapWrapper(); out.writeUTF(tableContainer.getClass().getName()); @@ -127,8 +127,8 @@ private MapJoinPersistableTableContainer create( return constructor.newInstance(metaData); } catch (Exception e) { String msg = "Error while attemping to create table container" + - " of type: " + name + ", with metaData: " + metaData; + " of type: " + name + ", with metaData: " + metaData; throw new HiveException(msg, e); } } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java index 360096b..d2bfea6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/PTFRowContainer.java @@ -270,7 +270,7 @@ public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, FileSystem fs = finalOutPath.getFileSystem(jc); final SequenceFile.Writer outStream = 
Utilities.createSequenceWriter(jc, fs, finalOutPath, - BytesWritable.class, valueClass, isCompressed, progress); + BytesWritable.class, valueClass, isCompressed, progress); return new PTFRecordWriter(outStream); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java index 5f6df78..4692e33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ -146,7 +146,7 @@ protected Object clone() { duplicateTo(clone); return clone; } - + public void duplicateTo(VectorHashKeyWrapper clone) { clone.longValues = longValues.clone(); clone.doubleValues = doubleValues.clone(); @@ -155,7 +155,7 @@ public void duplicateTo(VectorHashKeyWrapper clone) { // Decimal128 requires deep clone clone.decimalValues = new Decimal128[decimalValues.length]; for(int i = 0; i < decimalValues.length; ++i) { - clone.decimalValues[i] = new Decimal128().update(decimalValues[i]); + clone.decimalValues[i] = new Decimal128().update(decimalValues[i]); } clone.byteValues = new byte[byteValues.length][]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index a78c396..57f4682 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -325,11 +325,11 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th childExpressions, mode, exprDesc.getTypeInfo()); } } else if (exprDesc instanceof ExprNodeNullDesc) { - ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode); + ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode); } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); - } + } if (ve == null) { throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); } @@ -413,8 +413,8 @@ private TypeInfo getCommonTypeForChildExpressions(GenericUDF genericUdf, List evaluator = ExprNodeEvaluatorFactory.get(exprDesc); - ObjectInspector output = evaluator.initialize(childoi); - Object constant = evaluator.evaluate(null); - Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); - return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java); - } - - return exprDesc; - } - + if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) { + return exprDesc; + } + + if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) { + return exprDesc; + } + + ExprNodeConstantDesc foldedChild = null; + if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) { + + // try recursive folding + ExprNodeDesc expr = evaluateCastOnConstants(exprDesc.getChildren().get(0)); + if (expr instanceof ExprNodeConstantDesc) { + foldedChild = (ExprNodeConstantDesc) expr; + } + } else { + foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0); + } + + if (foldedChild == null) { + return exprDesc; + } + + ObjectInspector childoi = foldedChild.getWritableObjectInspector(); + GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF(); + + // Only evaluate +ve/-ve or cast on constant or recursive casting. 
+ if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive || + castExpressionUdfs.contains(gudf.getClass()) + || ((gudf instanceof GenericUDFBridge) + && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) { + ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc); + ObjectInspector output = evaluator.initialize(childoi); + Object constant = evaluator.evaluate(null); + Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); + return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java); + } + + return exprDesc; + } + /* For cast on constant operator in all members of the input list and return new list * containing results. */ private List evaluateCastOnConstants(List childExpr) - throws HiveException { - List evaluatedChildren = new ArrayList(); - if (childExpr != null) { + throws HiveException { + List evaluatedChildren = new ArrayList(); + if (childExpr != null) { for (ExprNodeDesc expr : childExpr) { - expr = this.evaluateCastOnConstants(expr); - evaluatedChildren.add(expr); + expr = this.evaluateCastOnConstants(expr); + evaluatedChildren.add(expr); } - } - return evaluatedChildren; + } + return evaluatedChildren; } - + private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo, Mode mode) throws HiveException { String type = typeInfo.getTypeName(); @@ -728,7 +728,7 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI outCol = ocm.allocateOutputColumn(colVectorType); } if (constantValue == null) { - return new ConstantVectorExpression(outCol, type, true); + return new ConstantVectorExpression(outCol, type, true); } else if (decimalTypePattern.matcher(type).matches()) { VectorExpression ve = new ConstantVectorExpression(outCol, (Decimal128) constantValue); ve.setOutputType(typeInfo.getTypeName()); @@ -907,9 +907,9 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, List childExpr, Mode mode, TypeInfo returnType) throws HiveException { - List castedChildren = evaluateCastOnConstants(childExpr); - childExpr = castedChildren; - + List castedChildren = evaluateCastOnConstants(childExpr); + childExpr = castedChildren; + //First handle special cases if (udf instanceof GenericUDFBetween) { return getBetweenFilterExpression(childExpr, mode, returnType); @@ -933,8 +933,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, } } else if (udf instanceof GenericUDFToDecimal) { return getCastToDecimal(childExpr, returnType); - } - + } + // Now do a general lookup Class udfClass = udf.getClass(); if (udf instanceof GenericUDFBridge) { @@ -1003,7 +1003,7 @@ private VectorExpression getEltExpression(List childExpr, TypeInfo } } } - + /** * Create a filter or boolean-valued expression for column IN ( ) */ @@ -1014,8 +1014,8 @@ private VectorExpression getInExpression(List childExpr, Mode mode String colType = colExpr.getTypeString(); // prepare arguments for createVectorExpression - List childrenForInList = evaluateCastOnConstants(childExpr.subList(1, childExpr.size())); - + List childrenForInList = evaluateCastOnConstants(childExpr.subList(1, childExpr.size())); + /* This method assumes that the IN list has no NULL entries. That is enforced elsewhere, * in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined. 
* If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued @@ -1110,105 +1110,105 @@ private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge ud return getCastToString(childExpr, returnType); } return null; - } - + } + private Decimal128 castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException { - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - Decimal128 d = new Decimal128(); - int scale = HiveDecimalUtils.getScaleForType(ptinfo); - switch (ptinfo.getPrimitiveCategory()) { - case FLOAT: - float floatVal = ((Float) scalar).floatValue(); - d.update(floatVal, (short) scale); - break; - case DOUBLE: - double doubleVal = ((Double) scalar).doubleValue(); - d.update(doubleVal, (short) scale); - break; - case BYTE: - byte byteVal = ((Byte) scalar).byteValue(); - d.update(byteVal, (short) scale); - break; - case SHORT: - short shortVal = ((Short) scalar).shortValue(); - d.update(shortVal, (short) scale); - break; - case INT: - int intVal = ((Integer) scalar).intValue(); - d.update(intVal, (short) scale); - break; - case LONG: - long longVal = ((Long) scalar).longValue(); - d.update(longVal, (short) scale); - break; - case DECIMAL: - HiveDecimal decimalVal = (HiveDecimal) scalar; - d.update(decimalVal.unscaledValue(), (short) scale); - break; - default: - throw new HiveException("Unsupported type "+typename+" for cast to Decimal128"); - } - return d; + PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; + String typename = type.getTypeName(); + Decimal128 d = new Decimal128(); + int scale = HiveDecimalUtils.getScaleForType(ptinfo); + switch (ptinfo.getPrimitiveCategory()) { + case FLOAT: + float floatVal = ((Float) scalar).floatValue(); + d.update(floatVal, (short) scale); + break; + case DOUBLE: + double doubleVal = ((Double) scalar).doubleValue(); + d.update(doubleVal, (short) scale); + break; + case BYTE: + byte byteVal = ((Byte) scalar).byteValue(); + d.update(byteVal, (short) scale); + break; + case SHORT: + short shortVal = ((Short) scalar).shortValue(); + d.update(shortVal, (short) scale); + break; + case INT: + int intVal = ((Integer) scalar).intValue(); + d.update(intVal, (short) scale); + break; + case LONG: + long longVal = ((Long) scalar).longValue(); + d.update(longVal, (short) scale); + break; + case DECIMAL: + HiveDecimal decimalVal = (HiveDecimal) scalar; + d.update(decimalVal.unscaledValue(), (short) scale); + break; + default: + throw new HiveException("Unsupported type "+typename+" for cast to Decimal128"); + } + return d; } private String castConstantToString(Object scalar, TypeInfo type) throws HiveException { - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - switch (ptinfo.getPrimitiveCategory()) { - case FLOAT: - case DOUBLE: - case BYTE: - case SHORT: - case INT: - case LONG: - return ((Number) scalar).toString(); - case DECIMAL: - HiveDecimal decimalVal = (HiveDecimal) scalar; - return decimalVal.toString(); - default: - throw new HiveException("Unsupported type "+typename+" for cast to String"); - } + PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; + String typename = type.getTypeName(); + switch (ptinfo.getPrimitiveCategory()) { + case FLOAT: + case DOUBLE: + case BYTE: + case SHORT: + case INT: + case LONG: + return ((Number) scalar).toString(); + case DECIMAL: + HiveDecimal decimalVal = (HiveDecimal) scalar; + return decimalVal.toString(); + default: + throw new HiveException("Unsupported type "+typename+" 
for cast to String"); + } } private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveException { - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - switch (ptinfo.getPrimitiveCategory()) { - case FLOAT: - case DOUBLE: - case BYTE: - case SHORT: - case INT: - case LONG: - return ((Number) scalar).doubleValue(); - case DECIMAL: - HiveDecimal decimalVal = (HiveDecimal) scalar; - return decimalVal.doubleValue(); - default: - throw new HiveException("Unsupported type "+typename+" for cast to Double"); - } - } + PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; + String typename = type.getTypeName(); + switch (ptinfo.getPrimitiveCategory()) { + case FLOAT: + case DOUBLE: + case BYTE: + case SHORT: + case INT: + case LONG: + return ((Number) scalar).doubleValue(); + case DECIMAL: + HiveDecimal decimalVal = (HiveDecimal) scalar; + return decimalVal.doubleValue(); + default: + throw new HiveException("Unsupported type "+typename+" for cast to Double"); + } + } private Long castConstantToLong(Object scalar, TypeInfo type) throws HiveException { - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - switch (ptinfo.getPrimitiveCategory()) { - case FLOAT: - case DOUBLE: - case BYTE: - case SHORT: - case INT: - case LONG: - return ((Number) scalar).longValue(); - case DECIMAL: - HiveDecimal decimalVal = (HiveDecimal) scalar; - return decimalVal.longValue(); - default: - throw new HiveException("Unsupported type "+typename+" for cast to Long"); - } - } - + PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; + String typename = type.getTypeName(); + switch (ptinfo.getPrimitiveCategory()) { + case FLOAT: + case DOUBLE: + case BYTE: + case SHORT: + case INT: + case LONG: + return ((Number) scalar).longValue(); + case DECIMAL: + HiveDecimal decimalVal = (HiveDecimal) scalar; + return decimalVal.longValue(); + default: + throw new HiveException("Unsupported type "+typename+" for cast to Long"); + } + } + private VectorExpression getCastToDecimal(List childExpr, TypeInfo returnType) throws HiveException { ExprNodeDesc child = childExpr.get(0); @@ -1217,9 +1217,9 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo // Return a constant vector expression Object constantValue = ((ExprNodeConstantDesc) child).getValue(); Decimal128 decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo()); - return getConstantVectorExpression(decimalValue, returnType, Mode.PROJECTION); + return getConstantVectorExpression(decimalValue, returnType, Mode.PROJECTION); } else if (child instanceof ExprNodeNullDesc) { - return getConstantVectorExpression(null, returnType, Mode.PROJECTION); + return getConstantVectorExpression(null, returnType, Mode.PROJECTION); } if (isIntFamily(inputType)) { return createVectorExpression(CastLongToDecimal.class, childExpr, Mode.PROJECTION, returnType); @@ -1234,8 +1234,8 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType); } throw new HiveException("Unhandled cast input type: " + inputType); - } - + } + private VectorExpression getCastToString(List childExpr, TypeInfo returnType) throws HiveException { ExprNodeDesc child = childExpr.get(0); @@ -1244,9 +1244,9 @@ private VectorExpression getCastToString(List childExpr, TypeInfo // Return a constant vector expression Object constantValue = ((ExprNodeConstantDesc) child).getValue(); String strValue 
= castConstantToString(constantValue, child.getTypeInfo()); - return getConstantVectorExpression(strValue, returnType, Mode.PROJECTION); + return getConstantVectorExpression(strValue, returnType, Mode.PROJECTION); } else if (child instanceof ExprNodeNullDesc) { - return getConstantVectorExpression(null, returnType, Mode.PROJECTION); + return getConstantVectorExpression(null, returnType, Mode.PROJECTION); } if (inputType.equals("boolean")) { // Boolean must come before the integer family. It's a special case. @@ -1273,9 +1273,9 @@ private VectorExpression getCastToDoubleExpression(Class udf, List udf, List childExpr) // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. return null; } else if (child instanceof ExprNodeNullDesc) { - return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION); + return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION); } // Long and double are handled using descriptors, string needs to be specially handled. if (inputType.equals("string")) { @@ -1329,9 +1329,9 @@ private VectorExpression getCastToLongExpression(List childExpr) // Return a constant vector expression Object constantValue = ((ExprNodeConstantDesc) child).getValue(); Long longValue = castConstantToLong(constantValue, child.getTypeInfo()); - return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, Mode.PROJECTION); + return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, Mode.PROJECTION); } else if (child instanceof ExprNodeNullDesc) { - return getConstantVectorExpression(null, TypeInfoFactory.longTypeInfo, Mode.PROJECTION); + return getConstantVectorExpression(null, TypeInfoFactory.longTypeInfo, Mode.PROJECTION); } // Float family, timestamp are handled via descriptor based lookup, int family needs // special handling. 
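Note on the null-constant branches above: the ExprNodeNullDesc cases hand back getConstantVectorExpression(null, ...), and the ConstantVectorExpression.java hunks below show how such a constant behaves at run time: the output column is marked repeating, and a null constant clears noNulls and sets isNull[0]. The following is a minimal, self-contained sketch of that convention only; ToyLongColumnVector is a hypothetical stand-in for Hive's LongColumnVector, used so the example runs on its own.

    // Hypothetical stand-in for Hive's LongColumnVector, reduced to the fields
    // that matter for the repeating-constant convention.
    class ToyLongColumnVector {
      long[] vector = new long[1024];
      boolean[] isNull = new boolean[1024];
      boolean isRepeating;   // true => element 0 describes every row in the batch
      boolean noNulls;       // false => isNull[] must be consulted
    }

    public class ConstantColumnSketch {
      // Mirrors the shape of evaluateLong() in the patch: write the constant
      // (or the null flag) once into slot 0 and mark the column as repeating.
      static void fillWithConstant(ToyLongColumnVector cv, Long constant) {
        cv.isRepeating = true;
        cv.noNulls = (constant != null);
        if (constant != null) {
          cv.vector[0] = constant;
        } else {
          cv.isNull[0] = true;
        }
      }

      public static void main(String[] args) {
        ToyLongColumnVector cv = new ToyLongColumnVector();
        fillWithConstant(cv, 42L);   // every row now reads as 42
        fillWithConstant(cv, null);  // every row now reads as NULL
        System.out.println(cv.isRepeating + " " + cv.noNulls + " " + cv.isNull[0]);
      }
    }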
@@ -1519,11 +1519,11 @@ public static boolean isDatetimeFamily(String resultType) { public static boolean isTimestampFamily(String resultType) { return resultType.equalsIgnoreCase("timestamp"); } - + public static boolean isDateFamily(String resultType) { return resultType.equalsIgnoreCase("date"); } - + // return true if this is any kind of float public static boolean isFloatFamily(String resultType) { return resultType.equalsIgnoreCase("double") diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index f521b84..4b46162 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -75,23 +75,23 @@ public ConstantVectorExpression(int outputColumn, Decimal128 value) { this(outputColumn, "decimal"); setDecimalValue(value); } - + /* * Support for null constant object */ public ConstantVectorExpression(int outputColumn, String typeString, boolean isNull) { - this(outputColumn, typeString); - isNullValue = isNull; + this(outputColumn, typeString); + isNullValue = isNull; } - + private void evaluateLong(VectorizedRowBatch vrg) { LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumn]; cv.isRepeating = true; cv.noNulls = !isNullValue; if (!isNullValue) { - cv.vector[0] = longValue; + cv.vector[0] = longValue; } else { - cv.isNull[0] = true; + cv.isNull[0] = true; } } @@ -100,10 +100,10 @@ private void evaluateDouble(VectorizedRowBatch vrg) { cv.isRepeating = true; cv.noNulls = !isNullValue; if (!isNullValue) { - cv.vector[0] = doubleValue; + cv.vector[0] = doubleValue; } else { - cv.isNull[0] = true; - } + cv.isNull[0] = true; + } } private void evaluateBytes(VectorizedRowBatch vrg) { @@ -112,9 +112,9 @@ private void evaluateBytes(VectorizedRowBatch vrg) { cv.noNulls = !isNullValue; cv.initBuffer(); if (!isNullValue) { - cv.setVal(0, bytesValue, 0, bytesValueLength); + cv.setVal(0, bytesValue, 0, bytesValueLength); } else { - cv.isNull[0] = true; + cv.isNull[0] = true; } } @@ -123,9 +123,9 @@ private void evaluateDecimal(VectorizedRowBatch vrg) { dcv.isRepeating = true; dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.vector[0].update(decimalValue); + dcv.vector[0].update(decimalValue); } else { - dcv.isNull[0] = true; + dcv.isNull[0] = true; } } @@ -194,7 +194,7 @@ public void setTypeString(String typeString) { } else if (VectorizationContext.isDecimalFamily(typeString)){ this.type = Type.DECIMAL; } else { - // everything else that does not belong to string, double, decimal is treated as long. + // everything else that does not belong to string, double, decimal is treated as long. 
this.type = Type.LONG; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java index d4bf622..56e0d39 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java @@ -33,17 +33,17 @@ public IdentityExpression() { } - public IdentityExpression(int colNum, String type) { - this.colNum = colNum; + public IdentityExpression(int colNum, String type) { + this.colNum = colNum; this.type = type; - } + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { this.evaluateChildren(batch); } - } + } @Override public int getOutputColumn() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index c2bc012..aea1877 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -82,7 +82,7 @@ public VectorExpressionWriter init(ObjectInspector objectInspector) throws HiveE this.objectInspector = objectInspector; return this; } - + /** * The base implementation must be overridden by the Long specialization */ @@ -90,7 +90,7 @@ public VectorExpressionWriter init(ObjectInspector objectInspector) throws HiveE public Object writeValue(long value) throws HiveException { throw new HiveException("Internal error: should not reach here"); } - + /** * The base implementation must be overridden by the Long specialization */ @@ -112,7 +112,7 @@ public Object writeValue(double value) throws HiveException { public Object setValue(Object field, double value) throws HiveException { throw new HiveException("Internal error: should not reach here"); } - + /** * The base implementation must be overridden by the Bytes specialization */ @@ -120,7 +120,7 @@ public Object setValue(Object field, double value) throws HiveException { public Object writeValue(byte[] value, int start, int length) throws HiveException { throw new HiveException("Internal error: should not reach here"); } - + /** * The base implementation must be overridden by the Bytes specialization */ @@ -171,7 +171,7 @@ public Object writeValue(ColumnVector column, int row) throws HiveException { "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", row, lcv.noNulls, lcv.isRepeating, lcv.isNull[row], lcv.isNull[0])); } - + @Override public Object setValue(Object field, ColumnVector column, int row) throws HiveException { LongColumnVector lcv = (LongColumnVector) column; @@ -192,7 +192,7 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx String.format( "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", row, lcv.noNulls, lcv.isRepeating, lcv.isNull[row], lcv.isNull[0])); - } + } } /** @@ -221,7 +221,7 @@ public Object writeValue(ColumnVector column, int row) throws HiveException { "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); } - + @Override public Object setValue(Object field, ColumnVector column, int row) throws HiveException { DoubleColumnVector dcv = 
(DoubleColumnVector) column; @@ -242,7 +242,7 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx String.format( "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); - } + } } /** @@ -292,7 +292,7 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx String.format( "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", row, bcv.noNulls, bcv.isRepeating, bcv.isNull[row], bcv.isNull[0])); - } + } } @@ -396,7 +396,7 @@ public static VectorExpressionWriter genVectorExpressionWritable( (SettableLongObjectInspector) fieldObjInspector); case VOID: return genVectorExpressionWritableVoid( - (VoidObjectInspector) fieldObjInspector); + (VoidObjectInspector) fieldObjInspector); case BINARY: return genVectorExpressionWritableBinary( (SettableBinaryObjectInspector) fieldObjInspector); @@ -419,7 +419,7 @@ public static VectorExpressionWriter genVectorExpressionWritable( throw new IllegalArgumentException("Unknown primitive type: " + ((PrimitiveObjectInspector) fieldObjInspector).getPrimitiveCategory()); } - + case STRUCT: case UNION: case MAP: @@ -428,7 +428,7 @@ public static VectorExpressionWriter genVectorExpressionWritable( fieldObjInspector.getCategory()); default: throw new IllegalArgumentException("Unknown type " + - fieldObjInspector.getCategory()); + fieldObjInspector.getCategory()); } } @@ -526,7 +526,7 @@ private static VectorExpressionWriter genVectorExpressionWritableTimestamp( private Object obj; private Timestamp ts; - public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector) + public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector) throws HiveException { super.init(objInspector); ts = new Timestamp(0); @@ -550,7 +550,7 @@ public Object setValue(Object field, long value) { ((SettableTimestampObjectInspector) this.objectInspector).set(field, ts); return field; } - + @Override public Object initValue(Object ignored) { return ((SettableTimestampObjectInspector) this.objectInspector).create(new Timestamp(0)); @@ -563,15 +563,15 @@ private static VectorExpressionWriter genVectorExpressionWritableVarchar( return new VectorExpressionWriterBytes() { private Object obj; private Text text; - - public VectorExpressionWriter init(SettableHiveVarcharObjectInspector objInspector) + + public VectorExpressionWriter init(SettableHiveVarcharObjectInspector objInspector) throws HiveException { super.init(objInspector); this.text = new Text(); this.obj = initValue(null); return this; } - + @Override public Object writeValue(byte[] value, int start, int length) throws HiveException { text.set(value, start, length); @@ -580,7 +580,7 @@ public Object writeValue(byte[] value, int start, int length) throws HiveExcepti } @Override - public Object setValue(Object field, byte[] value, int start, int length) + public Object setValue(Object field, byte[] value, int start, int length) throws HiveException { if (null == field) { field = initValue(null); @@ -589,7 +589,7 @@ public Object setValue(Object field, byte[] value, int start, int length) ((SettableHiveVarcharObjectInspector) this.objectInspector).set(field, text.toString()); return field; } - + @Override public Object initValue(Object ignored) { return ((SettableHiveVarcharObjectInspector) this.objectInspector) @@ -603,24 +603,24 @@ private static VectorExpressionWriter genVectorExpressionWritableString( return new 
VectorExpressionWriterBytes() { private Object obj; private Text text; - - public VectorExpressionWriter init(SettableStringObjectInspector objInspector) + + public VectorExpressionWriter init(SettableStringObjectInspector objInspector) throws HiveException { super.init(objInspector); this.text = new Text(); this.obj = initValue(null); return this; } - + @Override public Object writeValue(byte[] value, int start, int length) throws HiveException { this.text.set(value, start, length); ((SettableStringObjectInspector) this.objectInspector).set(this.obj, this.text.toString()); return this.obj; } - + @Override - public Object setValue(Object field, byte[] value, int start, int length) + public Object setValue(Object field, byte[] value, int start, int length) throws HiveException { if (null == field) { field = initValue(null); @@ -628,12 +628,12 @@ public Object setValue(Object field, byte[] value, int start, int length) this.text.set(value, start, length); ((SettableStringObjectInspector) this.objectInspector).set(field, this.text.toString()); return field; - } - + } + @Override public Object initValue(Object ignored) { return ((SettableStringObjectInspector) this.objectInspector).create(StringUtils.EMPTY); - } + } }.init(fieldObjInspector); } @@ -642,22 +642,22 @@ private static VectorExpressionWriter genVectorExpressionWritableBinary( return new VectorExpressionWriterBytes() { private Object obj; private byte[] bytes; - - public VectorExpressionWriter init(SettableBinaryObjectInspector objInspector) + + public VectorExpressionWriter init(SettableBinaryObjectInspector objInspector) throws HiveException { super.init(objInspector); this.bytes = ArrayUtils.EMPTY_BYTE_ARRAY; this.obj = initValue(null); return this; } - + @Override public Object writeValue(byte[] value, int start, int length) throws HiveException { bytes = Arrays.copyOfRange(value, start, start + length); ((SettableBinaryObjectInspector) this.objectInspector).set(this.obj, bytes); return this.obj; } - + @Override public Object setValue(Object field, byte[] value, int start, int length) throws HiveException { if (null == field) { @@ -666,7 +666,7 @@ public Object setValue(Object field, byte[] value, int start, int length) throws bytes = Arrays.copyOfRange(value, start, start + length); ((SettableBinaryObjectInspector) this.objectInspector).set(field, bytes); return field; - } + } @Override public Object initValue(Object ignored) { @@ -680,20 +680,20 @@ private static VectorExpressionWriter genVectorExpressionWritableLong( SettableLongObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterLong() { private Object obj; - - public VectorExpressionWriter init(SettableLongObjectInspector objInspector) + + public VectorExpressionWriter init(SettableLongObjectInspector objInspector) throws HiveException { super.init(objInspector); this.obj = initValue(null); return this; } - + @Override public Object writeValue(long value) throws HiveException { ((SettableLongObjectInspector) this.objectInspector).set(this.obj, value); return this.obj; } - + @Override public Object setValue(Object field, long value) throws HiveException { if (null == field) { @@ -712,56 +712,55 @@ public Object initValue(Object ignored) { } private static VectorExpressionWriter genVectorExpressionWritableVoid( - VoidObjectInspector fieldObjInspector) throws HiveException { - return new VectorExpressionWriterLong() { - private Object obj; - - public VectorExpressionWriter init(VoidObjectInspector objInspector) - throws HiveException { - 
super.init(objInspector); - this.obj = initValue(null); - return this; - } - - @Override - public Object writeValue(long value) throws HiveException { - return this.obj; - } - - @Override - public Object setValue(Object field, long value) throws HiveException { - if (null == field) { - field = initValue(null); - } - return field; - } - - @Override - public Object initValue(Object ignored) { - return ((VoidObjectInspector) this.objectInspector).copyObject(null); - } - }.init(fieldObjInspector); - } - - + VoidObjectInspector fieldObjInspector) throws HiveException { + return new VectorExpressionWriterLong() { + private Object obj; + + public VectorExpressionWriter init(VoidObjectInspector objInspector) throws HiveException { + super.init(objInspector); + this.obj = initValue(null); + return this; + } + + @Override + public Object writeValue(long value) throws HiveException { + return this.obj; + } + + @Override + public Object setValue(Object field, long value) throws HiveException { + if (null == field) { + field = initValue(null); + } + return field; + } + + @Override + public Object initValue(Object ignored) { + return ((VoidObjectInspector) this.objectInspector).copyObject(null); + } + }.init(fieldObjInspector); + } + + private static VectorExpressionWriter genVectorExpressionWritableInt( SettableIntObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterLong() { private Object obj; - - public VectorExpressionWriter init(SettableIntObjectInspector objInspector) + + public VectorExpressionWriter init(SettableIntObjectInspector objInspector) throws HiveException { super.init(objInspector); this.obj = initValue(null); return this; } - + @Override public Object writeValue(long value) throws HiveException { ((SettableIntObjectInspector) this.objectInspector).set(this.obj, (int) value); return this.obj; } - + @Override public Object setValue(Object field, long value) throws HiveException { if (null == field) { @@ -770,7 +769,7 @@ public Object setValue(Object field, long value) throws HiveException { ((SettableIntObjectInspector) this.objectInspector).set(field, (int) value); return field; } - + @Override public Object initValue(Object ignored) { return ((SettableIntObjectInspector) this.objectInspector) @@ -783,20 +782,20 @@ private static VectorExpressionWriter genVectorExpressionWritableShort( SettableShortObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterLong() { private Object obj; - - public VectorExpressionWriter init(SettableShortObjectInspector objInspector) + + public VectorExpressionWriter init(SettableShortObjectInspector objInspector) throws HiveException { super.init(objInspector); this.obj = initValue(null); return this; } - + @Override public Object writeValue(long value) throws HiveException { ((SettableShortObjectInspector) this.objectInspector).set(this.obj, (short) value); return this.obj; } - + @Override public Object setValue(Object field, long value) throws HiveException { if (null == field) { @@ -805,7 +804,7 @@ public Object setValue(Object field, long value) throws HiveException { ((SettableShortObjectInspector) this.objectInspector).set(field, (short) value); return field; } - + @Override public Object initValue(Object ignored) { return ((SettableShortObjectInspector) this.objectInspector) @@ -818,20 +817,20 @@ private static VectorExpressionWriter genVectorExpressionWritableByte( SettableByteObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterLong() { 
private Object obj; - - public VectorExpressionWriter init(SettableByteObjectInspector objInspector) + + public VectorExpressionWriter init(SettableByteObjectInspector objInspector) throws HiveException { super.init(objInspector); this.obj = initValue(null); return this; } - + @Override public Object writeValue(long value) throws HiveException { ((SettableByteObjectInspector) this.objectInspector).set(this.obj, (byte) value); return this.obj; } - + @Override public Object setValue(Object field, long value) throws HiveException { if (null == field) { @@ -840,7 +839,7 @@ public Object setValue(Object field, long value) throws HiveException { ((SettableByteObjectInspector) this.objectInspector).set(field, (byte) value); return field; } - + @Override public Object initValue(Object ignored) { return ((SettableByteObjectInspector) this.objectInspector) @@ -853,31 +852,31 @@ private static VectorExpressionWriter genVectorExpressionWritableBoolean( SettableBooleanObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterLong() { private Object obj; - - public VectorExpressionWriter init(SettableBooleanObjectInspector objInspector) + + public VectorExpressionWriter init(SettableBooleanObjectInspector objInspector) throws HiveException { super.init(objInspector); this.obj = initValue(null); return this; } - + @Override public Object writeValue(long value) throws HiveException { - ((SettableBooleanObjectInspector) this.objectInspector).set(this.obj, + ((SettableBooleanObjectInspector) this.objectInspector).set(this.obj, value == 0 ? false : true); return this.obj; } - + @Override public Object setValue(Object field, long value) throws HiveException { if (null == field) { field = initValue(null); } - ((SettableBooleanObjectInspector) this.objectInspector).set(field, + ((SettableBooleanObjectInspector) this.objectInspector).set(field, value == 0 ? 
false : true); return field; } - + @Override public Object initValue(Object ignored) { return ((SettableBooleanObjectInspector) this.objectInspector) @@ -890,20 +889,20 @@ private static VectorExpressionWriter genVectorExpressionWritableDouble( SettableDoubleObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterDouble() { private Object obj; - - public VectorExpressionWriter init(SettableDoubleObjectInspector objInspector) + + public VectorExpressionWriter init(SettableDoubleObjectInspector objInspector) throws HiveException { super.init(objInspector); this.obj = initValue(null); return this; } - + @Override public Object writeValue(double value) throws HiveException { ((SettableDoubleObjectInspector) this.objectInspector).set(this.obj, value); return this.obj; } - + @Override public Object setValue(Object field, double value) throws HiveException { if (null == field) { @@ -911,8 +910,8 @@ public Object setValue(Object field, double value) throws HiveException { } ((SettableDoubleObjectInspector) this.objectInspector).set(field, value); return field; - } - + } + @Override public Object initValue(Object ignored) { return ((SettableDoubleObjectInspector) this.objectInspector) @@ -925,20 +924,20 @@ private static VectorExpressionWriter genVectorExpressionWritableFloat( SettableFloatObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterDouble() { private Object obj; - - public VectorExpressionWriter init(SettableFloatObjectInspector objInspector) + + public VectorExpressionWriter init(SettableFloatObjectInspector objInspector) throws HiveException { super.init(objInspector); this.obj = initValue(null); return this; } - + @Override public Object writeValue(double value) throws HiveException { ((SettableFloatObjectInspector) this.objectInspector).set(this.obj, (float) value); return this.obj; } - + @Override public Object setValue(Object field, double value) throws HiveException { if (null == field) { @@ -947,7 +946,7 @@ public Object setValue(Object field, double value) throws HiveException { ((SettableFloatObjectInspector) this.objectInspector).set(field, (float) value); return field; } - + @Override public Object initValue(Object ignored) { return ((SettableFloatObjectInspector) this.objectInspector) @@ -1027,25 +1026,25 @@ public static void processVectorExpressions( */ public static VectorExpressionWriter[] getExpressionWriters(StructObjectInspector objInspector) throws HiveException { - + if (objInspector.isSettable()) { return getSettableExpressionWriters((SettableStructObjectInspector) objInspector); } - + List allFieldRefs = objInspector.getAllStructFieldRefs(); - + VectorExpressionWriter[] expressionWriters = new VectorExpressionWriter[allFieldRefs.size()]; - + for(int i=0; i fieldsRef = objInspector.getAllStructFieldRefs(); + List fieldsRef = objInspector.getAllStructFieldRefs(); VectorExpressionWriter[] writers = new VectorExpressionWriter[fieldsRef.size()]; for(int i=0; i cache; public RCFileSyncCache() { - cache = Collections.synchronizedMap(new WeakHashMap()); + cache = Collections.synchronizedMap(new WeakHashMap()); } public void put(FileSplit split, long endSync) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 2fcc207..9820cfa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -910,31 +910,31 @@ private boolean 
isStripeSatisfyPredicate(StripeStatistics stripeStatistics, static List generateSplitsInfo(Configuration conf) throws IOException { - // use threads to resolve directories into splits - Context context = new Context(conf); - for(Path dir: getInputPaths(conf)) { - FileSystem fs = dir.getFileSystem(conf); - context.schedule(new FileGenerator(context, fs, dir)); - } - context.waitForTasks(); - // deal with exceptions - if (!context.errors.isEmpty()) { - List errors = - new ArrayList(context.errors.size()); - for(Throwable th: context.errors) { - if (th instanceof IOException) { - errors.add((IOException) th); - } else { - throw new RuntimeException("serious problem", th); - } - } - throw new InvalidInputException(errors); - } + // use threads to resolve directories into splits + Context context = new Context(conf); + for(Path dir: getInputPaths(conf)) { + FileSystem fs = dir.getFileSystem(conf); + context.schedule(new FileGenerator(context, fs, dir)); + } + context.waitForTasks(); + // deal with exceptions + if (!context.errors.isEmpty()) { + List errors = + new ArrayList(context.errors.size()); + for(Throwable th: context.errors) { + if (th instanceof IOException) { + errors.add((IOException) th); + } else { + throw new RuntimeException("serious problem", th); + } + } + throw new InvalidInputException(errors); + } if (context.cacheStripeDetails) { LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/" + context.numFilesCounter.get()); } - return context.splits; + return context.splits; } @Override @@ -998,14 +998,14 @@ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, ((FileSplit) inputSplit).getPath(), OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit); } - + OrcSplit split = (OrcSplit) inputSplit; reporter.setStatus(inputSplit.toString()); Options options = new Options(conf).reporter(reporter); final RowReader inner = getReader(inputSplit, options); - - + + /*Even though there are no delta files, we still need to produce row ids so that an * UPDATE or DELETE statement would work on a table which didn't have any previous updates*/ if (split.isOriginal() && split.getDeltas().isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java index 3116451..472de8f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java @@ -95,7 +95,7 @@ private static Type convertType(final String name, final TypeInfo typeInfo, fina int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL). 
- scale(scale).precision(prec).named(name); + scale(scale).precision(prec).named(name); } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) { throw new UnsupportedOperationException("Unknown type not implemented"); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java index 3b9bf43..5e5df57 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java @@ -140,7 +140,7 @@ throw new IllegalStateException(msg); } } - } + } } requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(), typeListWanted), fileSchema, configuration); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 802a5c1..0a9c6d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -369,7 +369,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, return null; } cols = cols == null ? new ArrayList() : cols; - + cppCtx.getPrunedColLists().put((Operator) nd, cols); RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver(); @@ -479,13 +479,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, flags[index] = true; colLists = Utilities.mergeUniqElems(colLists, valCols.get(index).getCols()); } - + Collections.sort(colLists); pruneReduceSinkOperator(flags, op, cppCtx); cppCtx.getPrunedColLists().put(op, colLists); return null; } - + // Reduce Sink contains the columns needed - no need to aggregate from // children for (ExprNodeDesc val : valCols) { @@ -519,7 +519,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, if (cols == null) { return null; } - + Map colExprMap = op.getColumnExprMap(); // As columns go down the DAG, the LVJ will transform internal column // names from something like 'key' to '_col0'. Because of this, we need @@ -604,8 +604,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { SelectOperator op = (SelectOperator) nd; ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; - - + + if (op.getChildOperators() != null) { for (Operator child : op.getChildOperators()) { // UDTF is not handled yet, so the parent SelectOp of UDTF should just assume @@ -858,11 +858,11 @@ private static void pruneOperator(NodeProcessorCtx ctx, if (inputSchema != null) { ArrayList rs = new ArrayList(); ArrayList inputCols = inputSchema.getSignature(); - for (ColumnInfo i: inputCols) { + for (ColumnInfo i: inputCols) { if (cols.contains(i.getInternalName())) { rs.add(i); } - } + } op.getSchema().setSignature(rs); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index b15aedc..1476119 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -4,9 +4,9 @@ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. 
You may obtain a * copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under @@ -100,7 +100,7 @@ private ConstantPropagateProcFactory() { /** * Get ColumnInfo from column expression. - * + * * @param rr * @param desc * @return @@ -139,7 +139,7 @@ public static ColumnInfo resolveColumn(RowResolver rr, /** * Cast type from expression type to expected type ti. - * + * * @param desc constant expression * @param ti expected type info * @return cast constant, or null if the type cast failed. @@ -189,10 +189,10 @@ private static ExprNodeConstantDesc typeCast(ExprNodeDesc desc, TypeInfo ti) { /** * Fold input expression desc. - * + * * If desc is a UDF and all parameters are constants, evaluate it. If desc is a column expression, * find it from propagated constants, and if there is, replace it with constant. - * + * * @param desc folding expression * @param constants current propagated constant map * @param cppCtx @@ -296,7 +296,7 @@ private static boolean isDeterministicUdf(GenericUDF udf) { /** * Propagate assignment expression, adding an entry into constant map constants. - * + * * @param udf expression UDF, currently only 2 UDFs are supported: '=' and 'is null'. * @param newExprs child expressions (parameters). * @param cppCtx @@ -350,7 +350,7 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr; if (Boolean.TRUE.equals(c.getValue())) { - // if true, prune it + // if true, prune it return newExprs.get(Math.abs(i - 1)); } else { @@ -384,7 +384,7 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List /** * Evaluate column, replace the deterministic columns with constants if possible - * + * * @param desc * @param ctx * @param op @@ -435,7 +435,7 @@ private static ExprNodeDesc evaluateColumn(ExprNodeColumnDesc desc, /** * Evaluate UDF - * + * * @param udf UDF object * @param exprs * @param oldExprs @@ -512,7 +512,7 @@ private static ExprNodeDesc evaluateFunction(GenericUDF udf, List /** * Change operator row schema, replace column with constant if it is. - * + * * @param op * @param constants * @throws SemanticException @@ -584,7 +584,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object.. /** * Factory method to get the ConstantPropagateFilterProc class. - * + * * @return ConstantPropagateFilterProc */ public static ConstantPropagateFilterProc getFilterProc() { @@ -621,7 +621,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object.. /** * Factory method to get the ConstantPropagateGroupByProc class. - * + * * @return ConstantPropagateGroupByProc */ public static ConstantPropagateGroupByProc getGroupByProc() { @@ -650,7 +650,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object.. /** * Factory method to get the ConstantPropagateDefaultProc class. - * + * * @return ConstantPropagateDefaultProc */ public static ConstantPropagateDefaultProc getDefaultProc() { @@ -683,7 +683,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object.. /** * The Factory method to get the ConstantPropagateSelectProc class. 
- * + * * @return ConstantPropagateSelectProc */ public static ConstantPropagateSelectProc getSelectProc() { @@ -877,7 +877,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object.. return null; } - // Note: the following code (removing folded constants in exprs) is deeply coupled with + // Note: the following code (removing folded constants in exprs) is deeply coupled with // ColumnPruner optimizer. // Assuming ColumnPrunner will remove constant columns so we don't deal with output columns. // Except one case that the join operator is followed by a redistribution (RS operator). diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java index 48ca8f9..2f517f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java @@ -353,14 +353,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (inpOp.getSchema() != null && inpOp.getSchema().getSignature() != null ) { for(ColumnInfo ci : inpOp.getSchema().getSignature()) { Dependency inp_dep = lctx.getIndex().getDependency(inpOp, ci); - // The dependency can be null as some of the input cis may not have - // been set in case of joins. - if (inp_dep != null) { - for(BaseColumnInfo bci : inp_dep.getBaseCols()) { - new_type = LineageCtx.getNewDependencyType(inp_dep.getType(), new_type); - tai_set.add(bci.getTabAlias()); - } - } + // The dependency can be null as some of the input cis may not have + // been set in case of joins. + if (inp_dep != null) { + for(BaseColumnInfo bci : inp_dep.getBaseCols()) { + new_type = LineageCtx.getNewDependencyType(inp_dep.getType(), new_type); + tai_set.add(bci.getTabAlias()); + } + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index 908db1e..b3c4b47 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -66,26 +66,26 @@ private HashMap destToWindowingSpec; /* - * If this QB represents a SubQuery predicate then this will point to the SubQuery object. + * If this QB represents a SubQuery predicate then this will point to the SubQuery object. */ private QBSubQuery subQueryPredicateDef; - - /* - * used to give a unique name to each SubQuery QB Currently there can be at - * most 2 SubQueries in a Query: 1 in the Where clause, and 1 in the Having - * clause. - */ - private int numSubQueryPredicates; - - /* - * for now a top level QB can have 1 where clause SQ predicate. - */ - private QBSubQuery whereClauseSubQueryPredicate; - + + /* + * used to give a unique name to each SubQuery QB Currently there can be at + * most 2 SubQueries in a Query: 1 in the Where clause, and 1 in the Having + * clause. + */ + private int numSubQueryPredicates; + /* * for now a top level QB can have 1 where clause SQ predicate. */ - private QBSubQuery havingClauseSubQueryPredicate; + private QBSubQuery whereClauseSubQueryPredicate; + + /* + * for now a top level QB can have 1 where clause SQ predicate. 
+ */ + private QBSubQuery havingClauseSubQueryPredicate; // results @@ -341,28 +341,28 @@ protected void setSubQueryDef(QBSubQuery subQueryPredicateDef) { protected QBSubQuery getSubQueryPredicateDef() { return subQueryPredicateDef; } - - protected int getNumSubQueryPredicates() { - return numSubQueryPredicates; - } - - protected int incrNumSubQueryPredicates() { - return ++numSubQueryPredicates; - } - - void setWhereClauseSubQueryPredicate(QBSubQuery sq) { - whereClauseSubQueryPredicate = sq; - } - - public QBSubQuery getWhereClauseSubQueryPredicate() { - return whereClauseSubQueryPredicate; - } - - void setHavingClauseSubQueryPredicate(QBSubQuery sq) { + + protected int getNumSubQueryPredicates() { + return numSubQueryPredicates; + } + + protected int incrNumSubQueryPredicates() { + return ++numSubQueryPredicates; + } + + void setWhereClauseSubQueryPredicate(QBSubQuery sq) { + whereClauseSubQueryPredicate = sq; + } + + public QBSubQuery getWhereClauseSubQueryPredicate() { + return whereClauseSubQueryPredicate; + } + + void setHavingClauseSubQueryPredicate(QBSubQuery sq) { havingClauseSubQueryPredicate = sq; } - - public QBSubQuery getHavingClauseSubQueryPredicate() { + + public QBSubQuery getHavingClauseSubQueryPredicate() { return havingClauseSubQueryPredicate; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index d398c88..3c7b707 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -38,7 +38,7 @@ import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic.QBSubQueryRewrite; public class QBSubQuery implements ISubQueryJoinInfo { - + public static enum SubQueryType { EXISTS, NOT_EXISTS, @@ -149,16 +149,16 @@ public boolean refersSubQuery() { } /* - * This class captures the information about a + * This class captures the information about a * conjunct in the where clause of the SubQuery. * For a equality predicate it capture for each side: * - the AST * - the type of Expression (basically what columns are referenced) - * - for Expressions that refer the parent it captures the + * - for Expressions that refer the parent it captures the * parent's ColumnInfo. In case of outer Aggregation expressions * we need this to introduce a new mapping in the OuterQuery * RowResolver. A join condition must use qualified column references, - * so we generate a new name for the aggr expression and use it in the + * so we generate a new name for the aggr expression and use it in the * joining condition. * For e.g. 
* having exists ( select x from R2 where y = min(R1.z) ) @@ -174,8 +174,8 @@ public boolean refersSubQuery() { private final ColumnInfo leftOuterColInfo; private final ColumnInfo rightOuterColInfo; - Conjunct(ASTNode leftExpr, - ASTNode rightExpr, + Conjunct(ASTNode leftExpr, + ASTNode rightExpr, ExprType leftExprType, ExprType rightExprType, ColumnInfo leftOuterColInfo, @@ -239,8 +239,8 @@ ColumnInfo getRightOuterColInfo() { Stack stack; ConjunctAnalyzer(RowResolver parentQueryRR, - boolean forHavingClause, - String parentQueryNewAlias) { + boolean forHavingClause, + String parentQueryNewAlias) { this.parentQueryRR = parentQueryRR; defaultExprProcessor = new DefaultExprProcessor(); this.forHavingClause = forHavingClause; @@ -260,13 +260,13 @@ ColumnInfo getRightOuterColInfo() { private ObjectPair analyzeExpr(ASTNode expr) { ColumnInfo cInfo = null; if ( forHavingClause ) { - try { - cInfo = parentQueryRR.getExpression(expr); - if ( cInfo != null) { - return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); - } - } catch(SemanticException se) { - } + try { + cInfo = parentQueryRR.getExpression(expr); + if ( cInfo != null) { + return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); + } + } catch(SemanticException se) { + } } if ( expr.getType() == HiveParser.DOT) { ASTNode dot = firstDot(expr); @@ -308,12 +308,12 @@ Conjunct analyzeConjunct(ASTNode conjunct) throws SemanticException { ObjectPair leftInfo = analyzeExpr(left); ObjectPair rightInfo = analyzeExpr(right); - return new Conjunct(left, right, + return new Conjunct(left, right, leftInfo.getFirst(), rightInfo.getFirst(), leftInfo.getSecond(), rightInfo.getSecond()); } else { ObjectPair sqExprInfo = analyzeExpr(conjunct); - return new Conjunct(conjunct, null, + return new Conjunct(conjunct, null, sqExprInfo.getFirst(), null, sqExprInfo.getSecond(), sqExprInfo.getSecond()); } @@ -354,86 +354,86 @@ protected ASTNode firstDot(ASTNode dot) { } /* - * When transforming a Not In SubQuery we need to check for nulls in the + * When transforming a Not In SubQuery we need to check for nulls in the * Joining expressions of the SubQuery. If there are nulls then the SubQuery always - * return false. For more details see + * return false. For more details see * https://issues.apache.org/jira/secure/attachment/12614003/SubQuerySpec.pdf - * + * * Basically, SQL semantics say that: * - R1.A not in (null, 1, 2, ...) - * is always false. - * A 'not in' operator is equivalent to a '<> all'. Since a not equal check with null + * is always false. + * A 'not in' operator is equivalent to a '<> all'. Since a not equal check with null * returns false, a not in predicate against aset with a 'null' value always returns false. - * + * * So for not in SubQuery predicates: * - we join in a null count predicate. * - And the joining condition is that the 'Null Count' query has a count of 0. - * + * */ class NotInCheck implements ISubQueryJoinInfo { - + private static final String CNT_ALIAS = "c1"; - + /* * expressions in SubQ that are joined to the Outer Query. */ List subQryCorrExprs; - + /* * row resolver of the SubQuery. * Set by the SemanticAnalyzer after the Plan for the SubQuery is genned. 
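The comment above gives the reason for the extra null-count query: under SQL's three-valued logic, x NOT IN (s1, s2, ...) is x <> s1 AND x <> s2 AND ..., any comparison with NULL yields UNKNOWN, and UNKNOWN is filtered like false, so a single NULL in the subquery result keeps every outer row from qualifying unless the rewrite first checks that the null count is 0. A small, self-contained sketch of that logic (not Hive code), using java.lang.Boolean with null standing in for UNKNOWN:

    import java.util.Arrays;
    import java.util.List;

    public class NotInNullSketch {
      // Three-valued "not equal": null (UNKNOWN) if either side is NULL.
      static Boolean notEqual(Integer a, Integer b) {
        if (a == null || b == null) return null;
        return !a.equals(b);
      }

      // Three-valued AND: false dominates, then UNKNOWN, then true.
      static Boolean and(Boolean a, Boolean b) {
        if (Boolean.FALSE.equals(a) || Boolean.FALSE.equals(b)) return false;
        if (a == null || b == null) return null;
        return true;
      }

      // x NOT IN (set) == AND over notEqual(x, s) for every s in the set.
      static Boolean notIn(Integer x, List<Integer> set) {
        Boolean result = true;
        for (Integer s : set) result = and(result, notEqual(x, s));
        return result;
      }

      public static void main(String[] args) {
        System.out.println(notIn(5, Arrays.asList(1, 2, 3)));     // true
        System.out.println(notIn(5, Arrays.asList(1, 2, null)));  // null (UNKNOWN) -> row is filtered, behaves as false
        System.out.println(notIn(1, Arrays.asList(1, 2, null)));  // false
      }
    }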
* This is neede in case the SubQuery select list contains a TOK_ALLCOLREF */ RowResolver sqRR; - + NotInCheck() { subQryCorrExprs = new ArrayList(); } - + void addCorrExpr(ASTNode corrExpr) { subQryCorrExprs.add(corrExpr); } - + public ASTNode getSubQueryAST() { ASTNode ast = SubQueryUtils.buildNotInNullCheckQuery( - QBSubQuery.this.getSubQueryAST(), - QBSubQuery.this.getAlias(), - CNT_ALIAS, + QBSubQuery.this.getSubQueryAST(), + QBSubQuery.this.getAlias(), + CNT_ALIAS, subQryCorrExprs, sqRR); SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin); return ast; } - + public String getAlias() { return QBSubQuery.this.getAlias() + "_notin_nullcheck"; } - + public JoinType getJoinType() { return JoinType.LEFTSEMI; } - + public ASTNode getJoinConditionAST() { - ASTNode ast = + ASTNode ast = SubQueryUtils.buildNotInNullJoinCond(getAlias(), CNT_ALIAS); SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin); return ast; } - + public QBSubQuery getSubQuery() { return QBSubQuery.this; } - + public String getOuterQueryId() { return QBSubQuery.this.getOuterQueryId(); } - + void setSQRR(RowResolver sqRR) { this.sqRR = sqRR; } - + } - + private final String outerQueryId; private final int sqIdx; private final String alias; @@ -455,11 +455,11 @@ void setSQRR(RowResolver sqRR) { private int numOfCorrelationExprsAddedToSQSelect; private boolean groupbyAddedToSQ; - + private int numOuterCorrExprsForHaving; - + private NotInCheck notInCheck; - + private QBSubQueryRewrite subQueryDiagnostic; public QBSubQuery(String outerQueryId, @@ -483,11 +483,11 @@ public QBSubQuery(String outerQueryId, originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST); numOfCorrelationExprsAddedToSQSelect = 0; groupbyAddedToSQ = false; - + if ( operator.getType() == SubQueryType.NOT_IN ) { notInCheck = new NotInCheck(); } - + subQueryDiagnostic = SubQueryDiagnostic.getRewrite(this, ctx.getTokenRewriteStream(), ctx); } @@ -500,18 +500,18 @@ public ASTNode getOuterQueryExpression() { public SubQueryTypeDef getOperator() { return operator; } - + public ASTNode getOriginalSubQueryASTForRewrite() { return (operator.getType() == SubQueryType.NOT_EXISTS - || operator.getType() == SubQueryType.NOT_IN ? - (ASTNode) originalSQASTOrigin.getUsageNode().getParent() : + || operator.getType() == SubQueryType.NOT_IN ? + (ASTNode) originalSQASTOrigin.getUsageNode().getParent() : originalSQASTOrigin.getUsageNode()); } void validateAndRewriteAST(RowResolver outerQueryRR, - boolean forHavingClause, - String outerQueryAlias, - Set outerQryAliases) throws SemanticException { + boolean forHavingClause, + String outerQueryAlias, + Set outerQryAliases) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); @@ -519,12 +519,12 @@ void validateAndRewriteAST(RowResolver outerQueryRR, if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) { selectExprStart = 1; } - + /* * Restriction.16.s :: Correlated Expression in Outer Query must not contain * unqualified column references. 
*/ - if ( parentQueryExpression != null && !forHavingClause ) { + if ( parentQueryExpression != null && !forHavingClause ) { ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(parentQueryExpression); if ( u != null ) { subQueryAST.setOrigin(originalSQASTOrigin); @@ -532,7 +532,7 @@ void validateAndRewriteAST(RowResolver outerQueryRR, u, "Correlating expression cannot contain unqualified column references.")); } } - + /* * Restriction 17.s :: SubQuery cannot use the same table alias as one used in * the Outer Query. @@ -546,14 +546,14 @@ void validateAndRewriteAST(RowResolver outerQueryRR, } if ( sharedAlias != null) { ASTNode whereClause = SubQueryUtils.subQueryWhere(subQueryAST); - + if ( whereClause != null ) { ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(whereClause); if ( u != null ) { subQueryAST.setOrigin(originalSQASTOrigin); throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( u, "SubQuery cannot use the table alias: " + sharedAlias + "; " + - "this is also an alias in the Outer Query and SubQuery contains a unqualified column reference")); + "this is also an alias in the Outer Query and SubQuery contains a unqualified column reference")); } } } @@ -641,25 +641,25 @@ private void setJoinType() { } void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, - boolean forHavingClause, - String outerQueryAlias) throws SemanticException { + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode parentQueryJoinCond = null; if ( parentQueryExpression != null ) { - + ColumnInfo outerQueryCol = null; try { outerQueryCol = outerQueryRR.getExpression(parentQueryExpression); } catch(SemanticException se) { } - + parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond( getOuterQueryExpression(), alias, sqRR); - + if ( outerQueryCol != null ) { - rewriteCorrConjunctForHaving(parentQueryJoinCond, true, + rewriteCorrConjunctForHaving(parentQueryJoinCond, true, outerQueryAlias, outerQueryRR, outerQueryCol); } subQueryDiagnostic.addJoinCondition(parentQueryJoinCond, outerQueryCol != null, true); @@ -682,10 +682,10 @@ void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) { if (postJoinConditionAST == null ) { return outerQryFilter; - } - + } + subQueryDiagnostic.addPostJoinCondition(postJoinConditionAST); - + if ( outerQryFilter == null ) { return postJoinConditionAST; } @@ -738,7 +738,7 @@ String getNextCorrExprAlias() { * Additional things for Having clause: * - A correlation predicate may refer to an aggregation expression. * - This introduces 2 twists to the rewrite: - * a. When analyzing equality predicates we need to analyze each side + * a. When analyzing equality predicates we need to analyze each side * to see if it is an aggregation expression from the Outer Query. * So for e.g. this is a valid correlation predicate: * R2.x = min(R1.y) @@ -748,12 +748,12 @@ String getNextCorrExprAlias() { * to contain a qualified column references. * We handle this by generating a new name for the aggregation expression, * like R1._gby_sq_col_1 and adding this mapping to the Outer Query's - * Row Resolver. Then we construct a joining predicate using this new + * Row Resolver. Then we construct a joining predicate using this new * name; so in our e.g. 
the condition would be: R2.x = R1._gby_sq_col_1 */ private void rewrite(RowResolver parentQueryRR, - boolean forHavingClause, - String outerQueryAlias) throws SemanticException { + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); ASTNode whereClause = SubQueryUtils.subQueryWhere(subQueryAST); @@ -766,7 +766,7 @@ private void rewrite(RowResolver parentQueryRR, SubQueryUtils.extractConjuncts(searchCond, conjuncts); ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR, - forHavingClause, outerQueryAlias); + forHavingClause, outerQueryAlias); ASTNode sqNewSearchCond = null; for(ASTNode conjunctAST : conjuncts) { @@ -805,7 +805,7 @@ private void rewrite(RowResolver parentQueryRR, corrCondLeftIsRewritten = true; if ( forHavingClause && conjunct.getRightOuterColInfo() != null ) { corrCondRightIsRewritten = true; - rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, + rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, parentQueryRR, conjunct.getRightOuterColInfo()); } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( @@ -829,7 +829,7 @@ private void rewrite(RowResolver parentQueryRR, corrCondRightIsRewritten = true; if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) { corrCondLeftIsRewritten = true; - rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, + rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, parentQueryRR, conjunct.getLeftOuterColInfo()); } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( @@ -901,7 +901,7 @@ private ASTNode getSubQueryGroupByAST() { for(ASTNode child : newChildren ) { subQueryAST.addChild(child); } - + subQueryDiagnostic.setAddGroupByClause(); return groupBy; @@ -927,26 +927,26 @@ public ASTNode getJoinConditionAST() { public int getNumOfCorrelationExprsAddedToSQSelect() { return numOfCorrelationExprsAddedToSQSelect; } - - + + public QBSubQueryRewrite getDiagnostic() { return subQueryDiagnostic; } - + public QBSubQuery getSubQuery() { return this; } - + NotInCheck getNotInCheck() { return notInCheck; } - + private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode, boolean refersLeft, String outerQueryAlias, RowResolver outerQueryRR, ColumnInfo outerQueryCol) { - + String newColAlias = "_gby_sq_col_" + numOuterCorrExprsForHaving++; ASTNode outerExprForCorr = SubQueryUtils.createColRefAST(outerQueryAlias, newColAlias); if ( refersLeft ) { @@ -956,5 +956,5 @@ private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode, } outerQueryRR.put(outerQueryAlias, newColAlias, outerQueryCol); } - + } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java index 33b8a21..2e58b80 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java @@ -98,7 +98,7 @@ public ASTNode getExpressionSource(ASTNode node) { public void put(String tab_alias, String col_alias, ColumnInfo colInfo) { if (!addMappingOnly(tab_alias, col_alias, colInfo)) { - rowSchema.getSignature().add(colInfo); + rowSchema.getSignature().add(colInfo); } } @@ -289,7 +289,7 @@ public void setIsExprResolver(boolean isExprResolver) { public boolean getIsExprResolver() { return isExprResolver; } - + public String[] getAlternateMappings(String internalName) { return altInvRslvMap.get(internalName); } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index a8457ab..e4a30a2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2239,8 +2239,8 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, String havingInputAlias = null; if ( forHavingClause ) { - havingInputAlias = "gby_sq" + sqIdx; - aliasToOpInfo.put(havingInputAlias, input); + havingInputAlias = "gby_sq" + sqIdx; + aliasToOpInfo.put(havingInputAlias, input); } subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet()); @@ -11589,40 +11589,40 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, } private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, - Operator reduceSinkOp, RowResolver gByRR) { - if ( gByExpr.getType() == HiveParser.DOT + Operator reduceSinkOp, RowResolver gByRR) { + if ( gByExpr.getType() == HiveParser.DOT && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL ) { - String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr - .getChild(0).getChild(0).getText()); - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier( - gByExpr.getChild(1).getText()); - gByRR.put(tab_alias, col_alias, colInfo); - } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) { - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr - .getChild(0).getText()); - String tab_alias = null; - /* - * If the input to the GBy has a tab alias for the column, then add an entry - * based on that tab_alias. - * For e.g. this query: - * select b.x, count(*) from t1 b group by x - * needs (tab_alias=b, col_alias=x) in the GBy RR. - * tab_alias=b comes from looking at the RowResolver that is the ancestor - * before any GBy/ReduceSinks added for the GBY operation. - */ - Operator parent = reduceSinkOp; - while ( parent instanceof ReduceSinkOperator || - parent instanceof GroupByOperator ) { - parent = parent.getParentOperators().get(0); - } - RowResolver parentRR = opParseCtx.get(parent).getRowResolver(); - try { - ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias); - tab_alias = pColInfo == null ? null : pColInfo.getTabAlias(); - } catch(SemanticException se) { - } - gByRR.put(tab_alias, col_alias, colInfo); - } + String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr + .getChild(0).getChild(0).getText()); + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier( + gByExpr.getChild(1).getText()); + gByRR.put(tab_alias, col_alias, colInfo); + } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) { + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr + .getChild(0).getText()); + String tab_alias = null; + /* + * If the input to the GBy has a tab alias for the column, then add an entry + * based on that tab_alias. + * For e.g. this query: + * select b.x, count(*) from t1 b group by x + * needs (tab_alias=b, col_alias=x) in the GBy RR. + * tab_alias=b comes from looking at the RowResolver that is the ancestor + * before any GBy/ReduceSinks added for the GBY operation. 
+ */ + Operator parent = reduceSinkOp; + while ( parent instanceof ReduceSinkOperator || + parent instanceof GroupByOperator ) { + parent = parent.getParentOperators().get(0); + } + RowResolver parentRR = opParseCtx.get(parent).getRowResolver(); + try { + ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias); + tab_alias = pColInfo == null ? null : pColInfo.getTabAlias(); + } catch(SemanticException se) { + } + gByRR.put(tab_alias, col_alias, colInfo); + } } private WriteEntity.WriteType determineWriteType(LoadTableDesc ltd, boolean isNonNativeTable) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java index 8da2848..0226278 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java @@ -37,7 +37,7 @@ private String destinationCreateTable; static { - PTFUtils.makeTransient(LoadFileDesc.class, "targetDir"); + PTFUtils.makeTransient(LoadFileDesc.class, "targetDir"); } public LoadFileDesc() { } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java index fffac7b..0627b9f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java @@ -38,7 +38,7 @@ private transient List srcDirs; static { - PTFUtils.makeTransient(LoadMultiFilesDesc.class, "targetDirs"); + PTFUtils.makeTransient(LoadMultiFilesDesc.class, "targetDirs"); } public LoadMultiFilesDesc() { } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java index 026f4e0..027bd39 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java @@ -60,8 +60,8 @@ private JobConf toJobConf(Configuration hconf) { @Override public String aggregateStats(String counterGrpName, String statType) { // In case of counters, aggregation is done by JobTracker / MR AM itself - // so no need to aggregate, simply return the counter value for requested stat. - return String.valueOf(counters.getGroup(counterGrpName).getCounter(statType)); + // so no need to aggregate, simply return the counter value for requested stat. + return String.valueOf(counters.getGroup(counterGrpName).getCounter(statType)); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java index fbadb91..bc93204 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java @@ -34,43 +34,38 @@ @WindowFunctionDescription ( - description = @Description( - name = "cume_dist", - value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " + - "statistical books) computes the position of a specified value relative to a set of values. 
" + - "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " + - "CUME_DIST(x) = number of values in S coming before " + - " and including x in the specified order/ N" - ), - supportsWindow = false, - pivotResult = true, - rankingFunction = true, - impliesOrder = true + description = @Description( + name = "cume_dist", + value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " + + "statistical books) computes the position of a specified value relative to a set of values. " + + "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " + + "CUME_DIST(x) = number of values in S coming before " + + " and including x in the specified order/ N" + ), + supportsWindow = false, + pivotResult = true, + rankingFunction = true, + impliesOrder = true ) -public class GenericUDAFCumeDist extends GenericUDAFRank -{ +public class GenericUDAFCumeDist extends GenericUDAFRank { - static final Log LOG = LogFactory.getLog(GenericUDAFCumeDist.class.getName()); + static final Log LOG = LogFactory.getLog(GenericUDAFCumeDist.class.getName()); - @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() - { - return new GenericUDAFCumeDistEvaluator(); - } + @Override + protected GenericUDAFAbstractRankEvaluator createEvaluator() { + return new GenericUDAFCumeDistEvaluator(); + } - public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator - { + public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator { @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException - { + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); return ObjectInspectorFactory .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); } @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { + public Object terminate(AggregationBuffer agg) throws HiveException { List ranks = ((RankBuffer) agg).rowNums; int ranksSize = ranks.size(); double ranksSizeDouble = ranksSize; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java index 8856fb7..50ee4ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java @@ -23,41 +23,38 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription; -@WindowFunctionDescription -( - description = @Description( - name = "dense_rank", - value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " + - "gaps in ranking sequence when there are ties. That is, if you were " + - "ranking a competition using DENSE_RANK and had three people tie for " + - "second place, you would say that all three were in second place and " + - "that the next person came in third." - ), - supportsWindow = false, - pivotResult = true, - rankingFunction = true, - impliesOrder = true +@WindowFunctionDescription( + description = @Description( + name = "dense_rank", + value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " + + "gaps in ranking sequence when there are ties. 
That is, if you were " + + "ranking a competition using DENSE_RANK and had three people tie for " + + "second place, you would say that all three were in second place and " + + "that the next person came in third." + ), + supportsWindow = false, + pivotResult = true, + rankingFunction = true, + impliesOrder = true ) -public class GenericUDAFDenseRank extends GenericUDAFRank -{ - static final Log LOG = LogFactory.getLog(GenericUDAFDenseRank.class.getName()); - - @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() - { - return new GenericUDAFDenseRankEvaluator(); - } - - public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator - { - /* - * Called when the value in the partition has changed. Update the currentRank - */ - @Override - protected void nextRank(RankBuffer rb) - { - rb.currentRank++; - } - } +public class GenericUDAFDenseRank extends GenericUDAFRank { + + static final Log LOG = LogFactory.getLog(GenericUDAFDenseRank.class.getName()); + + @Override + protected GenericUDAFAbstractRankEvaluator createEvaluator() { + return new GenericUDAFDenseRankEvaluator(); + } + + public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator { + + /* + * Called when the value in the partition has changed. Update the currentRank + */ + @Override + protected void nextRank(RankBuffer rb) { + rb.currentRank++; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java index 2ca48b3..767314b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java @@ -41,147 +41,128 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -@WindowFunctionDescription -( - description = @Description( - name = "first_value", - value = "_FUNC_(x)" - ), - supportsWindow = true, - pivotResult = false, - impliesOrder = true +@WindowFunctionDescription( + description = @Description( + name = "first_value", + value = "_FUNC_(x)" + ), + supportsWindow = true, + pivotResult = false, + impliesOrder = true ) -public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver -{ - static final Log LOG = LogFactory.getLog(GenericUDAFFirstValue.class.getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException - { - if (parameters.length > 2) - { - throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); - } - if ( parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo) ) - { - throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); - } - return createEvaluator(); - } - - protected GenericUDAFFirstValueEvaluator createEvaluator() - { - return new GenericUDAFFirstValueEvaluator(); - } - - static class FirstValueBuffer implements AggregationBuffer - { - Object val; - boolean valSet; - boolean firstRow; - boolean skipNulls; - - FirstValueBuffer() - { - init(); - } - - void init() - { - val = null; - valSet = false; - firstRow = true; - skipNulls = false; - } - - } - - public static class GenericUDAFFirstValueEvaluator extends GenericUDAFEvaluator - { - ObjectInspector inputOI; - ObjectInspector outputOI; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException - { - super.init(m, parameters); - if (m != 
Mode.COMPLETE) - { - throw new HiveException( - "Only COMPLETE mode supported for Rank function"); - } - inputOI = parameters[0]; - outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, ObjectInspectorCopyOption.WRITABLE); - return outputOI; - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException - { - return new FirstValueBuffer(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException - { - ((FirstValueBuffer) agg).init(); - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException - { - FirstValueBuffer fb = (FirstValueBuffer) agg; - - if (fb.firstRow ) - { - fb.firstRow = false; - if ( parameters.length == 2 ) - { - fb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean( - parameters[1], - PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); - } - } - - if ( !fb.valSet ) - { - fb.val = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI, ObjectInspectorCopyOption.WRITABLE); - if ( !fb.skipNulls || fb.val != null ) - { - fb.valSet = true; - } - } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException - { - throw new HiveException("terminatePartial not supported"); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException - { - throw new HiveException("merge not supported"); - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - return ((FirstValueBuffer) agg).val; - } - +public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver { + + static final Log LOG = LogFactory.getLog(GenericUDAFFirstValue.class.getName()); + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length > 2) { + throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); + } + if (parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo)) { + throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); + } + return createEvaluator(); + } + + protected GenericUDAFFirstValueEvaluator createEvaluator() { + return new GenericUDAFFirstValueEvaluator(); + } + + static class FirstValueBuffer implements AggregationBuffer { + + Object val; + boolean valSet; + boolean firstRow; + boolean skipNulls; + + FirstValueBuffer() { + init(); + } + + void init() { + val = null; + valSet = false; + firstRow = true; + skipNulls = false; + } + + } + + public static class GenericUDAFFirstValueEvaluator extends GenericUDAFEvaluator { + + ObjectInspector inputOI; + ObjectInspector outputOI; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + if (m != Mode.COMPLETE) { + throw new HiveException("Only COMPLETE mode supported for Rank function"); + } + inputOI = parameters[0]; + outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, + ObjectInspectorCopyOption.WRITABLE); + return outputOI; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new FirstValueBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((FirstValueBuffer) agg).init(); + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + FirstValueBuffer fb = (FirstValueBuffer) agg; + + if (fb.firstRow) { + fb.firstRow = false; + if 
(parameters.length == 2) { + fb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(parameters[1], + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); + } + } + + if (!fb.valSet) { + fb.val = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI, + ObjectInspectorCopyOption.WRITABLE); + if (!fb.skipNulls || fb.val != null) { + fb.valSet = true; + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + throw new HiveException("terminatePartial not supported"); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + throw new HiveException("merge not supported"); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((FirstValueBuffer) agg).val; + } + @Override public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { BoundaryDef start = wFrmDef.getStart(); BoundaryDef end = wFrmDef.getEnd(); - return new FirstValStreamingFixedWindow(this, start.getAmt(), - end.getAmt()); + return new FirstValStreamingFixedWindow(this, start.getAmt(), end.getAmt()); } - } - + } + static class ValIndexPair { + Object val; int idx; @@ -191,16 +172,15 @@ public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { } } - static class FirstValStreamingFixedWindow extends - GenericUDAFStreamingEvaluator { + static class FirstValStreamingFixedWindow extends GenericUDAFStreamingEvaluator { class State extends GenericUDAFStreamingEvaluator.StreamingState { + private final Deque valueChain; public State(int numPreceding, int numFollowing, AggregationBuffer buf) { super(numPreceding, numFollowing, buf); - valueChain = new ArrayDeque(numPreceding + numFollowing - + 1); + valueChain = new ArrayDeque(numPreceding + numFollowing + 1); } @Override @@ -222,8 +202,8 @@ public int estimate() { */ int wdwSz = numPreceding + numFollowing + 1; - return underlying + (underlying * wdwSz) + (underlying * wdwSz) - + (3 * JavaDataModel.PRIMITIVES1); + return underlying + (underlying * wdwSz) + (underlying * wdwSz) + (3 + * JavaDataModel.PRIMITIVES1); } protected void reset() { @@ -232,8 +212,8 @@ protected void reset() { } } - public FirstValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, - int numPreceding, int numFollowing) { + public FirstValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, int numPreceding, + int numFollowing) { super(wrappedEval, numPreceding, numFollowing); } @@ -253,8 +233,7 @@ protected ObjectInspector inputOI() { } @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException { + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { State s = (State) agg; FirstValueBuffer fb = (FirstValueBuffer) s.wrappedBuf; @@ -266,15 +245,14 @@ public void iterate(AggregationBuffer agg, Object[] parameters) wrappedEval.iterate(fb, parameters); } - Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], - inputOI(), ObjectInspectorCopyOption.WRITABLE); + Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI(), + ObjectInspectorCopyOption.WRITABLE); /* * add row to chain. except in case of UNB preceding: - only 1 firstVal * needs to be tracked. */ - if (s.numPreceding != BoundarySpec.UNBOUNDED_AMOUNT - || s.valueChain.isEmpty()) { + if (s.numPreceding != BoundarySpec.UNBOUNDED_AMOUNT || s.valueChain.isEmpty()) { /* * add value to chain if it is not null or if skipNulls is false. 
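The value chain used by the streaming first_value evaluator above is easier to follow in isolation. The sketch below keeps non-null candidates for a trailing window of p PRECEDING rows in an ArrayDeque and drops entries once they slide out of range; the class and method names are invented for the example, and the UNBOUNDED PRECEDING shortcut and Writable handling of the real evaluator are deliberately left out.

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;

// Sliding-window first_value(value, skipNulls=true) over "p PRECEDING to CURRENT ROW",
// keeping non-null candidates in a deque the way the streaming evaluator keeps its chain.
public class FirstValueWindowSketch {

  static Integer[] firstValues(Integer[] rows, int p) {
    Integer[] out = new Integer[rows.length];
    Deque<int[]> chain = new ArrayDeque<>();   // each entry: {rowIndex, value}
    for (int i = 0; i < rows.length; i++) {
      // drop candidates that have slid out of the window [i - p, i]
      while (!chain.isEmpty() && chain.peekFirst()[0] < i - p) {
        chain.removeFirst();
      }
      // nulls are skipped, so only non-null values become candidates
      if (rows[i] != null) {
        chain.addLast(new int[] {i, rows[i]});
      }
      out[i] = chain.isEmpty() ? null : chain.peekFirst()[1];
    }
    return out;
  }

  public static void main(String[] args) {
    Integer[] rows = {null, 7, 3, null, 9};
    // window = 2 PRECEDING to CURRENT ROW -> [null, 7, 7, 7, 3]
    System.out.println(Arrays.toString(firstValues(rows, 2)));
  }
}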
*/ @@ -309,8 +287,7 @@ public void iterate(AggregationBuffer agg, Object[] parameters) public Object terminate(AggregationBuffer agg) throws HiveException { State s = (State) agg; FirstValueBuffer fb = (FirstValueBuffer) s.wrappedBuf; - ValIndexPair r = fb.skipNulls && s.valueChain.size() == 0 ? null - : s.valueChain.getFirst(); + ValIndexPair r = fb.skipNulls && s.valueChain.size() == 0 ? null : s.valueChain.getFirst(); for (int i = 0; i < s.numFollowing; i++) { s.results.add(r == null ? null : r.val); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java index 99c41b5..e099154 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java @@ -37,131 +37,107 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false, impliesOrder = true) -public class GenericUDAFLastValue extends AbstractGenericUDAFResolver -{ - static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class - .getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) - throws SemanticException - { - if (parameters.length > 2) - { - throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); - } - if ( parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo) ) - { - throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); - } - return createEvaluator(); - } - - protected GenericUDAFLastValueEvaluator createEvaluator() - { - return new GenericUDAFLastValueEvaluator(); - } - - static class LastValueBuffer implements AggregationBuffer - { - Object val; - boolean firstRow; - boolean skipNulls; - - LastValueBuffer() - { - init(); - } - - void init() - { - val = null; - firstRow = true; - skipNulls = false; - } - - } - - public static class GenericUDAFLastValueEvaluator extends - GenericUDAFEvaluator - { - ObjectInspector inputOI; - ObjectInspector outputOI; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) - throws HiveException - { - super.init(m, parameters); - if (m != Mode.COMPLETE) - { - throw new HiveException( - "Only COMPLETE mode supported for Rank function"); - } - inputOI = parameters[0]; - outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, - ObjectInspectorCopyOption.WRITABLE); - return outputOI; - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException - { - return new LastValueBuffer(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException - { - ((LastValueBuffer) agg).init(); - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException - { - LastValueBuffer lb = (LastValueBuffer) agg; - if (lb.firstRow ) - { - lb.firstRow = false; - if ( parameters.length == 2 ) - { - lb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean( - parameters[1], - PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); - } - } - - Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], - inputOI, ObjectInspectorCopyOption.WRITABLE); +@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), + supportsWindow = 
true, pivotResult = false, impliesOrder = true) +public class GenericUDAFLastValue extends AbstractGenericUDAFResolver { + + static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class.getName()); + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length > 2) { + throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); + } + if (parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo)) { + throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); + } + return createEvaluator(); + } + + protected GenericUDAFLastValueEvaluator createEvaluator() { + return new GenericUDAFLastValueEvaluator(); + } + + static class LastValueBuffer implements AggregationBuffer { + + Object val; + boolean firstRow; + boolean skipNulls; + + LastValueBuffer() { + init(); + } + + void init() { + val = null; + firstRow = true; + skipNulls = false; + } + + } + + public static class GenericUDAFLastValueEvaluator extends GenericUDAFEvaluator { + + ObjectInspector inputOI; + ObjectInspector outputOI; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + if (m != Mode.COMPLETE) { + throw new HiveException("Only COMPLETE mode supported for Rank function"); + } + inputOI = parameters[0]; + outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, + ObjectInspectorCopyOption.WRITABLE); + return outputOI; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new LastValueBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((LastValueBuffer) agg).init(); + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + LastValueBuffer lb = (LastValueBuffer) agg; + if (lb.firstRow) { + lb.firstRow = false; + if (parameters.length == 2) { + lb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(parameters[1], + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); + } + } + + Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI, + ObjectInspectorCopyOption.WRITABLE); if (!lb.skipNulls || o != null) { lb.val = o; } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) - throws HiveException - { - throw new HiveException("terminatePartial not supported"); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) - throws HiveException - { - throw new HiveException("merge not supported"); - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - LastValueBuffer lb = (LastValueBuffer) agg; - return lb.val; - - } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + throw new HiveException("terminatePartial not supported"); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + throw new HiveException("merge not supported"); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + LastValueBuffer lb = (LastValueBuffer) agg; + return lb.val; + + } @Override public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { @@ -169,12 +145,12 @@ public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { BoundaryDef end = wFrmDef.getEnd(); return new LastValStreamingFixedWindow(this, start.getAmt(), 
end.getAmt()); } - } + } - static class LastValStreamingFixedWindow extends - GenericUDAFStreamingEvaluator { + static class LastValStreamingFixedWindow extends GenericUDAFStreamingEvaluator { class State extends GenericUDAFStreamingEvaluator.StreamingState { + private Object lastValue; private int lastIdx; @@ -203,8 +179,8 @@ protected void reset() { } } - public LastValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, - int numPreceding, int numFollowing) { + public LastValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, int numPreceding, + int numFollowing) { super(wrappedEval, numPreceding, numFollowing); } @@ -224,8 +200,7 @@ protected ObjectInspector inputOI() { } @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException { + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { State s = (State) agg; LastValueBuffer lb = (LastValueBuffer) s.wrappedBuf; @@ -237,8 +212,8 @@ public void iterate(AggregationBuffer agg, Object[] parameters) wrappedEval.iterate(lb, parameters); } - Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], - inputOI(), ObjectInspectorCopyOption.WRITABLE); + Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI(), + ObjectInspectorCopyOption.WRITABLE); if (!lb.skipNulls || o != null) { s.lastValue = o; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java index 18cde76..83693a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java @@ -38,144 +38,129 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IntWritable; -@WindowFunctionDescription -( - description = @Description( - name = "rank", - value = "_FUNC_(x) NTILE allows easy calculation of tertiles, quartiles, deciles and other " + - "common summary statistics. This function divides an ordered partition into a specified " + - "number of groups called buckets and assigns a bucket number to each row in the partition." - ), - supportsWindow = false, - pivotResult = true +@WindowFunctionDescription( + description = @Description( + name = "rank", + value = "_FUNC_(x) NTILE allows easy calculation of tertiles, quartiles, deciles and other " + +"common summary statistics. This function divides an ordered partition into a " + + "specified number of groups called buckets and assigns a bucket number to each row " + + "in the partition." 
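A worked example may help with the NTILE description above and with the remainder handling that appears later in this hunk. The sketch below assigns bucket numbers the way the documented logic describes, with the first numRows % numBuckets buckets each getting one extra row; it is a standalone illustration with its own names, not the evaluator itself.

import java.util.ArrayList;
import java.util.List;

// NTILE bucket assignment: 10 rows into 4 buckets -> bucket sizes 3, 3, 2, 2.
public class NTileSketch {
  public static void main(String[] args) {
    int numRows = 10;
    int numBuckets = 4;

    int bucketSize = numRows / numBuckets;   // base size of every bucket
    int rem = numRows % numBuckets;          // the first 'rem' buckets get one extra row

    List<Integer> buckets = new ArrayList<>(numRows);
    int start = 0;
    int bucket = 1;
    while (start < numRows) {
      int end = start + bucketSize + (rem > 0 ? 1 : 0);
      if (rem > 0) {
        rem--;
      }
      end = Math.min(numRows, end);
      for (int i = start; i < end; i++) {
        buckets.add(bucket);
      }
      start = end;
      bucket++;
    }
    System.out.println(buckets);   // [1, 1, 1, 2, 2, 2, 3, 3, 4, 4]
  }
}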
+ ), + supportsWindow = false, + pivotResult = true ) -public class GenericUDAFNTile extends AbstractGenericUDAFResolver -{ - static final Log LOG = LogFactory.getLog(GenericUDAFNTile.class.getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException - { - if (parameters.length != 1) - { - throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); - } - ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]); - - boolean c = ObjectInspectorUtils.compareTypes(oi, PrimitiveObjectInspectorFactory.writableIntObjectInspector); - if (!c) - { - throw new UDFArgumentTypeException(0, "Number of tiles must be an int expression"); - } - - return new GenericUDAFNTileEvaluator(); - } - - static class NTileBuffer implements AggregationBuffer - { - Integer numBuckets; - int numRows; - - void init() - { - numBuckets = null; - numRows = 0; - } - - NTileBuffer() - { - init(); - } - } - - public static class GenericUDAFNTileEvaluator extends GenericUDAFEvaluator - { - private transient PrimitiveObjectInspector inputOI; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException - { - assert (parameters.length == 1); - super.init(m, parameters); - if (m != Mode.COMPLETE) - { - throw new HiveException( - "Only COMPLETE mode supported for NTile function"); - } - inputOI = (PrimitiveObjectInspector) parameters[0]; - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableIntObjectInspector); - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException - { - return new NTileBuffer(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException - { - ((NTileBuffer) agg).init(); - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException - { - NTileBuffer rb = (NTileBuffer) agg; - if ( rb.numBuckets == null) - { - rb.numBuckets = PrimitiveObjectInspectorUtils.getInt(parameters[0], inputOI); - } - rb.numRows++; - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException - { - throw new HiveException("terminatePartial not supported"); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException - { - throw new HiveException("merge not supported"); - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - NTileBuffer rb = (NTileBuffer) agg; - ArrayList res = new ArrayList(rb.numRows); - - /* - * if there is a remainder from numRows/numBuckets; then distribute increase the size of the first 'rem' buckets by 1. 
- */ - - int bucketsz = rb.numRows / rb.numBuckets; - int rem = rb.numRows % rb.numBuckets; - int start = 0; - int bucket = 1; - while ( start < rb.numRows) - { - int end = start + bucketsz; - if (rem > 0) - { - end++; rem--; - } - end = Math.min(rb.numRows, end); - for(int i = start; i < end; i++) - { - res.add(new IntWritable(bucket)); - } - start = end; - bucket++; - } - - return res; - } - - } - +public class GenericUDAFNTile extends AbstractGenericUDAFResolver { + + static final Log LOG = LogFactory.getLog(GenericUDAFNTile.class.getName()); + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length != 1) { + throw new UDFArgumentTypeException(parameters.length - 1, + "Exactly one argument is expected."); + } + ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]); + + boolean c = ObjectInspectorUtils.compareTypes(oi, + PrimitiveObjectInspectorFactory.writableIntObjectInspector); + if (!c) { + throw new UDFArgumentTypeException(0, "Number of tiles must be an int expression"); + } + + return new GenericUDAFNTileEvaluator(); + } + + static class NTileBuffer implements AggregationBuffer { + + Integer numBuckets; + int numRows; + + void init() { + numBuckets = null; + numRows = 0; + } + + NTileBuffer() { + init(); + } + } + + public static class GenericUDAFNTileEvaluator extends GenericUDAFEvaluator { + + private transient PrimitiveObjectInspector inputOI; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + assert (parameters.length == 1); + super.init(m, parameters); + if (m != Mode.COMPLETE) { + throw new HiveException("Only COMPLETE mode supported for NTile function"); + } + inputOI = (PrimitiveObjectInspector) parameters[0]; + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector); + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new NTileBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NTileBuffer) agg).init(); + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + NTileBuffer rb = (NTileBuffer) agg; + if (rb.numBuckets == null) { + rb.numBuckets = PrimitiveObjectInspectorUtils.getInt(parameters[0], inputOI); + } + rb.numRows++; + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + throw new HiveException("terminatePartial not supported"); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + throw new HiveException("merge not supported"); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + NTileBuffer rb = (NTileBuffer) agg; + ArrayList res = new ArrayList(rb.numRows); + + /* + * if there is a remainder from numRows/numBuckets; then distribute increase the size of the first 'rem' buckets by 1. 
+ */ + + int bucketsz = rb.numRows / rb.numBuckets; + int rem = rb.numRows % rb.numBuckets; + int start = 0; + int bucket = 1; + while (start < rb.numRows) { + int end = start + bucketsz; + if (rem > 0) { + end++; + rem--; + } + end = Math.min(rb.numRows, end); + for (int i = start; i < end; i++) { + res.add(new IntWritable(bucket)); + } + start = end; + bucket++; + } + + return res; + } + + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java index 1cca03ec..b73a6eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java @@ -31,56 +31,52 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IntWritable; -@WindowFunctionDescription -( - description = @Description( - name = "percent_rank", - value = "_FUNC_(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather " + - "than row counts in its numerator. PERCENT_RANK of a row is calculated as: " + - "(rank of row in its partition - 1) / (number of rows in the partition - 1)" - ), - supportsWindow = false, - pivotResult = true, - rankingFunction = true, - impliesOrder = true +@WindowFunctionDescription( + description = @Description( + name = "percent_rank", + value = "_FUNC_(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather " + + "than row counts in its numerator. PERCENT_RANK of a row is calculated as: " + + "(rank of row in its partition - 1) / (number of rows in the partition - 1)" + ), + supportsWindow = false, + pivotResult = true, + rankingFunction = true, + impliesOrder = true ) -public class GenericUDAFPercentRank extends GenericUDAFRank -{ - static final Log LOG = LogFactory.getLog(GenericUDAFPercentRank.class.getName()); +public class GenericUDAFPercentRank extends GenericUDAFRank { - @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() - { - return new GenericUDAFPercentRankEvaluator(); - } + static final Log LOG = LogFactory.getLog(GenericUDAFPercentRank.class.getName()); - public static class GenericUDAFPercentRankEvaluator extends GenericUDAFAbstractRankEvaluator - { - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException - { - super.init(m, parameters); - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - } + @Override + protected GenericUDAFAbstractRankEvaluator createEvaluator() { + return new GenericUDAFPercentRankEvaluator(); + } - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - ArrayList ranks = ((RankBuffer) agg).rowNums; - double sz = ranks.size(); - if ( sz > 1 ) { + public static class GenericUDAFPercentRankEvaluator extends GenericUDAFAbstractRankEvaluator { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + ArrayList ranks = ((RankBuffer) agg).rowNums; + double sz = ranks.size(); + if (sz > 1) { sz = sz - 1; } - ArrayList pranks = new ArrayList(ranks.size()); + ArrayList pranks = new 
ArrayList(ranks.size()); - for(IntWritable i : ranks) - { - double pr = ((double)i.get() - 1)/sz; - pranks.add(new DoubleWritable(pr)); - } + for (IntWritable i : ranks) { + double pr = ((double) i.get() - 1) / sz; + pranks.add(new DoubleWritable(pr)); + } - return pranks; - } - } + return pranks; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java index 3eea6b2..528cdbc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java @@ -38,170 +38,150 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IntWritable; -@WindowFunctionDescription -( - description = @Description( - name = "rank", - value = "_FUNC_(x)" - ), - supportsWindow = false, - pivotResult = true, - rankingFunction = true, - impliesOrder = true -) -public class GenericUDAFRank extends AbstractGenericUDAFResolver -{ - static final Log LOG = LogFactory.getLog(GenericUDAFRank.class.getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException - { - if (parameters.length < 1) - { - throw new UDFArgumentTypeException(parameters.length - 1, "One or more arguments are expected."); - } - for(int i=0; i type or complex type containing map<>."); - } - } - return createEvaluator(); - } - - protected GenericUDAFAbstractRankEvaluator createEvaluator() - { - return new GenericUDAFRankEvaluator(); - } - - static class RankBuffer implements AggregationBuffer - { - ArrayList rowNums; - int currentRowNum; - Object[] currVal; - int currentRank; - int numParams; - boolean supportsStreaming; - - RankBuffer(int numParams, boolean supportsStreaming) - { - this.numParams = numParams; - this.supportsStreaming = supportsStreaming; - init(); - } - - void init() - { - rowNums = new ArrayList(); - currentRowNum = 0; - currentRank = 0; - currVal = new Object[numParams]; - if ( supportsStreaming ) { - /* initialize rowNums to have 1 row */ - rowNums.add(null); - } - } - - void incrRowNum() { currentRowNum++; } - - void addRank() - { - if ( supportsStreaming ) { - rowNums.set(0, new IntWritable(currentRank)); - } else { - rowNums.add(new IntWritable(currentRank)); - } - } - } - - public static abstract class GenericUDAFAbstractRankEvaluator extends GenericUDAFEvaluator - { - ObjectInspector[] inputOI; - ObjectInspector[] outputOI; - boolean isStreamingMode = false; - - protected boolean isStreaming() { - return isStreamingMode; - } - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException - { - super.init(m, parameters); - if (m != Mode.COMPLETE) - { - throw new HiveException( - "Only COMPLETE mode supported for Rank function"); - } - inputOI = parameters; - outputOI = new ObjectInspector[inputOI.length]; - for(int i=0; i < inputOI.length; i++) - { - outputOI[i] = ObjectInspectorUtils.getStandardObjectInspector(inputOI[i], ObjectInspectorCopyOption.JAVA); - } - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableIntObjectInspector); - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException - { - return new RankBuffer(inputOI.length, isStreamingMode); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException - { - ((RankBuffer) agg).init(); - } - - @Override - public void iterate(AggregationBuffer agg, Object[] 
parameters) throws HiveException - { - RankBuffer rb = (RankBuffer) agg; - int c = GenericUDAFRank.compare(rb.currVal, outputOI, parameters, inputOI); - rb.incrRowNum(); - if ( rb.currentRowNum == 1 || c != 0 ) - { - nextRank(rb); - rb.currVal = GenericUDAFRank.copyToStandardObject(parameters, inputOI, ObjectInspectorCopyOption.JAVA); - } - rb.addRank(); - } - - /* - * Called when the value in the partition has changed. Update the currentRank - */ - protected void nextRank(RankBuffer rb) - { - rb.currentRank = rb.currentRowNum; - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException - { - throw new HiveException("terminatePartial not supported"); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException - { - throw new HiveException("merge not supported"); - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - return ((RankBuffer) agg).rowNums; - } - - } - - public static class GenericUDAFRankEvaluator extends - GenericUDAFAbstractRankEvaluator implements - ISupportStreamingModeForWindowing { +@WindowFunctionDescription( + description = @Description( + name = "rank", + value = "_FUNC_(x)"), + supportsWindow = false, + pivotResult = true, + rankingFunction = true, + impliesOrder = true) +public class GenericUDAFRank extends AbstractGenericUDAFResolver { + + static final Log LOG = LogFactory.getLog(GenericUDAFRank.class.getName()); + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length < 1) { + throw new UDFArgumentTypeException(parameters.length - 1, + "One or more arguments are expected."); + } + for (int i = 0; i < parameters.length; i++) { + ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[i]); + if (!ObjectInspectorUtils.compareSupported(oi)) { + throw new UDFArgumentTypeException(i, + "Cannot support comparison of map<> type or complex type containing map<>."); + } + } + return createEvaluator(); + } + + protected GenericUDAFAbstractRankEvaluator createEvaluator() { + return new GenericUDAFRankEvaluator(); + } + + static class RankBuffer implements AggregationBuffer { + + ArrayList rowNums; + int currentRowNum; + Object[] currVal; + int currentRank; + int numParams; + boolean supportsStreaming; + + RankBuffer(int numParams, boolean supportsStreaming) { + this.numParams = numParams; + this.supportsStreaming = supportsStreaming; + init(); + } + + void init() { + rowNums = new ArrayList(); + currentRowNum = 0; + currentRank = 0; + currVal = new Object[numParams]; + if (supportsStreaming) { + /* initialize rowNums to have 1 row */ + rowNums.add(null); + } + } + + void incrRowNum() { currentRowNum++; } + + void addRank() { + if (supportsStreaming) { + rowNums.set(0, new IntWritable(currentRank)); + } else { + rowNums.add(new IntWritable(currentRank)); + } + } + } + + public static abstract class GenericUDAFAbstractRankEvaluator extends GenericUDAFEvaluator { + + ObjectInspector[] inputOI; + ObjectInspector[] outputOI; + boolean isStreamingMode = false; + + protected boolean isStreaming() { + return isStreamingMode; + } + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + if (m != Mode.COMPLETE) { + throw new HiveException("Only COMPLETE mode supported for Rank function"); + } + inputOI = parameters; + outputOI = new ObjectInspector[inputOI.length]; + for (int i = 0; i < 
inputOI.length; i++) { + outputOI[i] = ObjectInspectorUtils.getStandardObjectInspector(inputOI[i], + ObjectInspectorCopyOption.JAVA); + } + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector); + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new RankBuffer(inputOI.length, isStreamingMode); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((RankBuffer) agg).init(); + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + RankBuffer rb = (RankBuffer) agg; + int c = GenericUDAFRank.compare(rb.currVal, outputOI, parameters, inputOI); + rb.incrRowNum(); + if (rb.currentRowNum == 1 || c != 0) { + nextRank(rb); + rb.currVal = + GenericUDAFRank.copyToStandardObject(parameters, inputOI, ObjectInspectorCopyOption.JAVA); + } + rb.addRank(); + } + + /* + * Called when the value in the partition has changed. Update the currentRank + */ + protected void nextRank(RankBuffer rb) { + rb.currentRank = rb.currentRowNum; + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + throw new HiveException("terminatePartial not supported"); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + throw new HiveException("merge not supported"); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((RankBuffer) agg).rowNums; + } + + } + + public static class GenericUDAFRankEvaluator extends GenericUDAFAbstractRankEvaluator + implements ISupportStreamingModeForWindowing { @Override public Object getNextResult(AggregationBuffer agg) throws HiveException { @@ -215,18 +195,15 @@ public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { } @Override - public int getRowsRemainingAfterTerminate() - throws HiveException { + public int getRowsRemainingAfterTerminate() throws HiveException { return 0; } } public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2, - ObjectInspector[] oi2) - { + ObjectInspector[] oi2) { int c = 0; - for (int i = 0; i < oi1.length; i++) - { + for (int i = 0; i < oi1.length; i++) { c = ObjectInspectorUtils.compare(o1[i], oi1[i], o2[i], oi2[i]); if (c != 0) { return c; @@ -235,15 +212,11 @@ public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2, return c; } - public static Object[] copyToStandardObject(Object[] o, - ObjectInspector[] oi, - ObjectInspectorCopyOption objectInspectorOption) - { + public static Object[] copyToStandardObject(Object[] o, ObjectInspector[] oi, + ObjectInspectorCopyOption objectInspectorOption) { Object[] out = new Object[o.length]; - for (int i = 0; i < oi.length; i++) - { - out[i] = ObjectInspectorUtils.copyToStandardObject(o[i], oi[i], - objectInspectorOption); + for (int i = 0; i < oi.length; i++) { + out[i] = ObjectInspectorUtils.copyToStandardObject(o[i], oi[i], objectInspectorOption); } return out; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java index 987da3d..d733e2e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java @@ -34,110 +34,89 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.IntWritable; 
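The ranking evaluators touched above (rank, dense_rank, percent_rank) are easiest to compare side by side. The sketch below computes all three for one ordered partition containing a tie, following the formulas given in their descriptions; it illustrates the semantics only, not the RankBuffer machinery.

import java.util.Arrays;

// rank, dense_rank and percent_rank for one ordered partition with a tie.
public class RankingSketch {
  public static void main(String[] args) {
    int[] ordered = {10, 20, 20, 30};   // partition values in ORDER BY order
    int n = ordered.length;
    int[] rank = new int[n];
    int[] denseRank = new int[n];
    double[] percentRank = new double[n];

    int currentRank = 0;
    int currentDenseRank = 0;
    for (int i = 0; i < n; i++) {
      if (i == 0 || ordered[i] != ordered[i - 1]) {
        currentRank = i + 1;   // rank jumps to the row number when the value changes
        currentDenseRank++;    // dense_rank moves to the next rank, leaving no gaps
      }
      rank[i] = currentRank;
      denseRank[i] = currentDenseRank;
      // percent_rank = (rank - 1) / (rows in partition - 1), a single-row partition giving 0
      percentRank[i] = n > 1 ? (double) (rank[i] - 1) / (n - 1) : 0.0;
    }

    System.out.println(Arrays.toString(rank));         // [1, 2, 2, 4]
    System.out.println(Arrays.toString(denseRank));    // [1, 2, 2, 3]
    System.out.println(Arrays.toString(percentRank));  // [0.0, 0.3333333333333333, 0.3333333333333333, 1.0]
  }
}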
-@WindowFunctionDescription -( - description = @Description( - name = "row_number", - value = "_FUNC_() - The ROW_NUMBER function assigns a unique number (sequentially, starting from 1, as defined by ORDER BY) to each row within the partition." - ), - supportsWindow = false, - pivotResult = true +@WindowFunctionDescription( + description = @Description( + name = "row_number", + value = "_FUNC_() - The ROW_NUMBER function assigns a unique number (sequentially, starting " + + "from 1, as defined by ORDER BY) to each row within the partition." + ), + supportsWindow = false, + pivotResult = true ) -public class GenericUDAFRowNumber extends AbstractGenericUDAFResolver -{ - static final Log LOG = LogFactory.getLog(GenericUDAFRowNumber.class.getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) - throws SemanticException - { - if (parameters.length != 0) - { - throw new UDFArgumentTypeException(parameters.length - 1, - "No argument is expected."); - } - return new GenericUDAFRowNumberEvaluator(); - } - - static class RowNumberBuffer implements AggregationBuffer - { - ArrayList rowNums; - int nextRow; - - void init() - { - rowNums = new ArrayList(); - } - - RowNumberBuffer() - { - init(); - nextRow = 1; - } - - void incr() - { - rowNums.add(new IntWritable(nextRow++)); - } - } - - public static class GenericUDAFRowNumberEvaluator extends - GenericUDAFEvaluator - { - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) - throws HiveException - { - super.init(m, parameters); - if (m != Mode.COMPLETE) - { - throw new HiveException("Only COMPLETE mode supported for row_number function"); - } - - return ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableIntObjectInspector); - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException - { - return new RowNumberBuffer(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException - { - ((RowNumberBuffer) agg).init(); - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException - { - ((RowNumberBuffer) agg).incr(); - } - - @Override - public Object terminatePartial(AggregationBuffer agg) - throws HiveException - { - throw new HiveException("terminatePartial not supported"); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) - throws HiveException - { - throw new HiveException("merge not supported"); - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - return ((RowNumberBuffer) agg).rowNums; - } - - } +public class GenericUDAFRowNumber extends AbstractGenericUDAFResolver { + + static final Log LOG = LogFactory.getLog(GenericUDAFRowNumber.class.getName()); + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length != 0) { + throw new UDFArgumentTypeException(parameters.length - 1, "No argument is expected."); + } + return new GenericUDAFRowNumberEvaluator(); + } + + static class RowNumberBuffer implements AggregationBuffer { + + ArrayList rowNums; + int nextRow; + + void init() { + rowNums = new ArrayList(); + } + + RowNumberBuffer() { + init(); + nextRow = 1; + } + + void incr() { + rowNums.add(new IntWritable(nextRow++)); + } + } + + public static class GenericUDAFRowNumberEvaluator extends GenericUDAFEvaluator { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException { + super.init(m, parameters); + if (m != Mode.COMPLETE) { + throw new HiveException("Only COMPLETE mode supported for row_number function"); + } + + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector); + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new RowNumberBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((RowNumberBuffer) agg).init(); + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + ((RowNumberBuffer) agg).incr(); + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + throw new HiveException("terminatePartial not supported"); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + throw new HiveException("merge not supported"); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((RowNumberBuffer) agg).rowNums; + } + + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java index 2363ff3..c86a9bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java @@ -46,7 +46,7 @@ public GenericUDFBasePad(String _udfName) { public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length != 3) { throw new UDFArgumentException(udfName + " requires three arguments. Found :" - + arguments.length); + + arguments.length); } converter1 = checkTextArguments(arguments, 0); converter2 = checkIntArguments(arguments, 1); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java index b3a9e67..b12f856 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseTrim.java @@ -40,14 +40,14 @@ public GenericUDFBaseTrim(String _udfName) { public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length != 1) { throw new UDFArgumentException(udfName + " requires one value argument. Found :" - + arguments.length); + + arguments.length); } PrimitiveObjectInspector argumentOI; if(arguments[0] instanceof PrimitiveObjectInspector) { argumentOI = (PrimitiveObjectInspector) arguments[0]; } else { throw new UDFArgumentException(udfName + " takes only primitive types. found " - + arguments[0].getTypeName()); + + arguments[0].getTypeName()); } switch (argumentOI.getPrimitiveCategory()) { case STRING: @@ -56,7 +56,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen break; default: throw new UDFArgumentException(udfName + " takes only STRING/CHAR/VARCHAR types. 
Found " - + argumentOI.getPrimitiveCategory()); + + argumentOI.getPrimitiveCategory()); } converter = new TextConverter(argumentOI); return PrimitiveObjectInspectorFactory.writableStringObjectInspector; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java index 9d05e12..9d9ee57 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java @@ -51,7 +51,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { boolean invert = (Boolean) ((PrimitiveObjectInspector) argumentOIs[0]) - .getPrimitiveJavaObject(arguments[0].get()); + .getPrimitiveJavaObject(arguments[0].get()); BooleanWritable left = ((BooleanWritable)egt.evaluate(new DeferredObject[] {arguments[1], arguments[2]})); if (left == null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java index 5d8bd0d..73ae384 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java @@ -68,7 +68,7 @@ private transient PrimitiveCategory inputType1; private transient PrimitiveCategory inputType2; private IntWritable result = new IntWritable(); - + public GenericUDFDateDiff() { formatter.setTimeZone(TimeZone.getTimeZone("UTC")); } @@ -89,8 +89,8 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen @Override public IntWritable evaluate(DeferredObject[] arguments) throws HiveException { - output = evaluate(convertToDate(inputType1, inputConverter1, arguments[0]), - convertToDate(inputType2, inputConverter2, arguments[1])); + output = evaluate(convertToDate(inputType1, inputConverter1, arguments[0]), + convertToDate(inputType2, inputConverter2, arguments[1])); return output; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java index aed82b3..4d8473f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java @@ -44,8 +44,8 @@ @Description(name = "decode", value = "_FUNC_(bin, str) - Decode the first argument using the second argument character set", extended = "Possible options for the character set are 'US_ASCII', 'ISO-8859-1',\n" + - "'UTF-8', 'UTF-16BE', 'UTF-16LE', and 'UTF-16'. If either argument\n" + - "is null, the result will also be null") + "'UTF-8', 'UTF-16BE', 'UTF-16LE', and 'UTF-16'. 
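For readers unfamiliar with the JDK pieces behind the decode() description above, the minimal sketch below shows what decoding a binary value with a named character set amounts to using java.nio.charset. It is a plain-JDK illustration, not the GenericUDFDecode implementation.

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;

// decode(bin, 'UTF-8') conceptually: interpret the binary payload with the named charset.
public class DecodeSketch {
  public static void main(String[] args) throws CharacterCodingException {
    byte[] bin = {(byte) 0xE2, (byte) 0x82, (byte) 0xAC};   // UTF-8 bytes of the euro sign
    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
    String decoded = decoder.decode(ByteBuffer.wrap(bin)).toString();
    System.out.println(decoded);   // the euro sign
  }
}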
If either argument\n" + + "is null, the result will also be null") public class GenericUDFDecode extends GenericUDF { private transient CharsetDecoder decoder = null; private transient BinaryObjectInspector bytesOI = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java index 76ee94e..f15a869 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java @@ -46,7 +46,7 @@ protected void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text s // Copy the padding for (int i = 0; i < pos; i += pad.getLength()) { for (int j = 0; j < pad.getLength() && j < pos - i; j++) { - data[i + j] = padTxt[j]; + data[i + j] = padTxt[j]; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java index e436f3a..b512332 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java @@ -48,7 +48,7 @@ protected void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text s // Copy the padding while (pos < len) { for (int i = 0; i < pad.getLength() && i < len - pos; i++) { - data[pos + i] = padTxt[i]; + data[pos + i] = padTxt[i]; } pos += pad.getLength(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java index e449e74..d1397a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java @@ -59,7 +59,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen || (argumentOI instanceof StringObjectInspector) || (argumentOI instanceof VoidObjectInspector))){ throw new UDFArgumentException("Only string, char, varchar or binary data can be cast into binary " + - "data types."); + "data types."); } baConverter = new BinaryConverter(argumentOI, PrimitiveObjectInspectorFactory.writableBinaryObjectInspector); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java index 0336ecd..ea5aeec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java @@ -47,7 +47,7 @@ */ @Description(name = "json_tuple", value = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. 
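The padding-copy loops adjusted in GenericUDFLpad and GenericUDFRpad above repeat the pad bytes until the target length is reached. The standalone sketch below does the same for an rpad-style operation on raw byte arrays; its names are its own, it assumes a non-empty pad, and it skips the Text/Writable handling of the real UDFs.

import java.nio.charset.StandardCharsets;

// rpad-style padding on raw bytes: copy the source, then repeat the pad bytes up to 'len'.
public class RpadSketch {

  static byte[] rpad(byte[] src, byte[] pad, int len) {
    byte[] data = new byte[len];
    int pos = Math.min(src.length, len);
    System.arraycopy(src, 0, data, 0, pos);   // the original value, truncated if longer than len
    while (pos < len) {
      // copy as much of the pad as still fits before the target length
      for (int i = 0; i < pad.length && i < len - pos; i++) {
        data[pos + i] = pad[i];
      }
      pos += pad.length;   // assumes pad.length > 0
    }
    return data;
  }

  public static void main(String[] args) {
    byte[] out = rpad("hi".getBytes(StandardCharsets.UTF_8),
        "ab".getBytes(StandardCharsets.UTF_8), 7);
    System.out.println(new String(out, StandardCharsets.UTF_8));   // hiababa
  }
}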
" + - "All the input parameters and output column types are string.") + "All the input parameters and output column types are string.") public class GenericUDTFJSONTuple extends GenericUDTF { @@ -106,7 +106,7 @@ public StructObjectInspector initialize(ObjectInspector[] args) if (numCols < 1) { throw new UDFArgumentException("json_tuple() takes at least two arguments: " + - "the json string and a path expression"); + "the json string and a path expression"); } for (int i = 0; i < args.length; ++i) { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java index dff9ba6..f3ef0f5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java @@ -86,7 +86,7 @@ public StructObjectInspector initialize(ObjectInspector[] args) if (numCols < 1) { throw new UDFArgumentException("parse_url_tuple() takes at least two arguments: " + - "the url string and a part name"); + "the url string and a part name"); } for (int i = 0; i < args.length; ++i) { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java index 8215fc0..aa48a6c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java @@ -225,7 +225,7 @@ private void validateAndSetupSymbolInfo(MatchPath evaluator, if ( symbolArgsSz % 2 != 0) { throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " + - "there are odd number of symbol args"); + "there are odd number of symbol args"); } evaluator.symInfo = new SymbolsInfo(symbolArgsSz/2); @@ -253,7 +253,7 @@ private void validateAndSetupSymbolInfo(MatchPath evaluator, PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN ) { throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " + - "must be a boolean expression", symolExprArg.getExpressionTreeString())); + "must be a boolean expression", symolExprArg.getExpressionTreeString())); } evaluator.symInfo.add(symbolName, symolExprArg); } diff --git service/src/java/org/apache/hive/service/auth/KerberosSaslHelper.java service/src/java/org/apache/hive/service/auth/KerberosSaslHelper.java index 4b70558..83c4ee3 100644 --- service/src/java/org/apache/hive/service/auth/KerberosSaslHelper.java +++ service/src/java/org/apache/hive/service/auth/KerberosSaslHelper.java @@ -79,8 +79,8 @@ public static TTransport getKerberosTransport(String principal, String host, } } - public static TTransport createSubjectAssumedTransport(String principal, - TTransport underlyingTransport, Map saslProps) throws IOException { + public static TTransport createSubjectAssumedTransport(String principal, + TTransport underlyingTransport, Map saslProps) throws IOException { TTransport saslTransport = null; final String names[] = principal.split("[/@]"); try {