From 1e46300451a526482a3c17415c0ba52f2bad2602 Mon Sep 17 00:00:00 2001 From: Panos Garefalakis Date: Mon, 2 Mar 2020 16:16:35 +0000 Subject: [PATCH 1/3] Expose FilterContext as part of Hive storage-api --- .../hive/ql/io/filter/FilterContext.java | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java new file mode 100644 index 0000000000..d00c42963d --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.filter; + +/** + * A representation of a Filter applied on the rows of a VectorizedRowBatch + * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}. + * + * Each FilterContext consists of an array with the ids (int) of rows that are selected + * by the filter, an integer representing the number of selected rows, and a boolean showing + * if the filter actually selected any rows. + * + */ +public class FilterContext { + private boolean currBatchIsSelectedInUse = false; + private int[] currBatchSelected = null; + private int currBatchSelectedSize = 0; + + /** + * Empty constructor + */ + public FilterContext(){}; + + /** + * Update context with the given values + * @param isSelectedInUse if the filter is applied + * @param selected an array of the selected rows + * @param selectedSize the number of the selected rows + */ + public void updateFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) { + this.currBatchIsSelectedInUse = isSelectedInUse; + this.currBatchSelected = selected; + this.currBatchSelectedSize = selectedSize; + } + + /** + * Copy context variables from the a given FilterContext + * @param other FilterContext to copy from + */ + public void copyFilterContextFromOther(FilterContext other) { + this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse; + this.currBatchSelected = other.currBatchSelected; + this.currBatchSelectedSize = other.currBatchSelectedSize; + } + + /** + * Reset FilterContext variables + */ + public void resetFilterContext() { + this.currBatchIsSelectedInUse = false; + this.currBatchSelected = null; + this.currBatchSelectedSize = 0; + } + + /** + * Set the selectedInUse boolean showing if the filter is applied + * @param selectedInUse + */ + public void setSelectedInUse(boolean selectedInUse) { + this.currBatchIsSelectedInUse = selectedInUse; + } + + /** + * Is the filter applied? + * @return true if the filter is actually applied + */ + public boolean isSelectedInUse() { + return this.currBatchIsSelectedInUse; + } + + /** + * Set the array of the rows that pass the filter + * @param selectedArray + */ + public void setSelected(int[] selectedArray) { + this.currBatchSelected = selectedArray; + } + + /** + * Return an int array with the rows that pass the filter + * @return int array + */ + public int[] getSelected() { + return this.currBatchSelected; + } + + /** + * Set the number of the rows that pass the filter + * @param selectedSize + */ + public void setSelectedSize(int selectedSize) { + this.currBatchSelectedSize = selectedSize; + } + + /** + * Return the number of rows that pass the filter + * @return an int + */ + public int getSelectedSize() { + return this.currBatchSelectedSize; + } +} -- 2.20.1 (Apple Git-117) From 2dccae8d6057256fb0f4bde1bd505e25cfeef2dc Mon Sep 17 00:00:00 2001 From: Panos Garefalakis Date: Fri, 6 Mar 2020 12:49:17 +0000 Subject: [PATCH 2/3] Addressing first round of comments by making FilterContext class immutable and exposing a MutableFilterContext subclass that can be edited. Added Test cases demonstrating the functionality. --- .../hive/ql/io/filter/TestFilterContext.java | 107 ++++++++++++++++ .../hive/ql/io/filter/FilterContext.java | 72 +++-------- .../ql/io/filter/MutableFilterContext.java | 121 ++++++++++++++++++ 3 files changed, 243 insertions(+), 57 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java create mode 100644 storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java new file mode 100644 index 0000000000..495321543b --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java @@ -0,0 +1,107 @@ +package org.apache.hadoop.hive.ql.io.filter; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; + +/** + * Test creation and manipulation of MutableFilterContext and FilterContext. + */ +public class TestFilterContext { + + private int[] makeValidSelected() { + int[] selected = new int[512]; + for (int i=0; i < selected.length; i++){ + selected[i] = i*2; + } + return selected; + } + + private int[] makeInvalidSelected() { + int[] selected = new int[512]; + Arrays.fill(selected, 1); + return selected; + } + + @Test + public void testInitFilterContext(){ + MutableFilterContext mutableFilterContext = new MutableFilterContext(); + int[] selected = makeValidSelected(); + + mutableFilterContext.setFilterContext(true, selected, selected.length); + FilterContext filterContext = mutableFilterContext.immutable(); + + Assert.assertEquals(true, filterContext.isSelectedInUse()); + Assert.assertEquals(512, filterContext.getSelectedSize()); + Assert.assertEquals(512, filterContext.getSelected().length); + } + + + @Test + public void testResetFilterContext(){ + MutableFilterContext mutableFilterContext = new MutableFilterContext(); + int[] selected = makeValidSelected(); + + mutableFilterContext.setFilterContext(true, selected, selected.length); + FilterContext filterContext = mutableFilterContext.immutable(); + + Assert.assertEquals(true, filterContext.isSelectedInUse()); + Assert.assertEquals(512, filterContext.getSelectedSize()); + Assert.assertEquals(512, filterContext.getSelected().length); + + filterContext.resetFilterContext(); + + Assert.assertEquals(false, filterContext.isSelectedInUse()); + Assert.assertEquals(0, filterContext.getSelectedSize()); + Assert.assertEquals(null, filterContext.getSelected()); + } + + @Test(expected=AssertionError.class) + public void testInitInvalidFilterContext(){ + MutableFilterContext mutableFilterContext = new MutableFilterContext(); + int[] selected = makeInvalidSelected(); + + mutableFilterContext.setFilterContext(true, selected, selected.length); + } + + + @Test + public void testCopyFilterContext(){ + MutableFilterContext mutableFilterContext = new MutableFilterContext(); + int[] selected = makeValidSelected(); + + mutableFilterContext.setFilterContext(true, selected, selected.length); + + MutableFilterContext mutableFilterContextToCopy = new MutableFilterContext(); + mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1); + + mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy); + FilterContext filterContext = mutableFilterContext.immutable(); + + Assert.assertEquals(true, filterContext.isSelectedInUse()); + Assert.assertEquals(1, filterContext.getSelectedSize()); + Assert.assertEquals(100, filterContext.getSelected()[0]); + // make sure we kept the remaining array space + Assert.assertEquals(512, filterContext.getSelected().length); + } + + + @Test + public void testBorrowSelected(){ + MutableFilterContext mutableFilterContext = new MutableFilterContext(); + mutableFilterContext.setFilterContext(true, new int[] {100, 200}, 2); + + int[] borrowedSelected = mutableFilterContext.borrowSelected(1); + // make sure we borrowed the existing array + Assert.assertEquals(2, borrowedSelected.length); + Assert.assertEquals(100, borrowedSelected[0]); + Assert.assertEquals(200, borrowedSelected[1]); + + borrowedSelected = mutableFilterContext.borrowSelected(3); + Assert.assertEquals(3, borrowedSelected.length); + Assert.assertEquals(0, borrowedSelected[0]); + Assert.assertEquals(0, borrowedSelected[1]); + Assert.assertEquals(0, borrowedSelected[2]); + } +} diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java index d00c42963d..dd620b861b 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java @@ -21,41 +21,19 @@ * A representation of a Filter applied on the rows of a VectorizedRowBatch * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}. * - * Each FilterContext consists of an array with the ids (int) of rows that are selected - * by the filter, an integer representing the number of selected rows, and a boolean showing - * if the filter actually selected any rows. + * Each FilterContext consists of an array with the ids (int) of rows that are selected by the + * filter, an integer representing the number of selected rows, and a boolean showing if the filter + * actually selected any rows. * */ -public class FilterContext { - private boolean currBatchIsSelectedInUse = false; - private int[] currBatchSelected = null; - private int currBatchSelectedSize = 0; +public abstract class FilterContext { - /** - * Empty constructor - */ - public FilterContext(){}; - - /** - * Update context with the given values - * @param isSelectedInUse if the filter is applied - * @param selected an array of the selected rows - * @param selectedSize the number of the selected rows - */ - public void updateFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) { - this.currBatchIsSelectedInUse = isSelectedInUse; - this.currBatchSelected = selected; - this.currBatchSelectedSize = selectedSize; - } + protected boolean currBatchIsSelectedInUse = false; + protected int[] currBatchSelected = null; + protected int currBatchSelectedSize = 0; - /** - * Copy context variables from the a given FilterContext - * @param other FilterContext to copy from - */ - public void copyFilterContextFromOther(FilterContext other) { - this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse; - this.currBatchSelected = other.currBatchSelected; - this.currBatchSelectedSize = other.currBatchSelectedSize; + public FilterContext() { + super(); } /** @@ -67,16 +45,9 @@ public void resetFilterContext() { this.currBatchSelectedSize = 0; } - /** - * Set the selectedInUse boolean showing if the filter is applied - * @param selectedInUse - */ - public void setSelectedInUse(boolean selectedInUse) { - this.currBatchIsSelectedInUse = selectedInUse; - } - /** * Is the filter applied? + * * @return true if the filter is actually applied */ public boolean isSelectedInUse() { @@ -84,34 +55,21 @@ public boolean isSelectedInUse() { } /** - * Set the array of the rows that pass the filter - * @param selectedArray - */ - public void setSelected(int[] selectedArray) { - this.currBatchSelected = selectedArray; - } - - /** - * Return an int array with the rows that pass the filter + * Return an int array with the rows that pass the filter. + * Do not modify the array returned! + * * @return int array */ public int[] getSelected() { return this.currBatchSelected; } - /** - * Set the number of the rows that pass the filter - * @param selectedSize - */ - public void setSelectedSize(int selectedSize) { - this.currBatchSelectedSize = selectedSize; - } - /** * Return the number of rows that pass the filter + * * @return an int */ public int getSelectedSize() { return this.currBatchSelectedSize; } -} +} \ No newline at end of file diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java new file mode 100644 index 0000000000..cc33d0ebd5 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.filter; + +import java.util.Arrays; + +/** + * A representation of a Filter applied on the rows of a VectorizedRowBatch + * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}. + * + * Each FilterContext consists of an array with the ids (int) of rows that are selected by the + * filter, an integer representing the number of selected rows, and a boolean showing if the filter + * actually selected any rows. + * + */ +public class MutableFilterContext extends FilterContext { + + /** + * Set context with the given values by reference + * + * @param isSelectedInUse if the filter is applied + * @param selected an array of the selected rows + * @param selectedSize the number of the selected rows + */ + public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) { + this.currBatchIsSelectedInUse = isSelectedInUse; + this.currBatchSelected = selected; + this.currBatchSelectedSize = selectedSize; + // Avoid selected.length < selectedSize since we can borrow a larger array for selected + // debug loop for checking if selected is in order without duplicates (i.e [1,1,1] is illegal) + for (int i = 0; i < selectedSize-1; i++) + assert selected[i] < selected[i+1]; + } + + /** + * Copy context variables from the a given FilterContext. + * Always does a deep copy of the data. + * + * @param other FilterContext to copy from + */ + public void copyFilterContextFrom(MutableFilterContext other) { + // assert if copying into self + assert this != other; + + if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) { + // note: still allocating a full size buffer, for later use + this.currBatchSelected = Arrays.copyOf(other.currBatchSelected, other.currBatchSelected.length); + } else { + System.arraycopy(other.currBatchSelected, 0, this.currBatchSelected, 0, other.currBatchSelectedSize); + } + this.currBatchSelectedSize = other.currBatchSelectedSize; + this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse; + } + + /** + * Borrow the current selected array to be modified if it satisfies minimum capacity. + * If it is too small or unset, allocates one. + * This method never returns null! + * + * @param minCapacity + * @return the current selected array to be modified + */ + public int[] borrowSelected(int minCapacity) { + int[] existing = this.currBatchSelected; + this.currBatchSelected = null; + if (existing == null || existing.length < minCapacity) { + return new int[minCapacity]; + } + return existing; + } + + /** + * Get the immutable version of the current FilterContext + * @return + */ + public FilterContext immutable(){ + return this; + } + + /** + * Set the selectedInUse boolean showing if the filter is applied + * + * @param selectedInUse + */ + public void setSelectedInUse(boolean selectedInUse) { + this.currBatchIsSelectedInUse = selectedInUse; + } + + /** + * Set the array of the rows that pass the filter by reference + * + * @param selectedArray + */ + public void setSelected(int[] selectedArray) { + this.currBatchSelected = selectedArray; + } + + /** + * Set the number of the rows that pass the filter + * + * @param selectedSize + */ + public void setSelectedSize(int selectedSize) { + this.currBatchSelectedSize = selectedSize; + } +} -- 2.20.1 (Apple Git-117) From 05ac9278eb9ec58682398705724042fdeec70e3a Mon Sep 17 00:00:00 2001 From: Panos Garefalakis Date: Fri, 6 Mar 2020 18:23:55 +0000 Subject: [PATCH 3/3] Addressing github comments --- .../hive/ql/io/filter/TestFilterContext.java | 17 ++++++++ .../ql/io/filter/MutableFilterContext.java | 39 +++++++++++++------ 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java index 495321543b..0bda6203d1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hive.ql.io.filter; import org.junit.Assert; diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java index cc33d0ebd5..f7ad6ba818 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java @@ -42,9 +42,8 @@ public void setFilterContext(boolean isSelectedInUse, int[] selected, int select this.currBatchSelected = selected; this.currBatchSelectedSize = selectedSize; // Avoid selected.length < selectedSize since we can borrow a larger array for selected - // debug loop for checking if selected is in order without duplicates (i.e [1,1,1] is illegal) - for (int i = 0; i < selectedSize-1; i++) - assert selected[i] < selected[i+1]; + // Debug loop for selected array: use without assert when needed (asserts only fail in testing) + assert isValidSelected() : "Selected array may not contain duplicates or unordered values"; } /** @@ -54,17 +53,33 @@ public void setFilterContext(boolean isSelectedInUse, int[] selected, int select * @param other FilterContext to copy from */ public void copyFilterContextFrom(MutableFilterContext other) { - // assert if copying into self - assert this != other; + // assert if copying into self (can fail only in testing) + assert this != other: "May not copy a FilterContext to itself"; - if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) { - // note: still allocating a full size buffer, for later use - this.currBatchSelected = Arrays.copyOf(other.currBatchSelected, other.currBatchSelected.length); - } else { - System.arraycopy(other.currBatchSelected, 0, this.currBatchSelected, 0, other.currBatchSelectedSize); + if (this != other) { + if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) { + // note: still allocating a full size buffer, for later use + this.currBatchSelected = Arrays.copyOf(other.currBatchSelected, other.currBatchSelected.length); + } else { + System.arraycopy(other.currBatchSelected, 0, this.currBatchSelected, 0, other.currBatchSelectedSize); + } + this.currBatchSelectedSize = other.currBatchSelectedSize; + this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse; } - this.currBatchSelectedSize = other.currBatchSelectedSize; - this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse; + } + + /** + * Validate method checking if existing selected array contains values that + * are in order and does not without duplicates i.e [1,1,1] is illegal + * + * @return true if the selected array is valid + */ + public boolean isValidSelected() { + for (int i = 1; i < this.currBatchSelectedSize; i++) { + if (this.currBatchSelected[i-1] >= this.currBatchSelected[i]) + return false; + } + return true; } /** -- 2.20.1 (Apple Git-117)