diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TimestampType.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TimestampType.java new file mode 100644 index 0000000..67c93f3 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TimestampType.java @@ -0,0 +1,332 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase; + +import org.apache.commons.lang.time.FastDateFormat; + +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; + +/** + * TimestampType is an enum to represent different ways of encoding time in HBase using 64 bits. + * Time is usually encoded as a 64-bit long in cells timestamps and used for sorting + * cells, ordering writes, snapshot in time queries, as well as TTL feature. + * + *

TimestampType is deliberately simple. It doesn't have any logic other than interpreting the 64 bits. + * Any monotonically increasing or non-decreasing nature of the timestamps is to be taken care of by the + * clock. There can be several clock implementations, and each such implementation can map its + * representation of timestamp to one of the timestamp types, i.e. {@link #HYBRID} or {@link #PHYSICAL}. + *

+ */ + +public enum TimestampType { + + /** + * Hybrid is a Timestamp type used to encode the physical time and logical time components. + * It has methods to decipher the 64 bit hybrid timestamp and also to construct the + * hybrid timestamp. + */ + HYBRID { + /** + * Hard coded 44-bits for physical time, with most significant bit carrying the sign i.e. 0 + * as we are dealing with positive integers and the remaining 43 bits are to be interpreted as + * milliseconds since the unix epoch. Thus allowing us to represent all the dates between unix epoch (1970) and year + * 2248 with signed timestamp comparison. + */ + @SuppressWarnings("unused") + private static final int BITS_FOR_PHYSICAL_TIME = 44; + + /** + * Remaining 20-bits for logical time, allowing values up to 1,048,575 (2^20 - 1). LT is at the least + * significant part of the 64 bit timestamp, so unsigned comparison can be used for LT. + */ + + private static final int BITS_FOR_LOGICAL_TIME = 20; + + /** + * Max value for physical time, inclusive (43 one-bits). This assumes signed comparison. + */ + private static final long PHYSICAL_TIME_MAX_VALUE = 0x7ffffffffffL; + + /** + * Max value for logical time, inclusive (20 one-bits). 
+ */ + private static final long LOGICAL_TIME_MAX_VALUE = 0xfffffL; + + public long toEpochTimeMillis(long timestamp){ + return getPhysicalTime(timestamp); + } + + public long fromEpochTimeMillis(long timestamp){ + return toTimestamp(TimeUnit.MILLISECONDS, timestamp, 0); + } + + public long toTimestamp(TimeUnit timeUnit, long pt, long lt){ + pt = TimeUnit.MILLISECONDS.convert(pt, timeUnit); + return (pt << BITS_FOR_LOGICAL_TIME) + lt; + } + + public long getPhysicalTime(long timestamp){ + return (timestamp >>> BITS_FOR_LOGICAL_TIME); // unsigned shift: assume unsigned timestamp + } + + public long getLogicalTime(long timestamp){ + return timestamp & LOGICAL_TIME_MAX_VALUE; + } + + public long getMaxPhysicalTime(){ + return PHYSICAL_TIME_MAX_VALUE; + } + + public long getMaxLogicalTime(){ + return LOGICAL_TIME_MAX_VALUE; + } + + public int getBitsForLogicalTime() { + return BITS_FOR_LOGICAL_TIME; + } + + /** + * Returns whether the given timestamp is "likely" of the given timestamp type. Timestamp + * implementations can use the full range of 64bit longs to represent physical and logical + * components of time. However, this method returns whether the given timestamp is a likely + * representation depending on heuristics for the clock implementation. + * + *
  • HLC: Timestamps of HLC type are checked whether they belong to HLC range assuming + * that HLC timestamps will only have > 0 lt component for timestamps corresponding to year 2015 + * or later. This method will return false if lt > 0 and year is before 2015. + * Due to left shifting for HLC, all millisecond-since-epoch timestamps from SYSTEM_MONOTONIC type for + * years 1970-10K fall into year 1970 when interpreted as HLC timestamps. Thus, {@link #isLikelyOfType(long)} + * will return false for timestamps whose physical time falls in the year 1970, even if lt = 0, when interpreted as of + * type HLC. + *
  • + * + *

    Note that this method uses heuristics which may not hold + * if system timestamps are intermixed from client side and server side or timestamp + * sources other than system clock are used.

    + * @return whether the given timestamp heuristically falls within the expected range and format + * for the Timestamp implementation. + */ + public boolean isLikelyOfType(long timestamp){ + long physicalTime = getPhysicalTime(timestamp); + long logicalTime = getLogicalTime(timestamp); + + // heuristic 1: Up until year 2015 (1420070400000), lt component cannot be non-zero. + if (physicalTime < 1420070400000L && logicalTime != 0) { + return false; + } else if (physicalTime < 31536000000L) { + // heuristic 2: Even if lt = 0, pt should not be before year 1971 (31536000000L). + // Due to left shifting by 20, all epoch ms timestamps from SYSTEM_MONOTONIC timestamp + // end up in year 1970, even for epoch time for the year 10000. + // this assumes HLC is not used to represent timestamps for year 1970 UTC. + return false; + } + return true; + } + + public boolean isLikelyOfType(long timestamp, boolean isClockMonotonic){ + return isLikelyOfType(timestamp); + } + + /** + * Returns a string representation for Physical Time and Logical Time components. The format is: + * <yyyy-MM-dd HH:mm:ss:SSS(PT), LT> + * Physical Time is converted to UTC time and not to local time for uniformity. + * Example: <2015-07-17 23:56:35:891(1437177395891), 0> + */ + public String toString(long timestamp){ + long pt = getPhysicalTime(timestamp); + long lt = getLogicalTime(timestamp); + return new StringBuilder("<") + .append(dateFormat.format(pt)) + .append("(").append(pt).append(")") + .append(", ").append(lt) + .append(">").toString(); + } + }, + + /** + * Physical is a Timestamp type used to encode the physical time in 64 bits. + * It has helper methods to decipher the 64 bit encoding of physical time. 
+ */ + PHYSICAL { + public long toEpochTimeMillis(long timestamp) { + return timestamp; + } + + public long fromEpochTimeMillis(long timestamp) { + return timestamp; + } + + public long toTimestamp(TimeUnit timeUnit, long pt, long lt) { + return TimeUnit.MILLISECONDS.convert(pt, timeUnit); + } + + public long getPhysicalTime(long timestamp) { + return timestamp; + } + + public long getLogicalTime(long timestamp) { + return 0; + } + + public long getMaxPhysicalTime() { + return Long.MAX_VALUE; + } + + public long getMaxLogicalTime() { + return 0; + } + + public int getBitsForLogicalTime() { + return 0; + } + + /** + * @param timestamp epoch time in milliseconds + * @return true if the timestamp, assumed to be generated by a monotonic clock, is likely of type {@link #PHYSICAL}, else false + */ + public boolean isLikelyOfType(long timestamp) { + return isLikelyOfType(timestamp, true); + } + + public boolean isLikelyOfType(long timestamp, boolean isClockMonotonic) { + // heuristic: for a monotonic clock the timestamp should be before year 3K (32503680000000L); any timestamp passes for a non-monotonic clock. + if(!isClockMonotonic) { + return true; + } + return timestamp < 32503680000000L; + } + + /** + * Returns a string representation for the Physical Time component (this type has no Logical Time). The format is: + * <yyyy-MM-dd HH:mm:ss:SSS(PT)> + * Physical Time is converted to UTC time and not to local time for uniformity. + * Example: <2015-07-17 23:56:35:891(1437177395891)> + */ + public String toString(long timestamp) { + long pt = timestamp; + return new StringBuilder("<") + .append(dateFormat.format(pt)) + .append("(").append(pt).append(")") + .append(">").toString(); + } + }; + + /** + * This is used internally by the enum methods of Hybrid and Physical Timestamp types to convert the + * timestamp to format set here. 
The UTC time zone is used instead of the local time zone for convenience and uniformity. + */ + private static final FastDateFormat dateFormat + = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss:SSS", TimeZone.getTimeZone("UTC")); + + + /** + * Converts the given timestamp to the unix epoch timestamp with millisecond resolution. + * Returned timestamp is compatible with System.currentTimeMillis(). This default throws AbstractMethodError; each constant overrides it. + * @return number of milliseconds from epoch + */ + public long toEpochTimeMillis(long timestamp){ + throw new AbstractMethodError(); + } + + /** + * Converts the given timestamp to the timestamp representation for the Timestamp implementation. + * @param timestamp epoch time in milliseconds + * @return a timestamp representation for the Timestamp implementation. + */ + public long fromEpochTimeMillis(long timestamp) { + throw new AbstractMethodError(); + } + + /** + * Converts the given physical clock in the given timeunit to a 64-bit timestamp + * @param timeUnit the time unit of physical clock + * @param pt physical time, expressed in timeUnit + * @param lt logical time (ignored by {@link #PHYSICAL}; {@link #HYBRID} adds it without a range check) + * @return a timestamp in 64 bits + */ + public long toTimestamp(TimeUnit timeUnit, long pt, long lt) { + throw new AbstractMethodError(); + } + + /** + * Extracts and returns the physical time from the timestamp + * @param timestamp the timestamp + * @return physical time + */ + public long getPhysicalTime(long timestamp) { + throw new AbstractMethodError(); + } + + /** + * Extracts and returns the logical time from the timestamp + * @param timestamp the timestamp + * @return logical time + */ + public long getLogicalTime(long timestamp) { + throw new AbstractMethodError(); + } + + /** + * Returns the maximum time representable by the physical clock + * @return maximum physical time value + */ + public long getMaxPhysicalTime() { + throw new AbstractMethodError(); + } + + /** + * Returns the maximum time representable by the logical clock + * @return maximum logical time value + */ + public long getMaxLogicalTime() { + throw new AbstractMethodError(); + } + + 
/** + * Returns the number of bits allocated for logical time + * @return number of bits used for logical time + */ + public int getBitsForLogicalTime() { + throw new AbstractMethodError(); + } + + /** + * Returns whether the given timestamp is heuristically likely to be of this timestamp type. + * @param timestamp the 64-bit timestamp to check + * @param isClockMonotonic if the clock that generated this timestamp is monotonic + * @return true if the timestamp is likely of this timestamp type, else false + */ + public boolean isLikelyOfType(long timestamp, boolean isClockMonotonic) { + throw new AbstractMethodError(); + } + + public boolean isLikelyOfType(long timestamp) { + throw new AbstractMethodError(); + } + + public String toString(long timestamp) { + long pt = timestamp; + return new StringBuilder("<") + .append(dateFormat.format(pt)) + .append("(").append(pt).append(")") + .append(">").toString(); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestTimestampType.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestTimestampType.java new file mode 100644 index 0000000..0183f52 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestTimestampType.java @@ -0,0 +1,247 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase; + +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.concurrent.TimeUnit; + +import static org.junit.Assert.*; + +@Category(SmallTests.class) +public class TestTimestampType { + + private static long testPt = 1234567890123L; + private static long testLt = 12; + + /* + * Tests for TimestampType enum + */ + + @Test + public void testFromToEpoch() { + for (TimestampType timestamp : TimestampType.values()) { + long wallTime = System.currentTimeMillis(); + long converted = timestamp.toEpochTimeMillis( + timestamp.fromEpochTimeMillis(wallTime)); + + assertEquals(wallTime, converted); + } + } + + + /* Tests for HL Clock */ + + @Test + public void testHLCMaxValues() { + // assert 44-bit PT with signed comparison (actual 43 bits) + assertEquals( + (1L << (63-TimestampType.HYBRID.getBitsForLogicalTime())) - 1, + TimestampType.HYBRID.getMaxPhysicalTime()); + + // assert 20-bit LT + assertEquals( + (1L << TimestampType.HYBRID.getBitsForLogicalTime()) - 1, + TimestampType.HYBRID.getMaxLogicalTime()); + + // assert that maximum representable timestamp is Long.MAX_VALUE (assuming signed comparison). 
+ assertEquals( + Long.MAX_VALUE, + TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, + TimestampType.HYBRID.getMaxPhysicalTime(), + TimestampType.HYBRID.getMaxLogicalTime()) + ); + } + + @Test + public void testHLCGetPhysicalTime() { + long ts = TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + assertEquals(testPt, TimestampType.HYBRID.getPhysicalTime(ts)); + } + + @Test + public void testHLCGetLogicalTime() { + long ts = TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + assertEquals(testLt, TimestampType.HYBRID.getLogicalTime(ts)); + } + + @Test + public void testHLCToString() { + long ts = TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + + assertEquals("<2009-02-13 23:31:30:123(1234567890123), 12>", TimestampType.HYBRID.toString(ts)); + } + + @Test + public void testHLCToTimestamp() { + long expected = (testPt << TimestampType.HYBRID.getBitsForLogicalTime()) + testLt; + // test millisecond + long ts = TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + assertEquals(ts, expected); + + // test nanosecond + ts = TimestampType.HYBRID.toTimestamp(TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS.toNanos(testPt), testLt); + assertEquals(ts, expected); + } + + @Test + public void testHLCIsLikelyOfType() throws ParseException { + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS Z"); + + // test timestamps of HLC type from year 1971 to 2039 where lt = 0 + System.out.println("==="); + for (int year = 1971; year <= 2039; year += 1) { + Date date = dateFormat.parse(year + "-01-01 11:22:33:444 UTC"); + + // HLC type ts with pt = date and lt = 0 + long ts = TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, date.getTime(), 0); + System.out.println(TimestampType.HYBRID.toString(ts)); + + assertTrue(TimestampType.HYBRID.isLikelyOfType(ts)); + } + + // test timestamps of HLC type from year 2015 to 2039 where lt > 0 + System.out.println("==="); + for 
(int year = 2015; year <= 2039; year += 1) { + Date date = dateFormat.parse(year + "-01-01 11:22:33:444 UTC"); + + // HLC type ts with pt = date and lt = 123 + long ts = TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, date.getTime(), 123); + System.out.println(TimestampType.HYBRID.toString(ts)); + + assertTrue(TimestampType.HYBRID.isLikelyOfType(ts)); + } + + // test that plain epoch-millisecond timestamps from different years are not HLC type + System.out.println("==="); + for (int year = 1970; year <= 10000 ;year += 10) { + // Stardate 1970 to 10000 + Date date = dateFormat.parse(year + "-01-01 00:00:00:000 UTC"); + long ts = date.getTime(); + System.out.println(TimestampType.PHYSICAL.toString(ts)); + System.out.println(TimestampType.PHYSICAL.toString(TimestampType.HYBRID.getPhysicalTime(ts))); + + assertFalse(TimestampType.HYBRID.isLikelyOfType(ts)); + } + + // test that epoch-millisecond timestamps up to 2015 are not HLC even if lt = 0 + System.out.println("==="); + for (int year = 1970; year <= 2015; year += 1) { + Date date = dateFormat.parse(year + "-01-01 11:22:33:444 UTC"); + + // clear the low lt bits so that lt = 0 + long ts = ((date.getTime() + >> TimestampType.HYBRID.getBitsForLogicalTime()) << TimestampType.HYBRID.getBitsForLogicalTime()); + System.out.println(Long.toHexString(ts)); + + System.out.println(TimestampType.PHYSICAL.toString(ts)); + System.out.println(TimestampType.PHYSICAL.toString(TimestampType.HYBRID.getPhysicalTime(ts))); + + assertFalse(TimestampType.HYBRID.isLikelyOfType(ts)); + } + + // test that timestamps from currentTime epoch are not HLC type + System.out.println("==="); + long systemTimeNow = System.currentTimeMillis(); + System.out.println(TimestampType.PHYSICAL.toString(systemTimeNow)); + System.out.println(TimestampType.PHYSICAL.toString((TimestampType.HYBRID.getPhysicalTime(systemTimeNow)))); + assertFalse(TimestampType.HYBRID.isLikelyOfType(systemTimeNow)); + } + + + @Test + public void testSystemClockMaxValues() { + assertEquals( + (1L << 63) - 1, + 
TimestampType.PHYSICAL.getMaxPhysicalTime()); + + assertEquals(0, TimestampType.PHYSICAL.getMaxLogicalTime()); + } + + @Test + public void testSystemClockGetPhysicalTime() { + long ts = TimestampType.PHYSICAL.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + assertEquals(testPt, TimestampType.PHYSICAL.getPhysicalTime(ts)); + } + + @Test + public void testSystemClockGetLogicalTime() { + long ts = TimestampType.PHYSICAL.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + assertEquals(0, TimestampType.PHYSICAL.getLogicalTime(ts)); + } + + @Test + public void testSystemClockToString() { + long ts = TimestampType.PHYSICAL.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + + assertEquals("<2009-02-13 23:31:30:123(1234567890123)>", TimestampType.PHYSICAL.toString(ts)); + } + + @Test + public void testSystemClockToTimestamp() { + // test millisecond + long ts = TimestampType.PHYSICAL.toTimestamp(TimeUnit.MILLISECONDS, testPt, testLt); + assertEquals(ts, testPt); + + // test nanosecond + ts = TimestampType.PHYSICAL.toTimestamp(TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS.toNanos(testPt), testLt); + assertEquals(ts, testPt); + } + + @Test + public void testSystemClockIsLikelyOfType() throws ParseException { + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS Z"); + + // test that timestamps from 1970 to 3K epoch are of SYSTEM (PHYSICAL) type + System.out.println("==="); + for (int year = 1970; year < 3000 ;year += 10) { + // Stardate 1970 up to (not including) 3000 + Date date = dateFormat.parse(year + "-01-01 00:00:00:000 UTC"); + long ts = date.getTime(); + System.out.println(TimestampType.PHYSICAL.toString(ts)); + System.out.println(TimestampType.PHYSICAL.toString(TimestampType.HYBRID.getPhysicalTime(ts))); + + assertTrue(TimestampType.PHYSICAL.isLikelyOfType(ts)); + } + + // test that timestamps from currentTime epoch are of SYSTEM (PHYSICAL) type + System.out.println("==="); + long systemTimeNow = System.currentTimeMillis(); + 
System.out.println(TimestampType.PHYSICAL.toString(systemTimeNow)); + assertTrue(TimestampType.PHYSICAL.isLikelyOfType(systemTimeNow)); + + // test timestamps of HLC type from year 1970 to 2039 are not of SYSTEM (PHYSICAL) type + System.out.println("==="); + for (int year = 1970; year <= 2039; year += 1) { + Date date = dateFormat.parse(year + "-01-01 11:22:33:444 UTC"); + + // HLC type ts with pt = date and lt = 0 + long ts = TimestampType.HYBRID.toTimestamp(TimeUnit.MILLISECONDS, date.getTime(), 0); + System.out.println(TimestampType.HYBRID.toString(ts)); + System.out.println(TimestampType.PHYSICAL.toString(ts)); + + assertFalse(TimestampType.PHYSICAL.isLikelyOfType(ts)); + } + } +} \ No newline at end of file