8月 11th, 2014
HBaseの filterが遅い
HBaseの filterが遅いバグがある(HBASE-9428)
https://issues.apache.org/jira/browse/HBASE-9428
CDHでも、4.3.0, 4.4.0, 4.5.0が該当する。
パッチはこちら。
# wget https://issues.apache.org/jira/secure/attachment/12601412/9428-0.94.txt
# cat 9428-0.94.txt
Index: src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java (revision 1519839)
+++ src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java (working copy)
@@ -30,6 +30,7 @@
 import java.io.IOException;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
+import java.util.Arrays;
 import java.util.regex.Pattern;
 
 /**
@@ -91,9 +92,12 @@
 
   @Override
   public int compareTo(byte[] value, int offset, int length) {
+    // See HBASE-9428. Make a copy of the relevant part of the byte[],
+    // or the JDK will copy the entire byte[] during String decode
+    byte[] tmp = Arrays.copyOfRange(value, offset, offset+length);
     // Use find() for subsequence match instead of matches() (full sequence
     // match) to adhere to the principle of least surprise.
-    return pattern.matcher(new String(value, offset, length, charset)).find() ? 0
+    return pattern.matcher(new String(tmp, charset)).find() ? 0
       : 1;
   }
 