001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Comparator;
024import java.util.List;
025import java.util.Objects;
026import java.util.PriorityQueue;
027import org.apache.hadoop.hbase.Cell;
028import org.apache.hadoop.hbase.CellComparator;
029import org.apache.hadoop.hbase.PrivateCellUtil;
030import org.apache.hadoop.hbase.exceptions.DeserializationException;
031import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent;
032import org.apache.hadoop.hbase.util.Bytes;
033import org.apache.hadoop.hbase.util.Pair;
034import org.apache.yetus.audience.InterfaceAudience;
035
036import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
037import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
038
039import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
040import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair;
041
042/**
043 * This is optimized version of a standard FuzzyRowFilter Filters data based on fuzzy row key.
044 * Performs fast-forwards during scanning. It takes pairs (row key, fuzzy info) to match row keys.
045 * Where fuzzy info is a byte array with 0 or 1 as its values:
046 * <ul>
047 * <li>0 - means that this byte in provided row key is fixed, i.e. row key's byte at same position
048 * must match</li>
049 * <li>1 - means that this byte in provided row key is NOT fixed, i.e. row key's byte at this
050 * position can be different from the one in provided row key</li>
051 * </ul>
052 * Example:
053 * <p/>
054 * Let's assume row key format is userId_actionId_year_month. Length of userId is fixed and is 4,
055 * length of actionId is 2 and year and month are 4 and 2 bytes long respectively.
056 * <p/>
057 * Let's assume that we need to fetch all users that performed certain action (encoded as "99") in
058 * Jan of any year. Then the pair (row key, fuzzy info) would be the following:
059 *
060 * <pre>
061 * row key = "????_99_????_01" (one can use any value instead of "?")
062 * fuzzy info = "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00"
063 * </pre>
064 *
065 * I.e. fuzzy info tells the matching mask is "????_99_????_01", where at ? can be any value.
066 */
067@InterfaceAudience.Public
068public class FuzzyRowFilter extends FilterBase implements HintingFilter {
069  private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned();
070
071  // the wildcard byte is 1 on the user side. but the filter converts it internally
072  // in preprocessMask. This was changed in HBASE-15676 due to a bug with using 0.
073  // in v1, the 1 byte gets converted to 0
074  // in v2, the 1 byte gets converted to 2.
075  // we support both here to ensure backwards compatibility between client and server
076  static final byte V1_PROCESSED_WILDCARD_MASK = 0;
077  static final byte V2_PROCESSED_WILDCARD_MASK = 2;
078
079  private final byte processedWildcardMask;
080  private final List<Pair<byte[], byte[]>> fuzzyKeysData;
081  // Used to record whether we want to skip the current row.
082  // Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey
083  // returns true, we will just skip to next row, instead of calling getNextCellHint to determine
084  // the actual next row, so we need to implement filterCell and return SEEK_NEXT_USING_HINT to let
085  // upper layer call getNextCellHint.
086  // And if we do not implement filterRow, sometimes we will get incorrect result when using
087  // FuzzyRowFilter together with other filters, please see the description for HBASE-26967 for more
088  // details.
089  private boolean filterRow;
090  private boolean done = false;
091
092  /**
093   * The index of a last successfully found matching fuzzy string (in fuzzyKeysData). We will start
094   * matching next KV with this one. If they do not match then we will return back to the one-by-one
095   * iteration over fuzzyKeysData.
096   */
097  private int lastFoundIndex = -1;
098
099  /**
100   * Row tracker (keeps all next rows after SEEK_NEXT_USING_HINT was returned)
101   */
102  private final RowTracker tracker;
103
104  // this client side constructor ensures that all client-constructed
105  // FuzzyRowFilters use the new v2 mask.
106  public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
107    this(fuzzyKeysData, V2_PROCESSED_WILDCARD_MASK);
108  }
109
110  // This constructor is only used internally here, when parsing from protos on the server side.
111  // It exists to enable seamless migration from v1 to v2.
112  // Additionally used in tests, but never used on client side.
113  FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData, byte processedWildcardMask) {
114    this.processedWildcardMask = processedWildcardMask;
115
116    List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size());
117
118    for (Pair<byte[], byte[]> aFuzzyKeysData : fuzzyKeysData) {
119      if (aFuzzyKeysData.getFirst().length != aFuzzyKeysData.getSecond().length) {
120        Pair<String, String> readable = new Pair<>(Bytes.toStringBinary(aFuzzyKeysData.getFirst()),
121          Bytes.toStringBinary(aFuzzyKeysData.getSecond()));
122        throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
123      }
124
125      Pair<byte[], byte[]> p = new Pair<>();
126      // create a copy of pair bytes so that they are not modified by the filter.
127      p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length));
128      p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length));
129
130      // update mask ( 0 -> -1 (0xff), 1 -> [0 or 2 depending on processedWildcardMask value])
131      p.setSecond(preprocessMask(p.getSecond()));
132      preprocessSearchKey(p);
133
134      fuzzyKeyDataCopy.add(p);
135    }
136    this.fuzzyKeysData = fuzzyKeyDataCopy;
137    this.tracker = new RowTracker();
138  }
139
140  private void preprocessSearchKey(Pair<byte[], byte[]> p) {
141    if (!UNSAFE_UNALIGNED) {
142      // do nothing
143      return;
144    }
145    byte[] key = p.getFirst();
146    byte[] mask = p.getSecond();
147    for (int i = 0; i < mask.length; i++) {
148      // set non-fixed part of a search key to 0.
149      if (mask[i] == processedWildcardMask) {
150        key[i] = 0;
151      }
152    }
153  }
154
155  /**
156   * We need to preprocess mask array, as since we treat 2's as unfixed positions and -1 (0xff) as
157   * fixed positions
158   * @return mask array
159   */
160  private byte[] preprocessMask(byte[] mask) {
161    if (!UNSAFE_UNALIGNED) {
162      // do nothing
163      return mask;
164    }
165    if (isPreprocessedMask(mask)) return mask;
166    for (int i = 0; i < mask.length; i++) {
167      if (mask[i] == 0) {
168        mask[i] = -1; // 0 -> -1
169      } else if (mask[i] == 1) {
170        mask[i] = processedWildcardMask;// 1 -> 0 or 2 depending on mask version
171      }
172    }
173    return mask;
174  }
175
176  private boolean isPreprocessedMask(byte[] mask) {
177    for (int i = 0; i < mask.length; i++) {
178      if (mask[i] != -1 && mask[i] != processedWildcardMask) {
179        return false;
180      }
181    }
182    return true;
183  }
184
185  /**
186   * Returns the Fuzzy keys in the format expected by the constructor.
187   * @return the Fuzzy keys in the format expected by the constructor
188   */
189  public List<Pair<byte[], byte[]>> getFuzzyKeys() {
190    List<Pair<byte[], byte[]>> returnList = new ArrayList<>(fuzzyKeysData.size());
191    for (Pair<byte[], byte[]> fuzzyKey : fuzzyKeysData) {
192      Pair<byte[], byte[]> returnKey = new Pair<>();
193      // This won't revert the original key's don't care values, but we don't care.
194      returnKey.setFirst(Arrays.copyOf(fuzzyKey.getFirst(), fuzzyKey.getFirst().length));
195      byte[] returnMask = Arrays.copyOf(fuzzyKey.getSecond(), fuzzyKey.getSecond().length);
196      if (UNSAFE_UNALIGNED && isPreprocessedMask(returnMask)) {
197        // Revert the preprocessing.
198        for (int i = 0; i < returnMask.length; i++) {
199          if (returnMask[i] == -1) {
200            returnMask[i] = 0; // -1 >> 0
201          } else if (returnMask[i] == processedWildcardMask) {
202            returnMask[i] = 1; // 0 or 2 >> 1 depending on mask version
203          }
204        }
205      }
206      returnKey.setSecond(returnMask);
207      returnList.add(returnKey);
208    }
209    return returnList;
210  }
211
212  @Deprecated
213  @Override
214  public ReturnCode filterKeyValue(final Cell c) {
215    return filterCell(c);
216  }
217
218  @Override
219  public void reset() throws IOException {
220    filterRow = false;
221  }
222
223  @Override
224  public boolean filterRow() throws IOException {
225    return filterRow;
226  }
227
228  @Override
229  public ReturnCode filterCell(final Cell c) {
230    final int startIndex = Math.max(lastFoundIndex, 0);
231    final int size = fuzzyKeysData.size();
232    for (int i = startIndex; i < size + startIndex; i++) {
233      final int index = i % size;
234      Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
235      idempotentMaskShift(fuzzyData.getSecond());
236      SatisfiesCode satisfiesCode = satisfies(isReversed(), c.getRowArray(), c.getRowOffset(),
237        c.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
238      if (satisfiesCode == SatisfiesCode.YES) {
239        lastFoundIndex = index;
240        return ReturnCode.INCLUDE;
241      }
242    }
243    // NOT FOUND -> seek next using hint
244    lastFoundIndex = -1;
245    filterRow = true;
246    return ReturnCode.SEEK_NEXT_USING_HINT;
247  }
248
249  static void idempotentMaskShift(byte[] mask) {
250    // This shift is idempotent - always end up with 0 and -1 as mask values.
251    // This works regardless of mask version, because both 0 >> 2 and 2 >> 2
252    // result in 0.
253    for (int j = 0; j < mask.length; j++) {
254      mask[j] >>= 2;
255    }
256  }
257
258  @Override
259  public Cell getNextCellHint(Cell currentCell) {
260    boolean result = tracker.updateTracker(currentCell);
261    if (!result) {
262      done = true;
263      return null;
264    }
265    byte[] nextRowKey = tracker.nextRow();
266    return PrivateCellUtil.createFirstOnRow(nextRowKey, 0, (short) nextRowKey.length);
267  }
268
269  /**
270   * If we have multiple fuzzy keys, row tracker should improve overall performance. It calculates
271   * all next rows (one per every fuzzy key) and put them (the fuzzy key is bundled) into a priority
272   * queue so that the smallest row key always appears at queue head, which helps to decide the
273   * "Next Cell Hint". As scanning going on, the number of candidate rows in the RowTracker will
274   * remain the size of fuzzy keys until some of the fuzzy keys won't possibly have matches any
275   * more.
276   */
277  private class RowTracker {
278    private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
279    private boolean initialized = false;
280
281    RowTracker() {
282      nextRows = new PriorityQueue<>(fuzzyKeysData.size(),
283        new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
284          @Override
285          public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
286            Pair<byte[], Pair<byte[], byte[]>> o2) {
287            return isReversed()
288              ? Bytes.compareTo(o2.getFirst(), o1.getFirst())
289              : Bytes.compareTo(o1.getFirst(), o2.getFirst());
290          }
291        });
292    }
293
294    byte[] nextRow() {
295      if (nextRows.isEmpty()) {
296        throw new IllegalStateException("NextRows should not be empty, "
297          + "make sure to call nextRow() after updateTracker() return true");
298      } else {
299        return nextRows.peek().getFirst();
300      }
301    }
302
303    boolean updateTracker(Cell currentCell) {
304      if (!initialized) {
305        for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
306          updateWith(currentCell, fuzzyData);
307        }
308        initialized = true;
309      } else {
310        while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
311          Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
312          Pair<byte[], byte[]> fuzzyData = head.getSecond();
313          updateWith(currentCell, fuzzyData);
314        }
315      }
316      return !nextRows.isEmpty();
317    }
318
319    boolean lessThan(Cell currentCell, byte[] nextRowKey) {
320      int compareResult =
321        CellComparator.getInstance().compareRows(currentCell, nextRowKey, 0, nextRowKey.length);
322      return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
323    }
324
325    void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
326      byte[] nextRowKeyCandidate =
327        getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
328          currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
329      if (nextRowKeyCandidate != null) {
330        nextRows.add(new Pair<>(nextRowKeyCandidate, fuzzyData));
331      }
332    }
333
334  }
335
336  @Override
337  public boolean filterAllRemaining() {
338    return done;
339  }
340
341  /** Returns The filter serialized using pb */
342  @Override
343  public byte[] toByteArray() {
344    FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder()
345      .setIsMaskV2(processedWildcardMask == V2_PROCESSED_WILDCARD_MASK);
346    for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
347      BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
348      bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(fuzzyData.getFirst()));
349      bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(fuzzyData.getSecond()));
350      builder.addFuzzyKeysData(bbpBuilder);
351    }
352    return builder.build().toByteArray();
353  }
354
355  /**
356   * Parse a serialized representation of {@link FuzzyRowFilter}
357   * @param pbBytes A pb serialized {@link FuzzyRowFilter} instance
358   * @return An instance of {@link FuzzyRowFilter} made from <code>bytes</code>
359   * @throws DeserializationException if an error occurred
360   * @see #toByteArray
361   */
362  public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
363    FilterProtos.FuzzyRowFilter proto;
364    try {
365      proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
366    } catch (InvalidProtocolBufferException e) {
367      throw new DeserializationException(e);
368    }
369    int count = proto.getFuzzyKeysDataCount();
370    ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<>(count);
371    for (int i = 0; i < count; ++i) {
372      BytesBytesPair current = proto.getFuzzyKeysData(i);
373      byte[] keyBytes = current.getFirst().toByteArray();
374      byte[] keyMeta = current.getSecond().toByteArray();
375      fuzzyKeysData.add(new Pair<>(keyBytes, keyMeta));
376    }
377    byte processedWildcardMask = proto.hasIsMaskV2() && proto.getIsMaskV2()
378      ? V2_PROCESSED_WILDCARD_MASK
379      : V1_PROCESSED_WILDCARD_MASK;
380    return new FuzzyRowFilter(fuzzyKeysData, processedWildcardMask);
381  }
382
383  @Override
384  public String toString() {
385    final StringBuilder sb = new StringBuilder();
386    sb.append("FuzzyRowFilter");
387    sb.append("{fuzzyKeysData=");
388    for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
389      sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
390      sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
391    }
392    sb.append("}, ");
393    return sb.toString();
394  }
395
396  // Utility methods
397
398  static enum SatisfiesCode {
399    /** row satisfies fuzzy rule */
400    YES,
401    /** row doesn't satisfy fuzzy rule, but there's possible greater row that does */
402    NEXT_EXISTS,
403    /** row doesn't satisfy fuzzy rule and there's no greater row that does */
404    NO_NEXT
405  }
406
407  @InterfaceAudience.Private
408  static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
409    return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
410  }
411
412  @InterfaceAudience.Private
413  static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
414    byte[] fuzzyKeyMeta) {
415    return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
416  }
417
418  static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
419    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
420
421    if (!UNSAFE_UNALIGNED) {
422      return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
423    }
424
425    if (row == null) {
426      // do nothing, let scan to proceed
427      return SatisfiesCode.YES;
428    }
429    length = Math.min(length, fuzzyKeyBytes.length);
430    int numWords = length / Bytes.SIZEOF_LONG;
431
432    int j = numWords << 3; // numWords * SIZEOF_LONG;
433
434    for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
435      long fuzzyBytes = Bytes.toLong(fuzzyKeyBytes, i);
436      long fuzzyMeta = Bytes.toLong(fuzzyKeyMeta, i);
437      long rowValue = Bytes.toLong(row, offset + i);
438      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
439        // We always return NEXT_EXISTS
440        return SatisfiesCode.NEXT_EXISTS;
441      }
442    }
443
444    int off = j;
445
446    if (length - off >= Bytes.SIZEOF_INT) {
447      int fuzzyBytes = Bytes.toInt(fuzzyKeyBytes, off);
448      int fuzzyMeta = Bytes.toInt(fuzzyKeyMeta, off);
449      int rowValue = Bytes.toInt(row, offset + off);
450      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
451        // We always return NEXT_EXISTS
452        return SatisfiesCode.NEXT_EXISTS;
453      }
454      off += Bytes.SIZEOF_INT;
455    }
456
457    if (length - off >= Bytes.SIZEOF_SHORT) {
458      short fuzzyBytes = Bytes.toShort(fuzzyKeyBytes, off);
459      short fuzzyMeta = Bytes.toShort(fuzzyKeyMeta, off);
460      short rowValue = Bytes.toShort(row, offset + off);
461      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
462        // We always return NEXT_EXISTS
463        // even if it does not (in this case getNextForFuzzyRule
464        // will return null)
465        return SatisfiesCode.NEXT_EXISTS;
466      }
467      off += Bytes.SIZEOF_SHORT;
468    }
469
470    if (length - off >= Bytes.SIZEOF_BYTE) {
471      int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
472      int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
473      int rowValue = row[offset + off] & 0xff;
474      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
475        // We always return NEXT_EXISTS
476        return SatisfiesCode.NEXT_EXISTS;
477      }
478    }
479    return SatisfiesCode.YES;
480  }
481
482  static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
483    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
484    if (row == null) {
485      // do nothing, let scan to proceed
486      return SatisfiesCode.YES;
487    }
488
489    Order order = Order.orderFor(reverse);
490    boolean nextRowKeyCandidateExists = false;
491
492    for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
493      // First, checking if this position is fixed and not equals the given one
494      boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
495      boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
496      if (fixedByteIncorrect) {
497        // in this case there's another row that satisfies fuzzy rule and bigger than this row
498        if (nextRowKeyCandidateExists) {
499          return SatisfiesCode.NEXT_EXISTS;
500        }
501
502        // If this row byte is less than fixed then there's a byte array bigger than
503        // this row and which satisfies the fuzzy rule. Otherwise there's no such byte array:
504        // this row is simply bigger than any byte array that satisfies the fuzzy rule
505        boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
506        if (rowByteLessThanFixed && !reverse) {
507          return SatisfiesCode.NEXT_EXISTS;
508        } else if (!rowByteLessThanFixed && reverse) {
509          return SatisfiesCode.NEXT_EXISTS;
510        } else {
511          return SatisfiesCode.NO_NEXT;
512        }
513      }
514
515      // Second, checking if this position is not fixed and byte value is not the biggest. In this
516      // case there's a byte array bigger than this row and which satisfies the fuzzy rule. To get
517      // bigger byte array that satisfies the rule we need to just increase this byte
518      // (see the code of getNextForFuzzyRule below) by one.
519      // Note: if non-fixed byte is already at biggest value, this doesn't allow us to say there's
520      // bigger one that satisfies the rule as it can't be increased.
521      if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
522        nextRowKeyCandidateExists = true;
523      }
524    }
525    return SatisfiesCode.YES;
526  }
527
528  @InterfaceAudience.Private
529  static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
530    return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
531  }
532
533  @InterfaceAudience.Private
534  static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
535    byte[] fuzzyKeyMeta) {
536    return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
537  }
538
539  /** Abstracts directional comparisons based on scan direction. */
540  private enum Order {
541    ASC {
542      @Override
543      public boolean lt(int lhs, int rhs) {
544        return lhs < rhs;
545      }
546
547      @Override
548      public boolean gt(int lhs, int rhs) {
549        return lhs > rhs;
550      }
551
552      @Override
553      public byte inc(byte val) {
554        // TODO: what about over/underflow?
555        return (byte) (val + 1);
556      }
557
558      @Override
559      public boolean isMax(byte val) {
560        return val == (byte) 0xff;
561      }
562
563      @Override
564      public byte min() {
565        return 0;
566      }
567    },
568    DESC {
569      @Override
570      public boolean lt(int lhs, int rhs) {
571        return lhs > rhs;
572      }
573
574      @Override
575      public boolean gt(int lhs, int rhs) {
576        return lhs < rhs;
577      }
578
579      @Override
580      public byte inc(byte val) {
581        // TODO: what about over/underflow?
582        return (byte) (val - 1);
583      }
584
585      @Override
586      public boolean isMax(byte val) {
587        return val == 0;
588      }
589
590      @Override
591      public byte min() {
592        return (byte) 0xFF;
593      }
594    };
595
596    public static Order orderFor(boolean reverse) {
597      return reverse ? DESC : ASC;
598    }
599
600    /** Returns true when {@code lhs < rhs}. */
601    public abstract boolean lt(int lhs, int rhs);
602
603    /** Returns true when {@code lhs > rhs}. */
604    public abstract boolean gt(int lhs, int rhs);
605
606    /** Returns {@code val} incremented by 1. */
607    public abstract byte inc(byte val);
608
609    /** Return true when {@code val} is the maximum value */
610    public abstract boolean isMax(byte val);
611
612    /** Return the minimum value according to this ordering scheme. */
613    public abstract byte min();
614  }
615
616  /**
617   * Find out the closes next byte array that satisfies fuzzy rule and is after the given one. In
618   * the reverse case it returns increased byte array to make sure that the proper row is selected
619   * next.
620   * @return byte array which is after the given row and which satisfies the fuzzy rule if it
621   *         exists, null otherwise
622   */
623  @InterfaceAudience.Private
624  static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
625    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
626    // To find out the closest next byte array that satisfies fuzzy rule and is after the given one
627    // we do the following:
628    // 1. setting values on all "fixed" positions to the values from fuzzyKeyBytes
629    // 2. if during the first step given row did not increase, then we increase the value at
630    // the first "non-fixed" position (where it is not maximum already)
631
632    // It is easier to perform this by using fuzzyKeyBytes copy and setting "non-fixed" position
633    // values than otherwise.
634    byte[] result = Arrays.copyOf(fuzzyKeyBytes, Math.max(length, fuzzyKeyBytes.length));
635    if (reverse) {
636      // we need 0xff's instead of 0x00's
637      for (int i = 0; i < result.length; i++) {
638        if (result[i] == 0) {
639          result[i] = (byte) 0xFF;
640        }
641      }
642    }
643    int toInc = -1;
644    final Order order = Order.orderFor(reverse);
645
646    boolean increased = false;
647    for (int i = 0; i < result.length; i++) {
648      if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) {
649        result[i] = row[offset + i];
650        if (!order.isMax(row[offset + i])) {
651          // this is "non-fixed" position and is not at max value, hence we can increase it
652          toInc = i;
653        }
654      } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1 /* fixed */) {
655        if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
656          // if setting value for any fixed position increased the original array,
657          // we are OK
658          increased = true;
659          break;
660        }
661
662        if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
663          // if setting value for any fixed position makes array "smaller", then just stop:
664          // in case we found some non-fixed position to increase we will do it, otherwise
665          // there's no "next" row key that satisfies fuzzy rule and "greater" than given row
666          break;
667        }
668      }
669    }
670
671    if (!increased) {
672      if (toInc < 0) {
673        return null;
674      }
675      result[toInc] = order.inc(result[toInc]);
676
677      // Setting all "non-fixed" positions to zeroes to the right of the one we increased so
678      // that found "next" row key is the smallest possible
679      for (int i = toInc + 1; i < result.length; i++) {
680        if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) {
681          result[i] = order.min();
682        }
683      }
684    }
685
686    byte[] trailingZerosTrimmed = trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
687    if (reverse) {
688      // In the reverse case we increase last non-max byte to make sure that the proper row is
689      // selected next.
690      return PrivateCellUtil.increaseLastNonMaxByte(trailingZerosTrimmed);
691    } else {
692      return trailingZerosTrimmed;
693    }
694  }
695
696  /**
697   * For forward scanner, next cell hint should not contain any trailing zeroes unless they are part
698   * of fuzzyKeyMeta hint = '\x01\x01\x01\x00\x00' will skip valid row '\x01\x01\x01'
699   * @param toInc - position of incremented byte
700   * @return trimmed version of result
701   */
702
703  private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
704    int off = fuzzyKeyMeta.length >= result.length ? result.length - 1 : fuzzyKeyMeta.length - 1;
705    for (; off >= 0; off--) {
706      if (fuzzyKeyMeta[off] != 0) break;
707    }
708    if (off < toInc) off = toInc;
709    byte[] retValue = new byte[off + 1];
710    System.arraycopy(result, 0, retValue, 0, retValue.length);
711    return retValue;
712  }
713
714  /**
715   * Returns true if and only if the fields of the filter that are serialized are equal to the
716   * corresponding fields in other. Used for testing.
717   */
718  @Override
719  boolean areSerializedFieldsEqual(Filter o) {
720    if (o == this) {
721      return true;
722    }
723    if (!(o instanceof FuzzyRowFilter)) {
724      return false;
725    }
726    FuzzyRowFilter other = (FuzzyRowFilter) o;
727    if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
728    for (int i = 0; i < fuzzyKeysData.size(); ++i) {
729      Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
730      Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
731      if (
732        !(Bytes.equals(thisData.getFirst(), otherData.getFirst())
733          && Bytes.equals(thisData.getSecond(), otherData.getSecond()))
734      ) {
735        return false;
736      }
737    }
738    return true;
739  }
740
741  @Override
742  public boolean equals(Object obj) {
743    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
744  }
745
746  @Override
747  public int hashCode() {
748    return Objects.hash(this.fuzzyKeysData);
749  }
750}