001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019import java.io.IOException;
020import java.time.LocalDate;
021import java.time.ZoneOffset;
022import java.util.Arrays;
023import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
024import org.apache.lucene.document.Document;
025import org.apache.lucene.document.Field;
026import org.apache.lucene.document.FloatPoint;
027import org.apache.lucene.document.IntPoint;
028import org.apache.lucene.document.LongPoint;
029import org.apache.lucene.document.StringField;
030import org.apache.lucene.facet.FacetResult;
031import org.apache.lucene.facet.Facets;
032import org.apache.lucene.facet.FacetsCollector;
033import org.apache.lucene.facet.FacetsCollectorManager;
034import org.apache.lucene.facet.facetset.DimRange;
035import org.apache.lucene.facet.facetset.ExactFacetSetMatcher;
036import org.apache.lucene.facet.facetset.FacetSet;
037import org.apache.lucene.facet.facetset.FacetSetDecoder;
038import org.apache.lucene.facet.facetset.FacetSetMatcher;
039import org.apache.lucene.facet.facetset.FacetSetsField;
040import org.apache.lucene.facet.facetset.MatchingFacetSetsCounts;
041import org.apache.lucene.facet.facetset.RangeFacetSetMatcher;
042import org.apache.lucene.index.DirectoryReader;
043import org.apache.lucene.index.IndexWriter;
044import org.apache.lucene.index.IndexWriterConfig;
045import org.apache.lucene.index.IndexWriterConfig.OpenMode;
046import org.apache.lucene.search.BooleanClause;
047import org.apache.lucene.search.BooleanQuery;
048import org.apache.lucene.search.IndexSearcher;
049import org.apache.lucene.search.MatchAllDocsQuery;
050import org.apache.lucene.search.Query;
051import org.apache.lucene.search.TermInSetQuery;
052import org.apache.lucene.store.ByteBuffersDirectory;
053import org.apache.lucene.store.Directory;
054import org.apache.lucene.util.BytesRef;
055import org.apache.lucene.util.NumericUtils;
056
057/**
058 * Shows usage of indexing and searching {@link FacetSetsField} with a custom {@link FacetSet}
059 * implementation. Unlike the out of the box {@link FacetSet} implementations, this example shows
060 * how to mix and match dimensions of different types, as well as implementing a custom {@link
061 * FacetSetMatcher}.
062 */
063public class CustomFacetSetExample {
064
065  private static final long MAY_SECOND_2022 = date("2022-05-02");
066  private static final long JUNE_SECOND_2022 = date("2022-06-02");
067  private static final long JULY_SECOND_2022 = date("2022-07-02");
068  private static final float HUNDRED_TWENTY_DEGREES = fahrenheitToCelsius(120);
069  private static final float HUNDRED_DEGREES = fahrenheitToCelsius(100);
070  private static final float EIGHTY_DEGREES = fahrenheitToCelsius(80);
071
072  private final Directory indexDir = new ByteBuffersDirectory();
073
074  /** Empty constructor */
075  public CustomFacetSetExample() {}
076
077  /** Build the example index. */
078  private void index() throws IOException {
079    IndexWriter indexWriter =
080        new IndexWriter(
081            indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
082
083    // Every document holds the temperature measures for a City by Date
084
085    Document doc = new Document();
086    doc.add(new StringField("city", "city1", Field.Store.YES));
087    doc.add(
088        FacetSetsField.create(
089            "temperature",
090            new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES),
091            new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES),
092            new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
093    addFastMatchFields(doc);
094    indexWriter.addDocument(doc);
095
096    doc = new Document();
097    doc.add(new StringField("city", "city2", Field.Store.YES));
098    doc.add(
099        FacetSetsField.create(
100            "temperature",
101            new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES),
102            new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES),
103            new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
104    addFastMatchFields(doc);
105    indexWriter.addDocument(doc);
106
107    indexWriter.close();
108  }
109
110  private void addFastMatchFields(Document doc) {
111    // day field
112    doc.add(new StringField("day", String.valueOf(MAY_SECOND_2022), Field.Store.NO));
113    doc.add(new StringField("day", String.valueOf(JUNE_SECOND_2022), Field.Store.NO));
114    doc.add(new StringField("day", String.valueOf(JULY_SECOND_2022), Field.Store.NO));
115
116    // temp field
117    doc.add(new StringField("temp", String.valueOf(EIGHTY_DEGREES), Field.Store.NO));
118    doc.add(new StringField("temp", String.valueOf(HUNDRED_DEGREES), Field.Store.NO));
119    doc.add(new StringField("temp", String.valueOf(HUNDRED_TWENTY_DEGREES), Field.Store.NO));
120  }
121
122  /** Counting documents which exactly match a given {@link FacetSet}. */
123  private FacetResult exactMatching() throws IOException {
124    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
125      IndexSearcher searcher = new IndexSearcher(indexReader);
126
127      // MatchAllDocsQuery is for "browsing" (counts facets
128      // for all non-deleted docs in the index); normally
129      // you'd use a "normal" query:
130      FacetsCollector fc =
131          searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager());
132
133      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
134      Facets facets =
135          new MatchingFacetSetsCounts(
136              "temperature",
137              fc,
138              TemperatureReadingFacetSet::decodeTemperatureReading,
139              new ExactFacetSetMatcher(
140                  "May 2022 (100f)",
141                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
142              new ExactFacetSetMatcher(
143                  "July 2022 (120f)",
144                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
145
146      // Retrieve results
147      return facets.getAllChildren("temperature");
148    }
149  }
150
151  /**
152   * Counting documents which exactly match a given {@link FacetSet}. This example also demonstrates
153   * how to use a fast match query to improve the counting efficiency by skipping over documents
154   * which cannot possibly match a set.
155   */
156  private FacetResult exactMatchingWithFastMatchQuery() throws IOException {
157    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
158      IndexSearcher searcher = new IndexSearcher(indexReader);
159
160      // MatchAllDocsQuery is for "browsing" (counts facets
161      // for all non-deleted docs in the index); normally
162      // you'd use a "normal" query:
163      FacetsCollector fc =
164          searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager());
165
166      // Match documents whose "day" field is either "May 2022" or "July 2022"
167      Query dateQuery =
168          new TermInSetQuery(
169              "day",
170              Arrays.asList(
171                  new BytesRef(String.valueOf(MAY_SECOND_2022)),
172                  new BytesRef(String.valueOf(JULY_SECOND_2022))));
173      // Match documents whose "temp" field is either "80" or "120" degrees
174      Query temperatureQuery =
175          new TermInSetQuery(
176              "temp",
177              Arrays.asList(
178                  new BytesRef(String.valueOf(HUNDRED_DEGREES)),
179                  new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES))));
180      // Documents must match both clauses
181      Query fastMatchQuery =
182          new BooleanQuery.Builder()
183              .add(dateQuery, BooleanClause.Occur.MUST)
184              .add(temperatureQuery, BooleanClause.Occur.MUST)
185              .build();
186
187      // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions
188      Facets facets =
189          new MatchingFacetSetsCounts(
190              "temperature",
191              fc,
192              TemperatureReadingFacetSet::decodeTemperatureReading,
193              fastMatchQuery,
194              new ExactFacetSetMatcher(
195                  "May 2022 (100f)",
196                  new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)),
197              new ExactFacetSetMatcher(
198                  "July 2022 (120f)",
199                  new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
200
201      // Retrieve results
202      return facets.getAllChildren("temperature");
203    }
204  }
205
206  /** Counting documents which match a certain degrees value for any date. */
207  private FacetResult rangeMatching() throws IOException {
208    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
209      IndexSearcher searcher = new IndexSearcher(indexReader);
210
211      // MatchAllDocsQuery is for "browsing" (counts facets
212      // for all non-deleted docs in the index); normally
213      // you'd use a "normal" query:
214      FacetsCollector fc =
215          searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager());
216
217      // Count 80-100 degrees
218      Facets facets =
219          new MatchingFacetSetsCounts(
220              "temperature",
221              fc,
222              TemperatureReadingFacetSet::decodeTemperatureReading,
223              new RangeFacetSetMatcher(
224                  "Eighty to Hundred Degrees",
225                  DimRange.fromLongs(Long.MIN_VALUE, true, Long.MAX_VALUE, true),
226                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));
227
228      // Retrieve results
229      return facets.getAllChildren("temperature");
230    }
231  }
232
233  /**
234   * Like {@link #rangeMatching()}, however this example demonstrates a custom {@link
235   * FacetSetMatcher} which only considers certain dimensions (in this case only the temperature
236   * one).
237   */
238  private FacetResult customRangeMatching() throws IOException {
239    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
240      IndexSearcher searcher = new IndexSearcher(indexReader);
241
242      // MatchAllDocsQuery is for "browsing" (counts facets
243      // for all non-deleted docs in the index); normally
244      // you'd use a "normal" query:
245      FacetsCollector fc =
246          searcher.search(MatchAllDocsQuery.INSTANCE, new FacetsCollectorManager());
247
248      // Count 80-100 degrees
249      Facets facets =
250          new MatchingFacetSetsCounts(
251              "temperature",
252              fc,
253              TemperatureReadingFacetSet::decodeTemperatureReading,
254              new TemperatureOnlyFacetSetMatcher(
255                  "Eighty to Hundred Degrees",
256                  DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true)));
257
258      // Retrieve results
259      return facets.getAllChildren("temperature");
260    }
261  }
262
263  private static long date(String dateString) {
264    return LocalDate.parse(dateString).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
265  }
266
267  private static float fahrenheitToCelsius(int degrees) {
268    return (degrees - 32.0f) * 5.f / 9.f;
269  }
270
271  /** Runs the exact matching example. */
272  public FacetResult runExactMatching() throws IOException {
273    index();
274    return exactMatching();
275  }
276
277  /** Runs the exact matching with fast match query example. */
278  public FacetResult runExactMatchingWithFastMatchQuery() throws IOException {
279    index();
280    return exactMatchingWithFastMatchQuery();
281  }
282
283  /** Runs the range matching example. */
284  public FacetResult runRangeMatching() throws IOException {
285    index();
286    return rangeMatching();
287  }
288
289  /** Runs the custom range matching example. */
290  public FacetResult runCustomRangeMatching() throws IOException {
291    index();
292    return customRangeMatching();
293  }
294
295  /** Runs the search and drill-down examples and prints the results. */
296  public static void main(String[] args) throws Exception {
297    CustomFacetSetExample example = new CustomFacetSetExample();
298
299    System.out.println("Exact Facet Set matching example:");
300    System.out.println("-----------------------");
301    FacetResult result = example.runExactMatching();
302    System.out.println("Temperature Reading: " + result);
303
304    System.out.println("Exact Facet Set matching with fast match query example:");
305    System.out.println("-----------------------");
306    result = example.runExactMatchingWithFastMatchQuery();
307    System.out.println("Temperature Reading: " + result);
308
309    System.out.println("Range Facet Set matching example:");
310    System.out.println("-----------------------");
311    result = example.runRangeMatching();
312    System.out.println("Temperature Reading: " + result);
313
314    System.out.println("Custom Range Facet Set matching example:");
315    System.out.println("-----------------------");
316    result = example.runCustomRangeMatching();
317    System.out.println("Temperature Reading: " + result);
318  }
319
320  /**
321   * A {@link FacetSet} which encodes a temperature reading in a date (long) and degrees (celsius;
322   * float).
323   */
324  public static class TemperatureReadingFacetSet extends FacetSet {
325
326    private static final int SIZE_PACKED_BYTES = Long.BYTES + Float.BYTES;
327
328    private final long date;
329    private final float degrees;
330
331    /** Constructor */
332    public TemperatureReadingFacetSet(long date, float degrees) {
333      super(2); // We encode two dimensions
334
335      this.date = date;
336      this.degrees = degrees;
337    }
338
339    @Override
340    public long[] getComparableValues() {
341      return new long[] {date, NumericUtils.floatToSortableInt(degrees)};
342    }
343
344    @Override
345    public int packValues(byte[] buf, int start) {
346      LongPoint.encodeDimension(date, buf, start);
347      // Encode 'degrees' as a sortable integer.
348      FloatPoint.encodeDimension(degrees, buf, start + Long.BYTES);
349      return sizePackedBytes();
350    }
351
352    @Override
353    public int sizePackedBytes() {
354      return SIZE_PACKED_BYTES;
355    }
356
357    /**
358     * An implementation of {@link FacetSetDecoder#decode(BytesRef, int, long[])} for {@link
359     * TemperatureReadingFacetSet}.
360     */
361    public static int decodeTemperatureReading(BytesRef bytesRef, int start, long[] dest) {
362      dest[0] = LongPoint.decodeDimension(bytesRef.bytes, start);
363      // Decode the degrees as a sortable integer.
364      dest[1] = IntPoint.decodeDimension(bytesRef.bytes, start + Long.BYTES);
365      return SIZE_PACKED_BYTES;
366    }
367  }
368
369  /**
370   * A {@link FacetSetMatcher} which matches facet sets only by their temperature dimension,
371   * ignoring the date.
372   */
373  public static class TemperatureOnlyFacetSetMatcher extends FacetSetMatcher {
374
375    private final DimRange temperatureRange;
376
377    /** Constructor */
378    protected TemperatureOnlyFacetSetMatcher(String label, DimRange temperatureRange) {
379      super(label, 1); // We only evaluate one dimension
380
381      this.temperatureRange = temperatureRange;
382    }
383
384    @Override
385    public boolean matches(long[] dimValues) {
386      return temperatureRange.min() <= dimValues[1] && temperatureRange.max() >= dimValues[1];
387    }
388  }
389}