Problem: I want to search books by date range and get the results sorted by date. Searching by date range works, but the documents are not sorted properly (they appear in insertion order — see the IDs):
========== uploadDate:[-2208988800 TO 1893456000] ========== FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z] FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z] FOUND: Book[id=3, fileName=Start Wars 2.pdf, uploadDate=1980-05-21T09:00:00Z] FOUND: Book[id=4, fileName=Start Wars 3.pdf, uploadDate=1983-05-25T08:00:00Z] FOUND: Book[id=5, fileName=The bible.pdf, uploadDate=2020-01-01T09:00:00Z] FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z]
To sort them by date, I changed my code to:
Add a NumericDocValuesField:
document.add(new StoredField("uploadDate", book.uploadDate().getEpochSecond())); document.add(new LongPoint("uploadDate", book.uploadDate().getEpochSecond())); document.add(new NumericDocValuesField("uploadDate", book.uploadDate().getEpochSecond()));
Add a Sort:
// Build the sorter to sort the documents by date Sort sorter = new Sort(); SortField sortField = new SortField("uploadDate", SortField.Type.LONG, true); sorter.setSort(sortField, SortField.FIELD_SCORE); Query query = parser.parse(queryText, ""); TopDocs hits = indexSearcher.search(query, 100, sorter);
Question: What am I doing wrong? What do I need to change so that the documents are sorted in descending order (2021 first, 2020 second, 2000 third, …)? Looking at the following question did not help: Sorting lucene documents by date
My code:
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.text.DecimalFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.*;

/**
 * Indexes six sample books into an in-memory Lucene index and runs a series of
 * queries against it, printing for every expected book ID whether it was found.
 *
 * <p>The upload date is indexed as epoch seconds under the field name
 * {@code uploadDate}, and every search is sorted by that field in reverse order.
 */
public class LuceneDocumentExample {

    /** A book as it is indexed and as it is rebuilt from a search hit. */
    public static record Book( Integer id, String fileName, Instant uploadDate ) { }

    // NOTE(review): this directory is created and closed in main(), but it is
    // not the one handed to the IndexWriter (see main).
    private static Directory directory;

    /** Writer shared by main() and searchDocuments(); the reader is opened from it. */
    private static IndexWriter indexWriter;

    /**
     * Builds the index, adds the sample books and runs the test queries.
     *
     * @param arguments unused
     * @throws Exception if indexing or closing the index fails
     */
    public static void main(String[] arguments) throws Exception {
        // Create the index
        directory = new RAMDirectory();
        // NOTE(review): the writer gets a second, anonymous RAMDirectory, so the
        // `directory` field above never receives any documents.
        indexWriter = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new MultiFieldAnalyzer()));

        // Define some sample books
        List<Book> books = Arrays.asList(
            new Book(1, "Book1 - The beginning.pdf", createInstant(2000, 1, 1, 10, 0)),
            new Book(2, "Start Wars 1.pdf", createInstant(1977, 5, 25, 10, 0)),
            new Book(3, "Start Wars 2.pdf", createInstant(1980, 5, 21, 10, 0)),
            new Book(4, "Start Wars 3.pdf", createInstant(1983, 5, 25, 10, 0)),
            new Book(5, "The bible.pdf", createInstant(2020, 1, 1, 10, 0)),
            new Book(6, "TheUltimateDeveloperGuide.pdf", createInstant(2021, 2, 16, 20, 0))
        );

        // Add the books
        for (Book book : books) {
            Document document = new Document();
            document.add(new StringField("id", book.id().toString(), Field.Store.YES));
            document.add(new StringField("fileName", book.fileName(), Field.Store.YES));
            // The upload date is added three times under the same name, as a
            // stored field, a point field and a doc-values field.
            document.add(new StoredField("uploadDate", book.uploadDate().getEpochSecond()));
            document.add(new LongPoint("uploadDate", book.uploadDate().getEpochSecond()));
            document.add(new NumericDocValuesField("uploadDate", book.uploadDate().getEpochSecond()));
            indexWriter.addDocument(document);
            indexWriter.commit();
        }

        // Run several test queries to check the functionality
        checkSearchResult("id:1", Set.of(1));
        checkSearchResult("id:6", Set.of(6));
        checkSearchResult("id:1 OR id:2", Set.of(1, 2));
        checkSearchResult("id:1 AND id:2", Set.of());
        checkSearchResult("fileName:TheUltimateDeveloperGuide.pdf", Set.of(6));
        checkSearchResult("id:6 fileName:TheUltimateDeveloperGuide.pdf", Set.of(6));
        checkSearchResult("uploadDate:[" + createInstant(2000, 1, 1, 8, 0).getEpochSecond()
            + " TO " + createInstant(2000, 1, 1, 12, 0).getEpochSecond() + "]", Set.of(1));
        checkSearchResult("uploadDate:[" + createInstant(1977, 1, 1, 1, 0).getEpochSecond()
            + " TO " + createInstant(1983, 5, 26, 1, 0).getEpochSecond() + "]", Set.of(2, 3, 4));
        checkSearchResult("uploadDate:[" + createInstant(1900, 1, 1, 1, 0).getEpochSecond()
            + " TO " + createInstant(2030, 1, 1, 1, 0).getEpochSecond() + "]", Set.of(1, 2, 3, 4, 5, 6));
        checkSearchResult("id:1 uploadDate:[" + createInstant(2000, 1, 1, 8, 0).getEpochSecond()
            + " TO " + createInstant(2000, 1, 1, 12, 0).getEpochSecond() + "]", Set.of(1));

        // Close the index
        indexWriter.close();
        directory.close();
    }

    /**
     * Parses {@code queryText}, runs it against a reader opened from the shared
     * writer and rebuilds a {@link Book} from every hit.
     *
     * @param queryText query in the syntax of the standard query parser
     * @return the books in the order of the returned hits (at most 100)
     * @throws RuntimeException wrapping any failure while parsing or searching
     */
    public static List<Book> searchDocuments(String queryText) {
        // Create the reader
        try (IndexReader indexReader = DirectoryReader.open(indexWriter)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            StandardQueryParser parser = new StandardQueryParser();
            parser.setAnalyzer(indexWriter.getAnalyzer());
            // Register uploadDate as a Long point field with the parser.
            Map<String, PointsConfig> pointsConfigMap = new HashMap<>();
            pointsConfigMap.put("uploadDate", new PointsConfig(new DecimalFormat(), Long.class));
            parser.setPointsConfigMap(pointsConfigMap);

            // Build the sorter to sort the documents by date
            Sort sorter = new Sort();
            // Third argument is the reverse flag; the score is the secondary sort key.
            SortField sortField = new SortField("uploadDate", SortField.Type.LONG, true);
            sorter.setSort(sortField, SortField.FIELD_SCORE);
            Query query = parser.parse(queryText, "");
            TopDocs hits = indexSearcher.search(query, 100, sorter);
            List<Book> books = new ArrayList<>();
            for (int i = 0; i < hits.scoreDocs.length; i++) {
                int docId = hits.scoreDocs[i].doc;
                Document document = indexSearcher.doc(docId);
                Integer id = Integer.parseInt(document.get("id"));
                String fileName = document.get("fileName");
                Instant uploadDate = Instant.ofEpochSecond(Long.parseLong(document.get("uploadDate")));
                books.add(new Book(id, fileName, uploadDate));
            }
            return books;
        } catch (Exception exception) {
            throw new RuntimeException("Unable to execute query " + queryText + ": " + exception.getMessage(), exception);
        }
    }

    /**
     * Runs the query and prints, for every expected ID, whether a matching book
     * is among the results; prints {@code NONE} when no IDs are expected.
     *
     * <p>NOTE(review): the loop walks {@code expectedIds}, not the result list,
     * so lines are printed in the iteration order of the expected-ID set (which
     * {@code Set.of} leaves unspecified) and not in the order of the hits. The
     * printout therefore cannot show whether the search results are sorted.
     * Results that are not in {@code expectedIds} are not reported either.
     *
     * @param queryText   query to execute
     * @param expectedIds IDs of the books the query is expected to return
     */
    public static void checkSearchResult(String queryText, Set<Integer> expectedIds) {
        System.out.println("========== " + queryText + " ==========");
        List<Book> books = searchDocuments(queryText);
        if (!expectedIds.isEmpty()) {
            for (Integer expectedId : expectedIds) {
                Optional<Book> optionalBook = books.stream().filter(book -> expectedId.equals(book.id())).findAny();
                if (optionalBook.isPresent()) {
                    System.out.println("FOUND: " + optionalBook.get());
                } else {
                    System.out.println("MISSING: " + expectedId);
                }
            }
        } else {
            System.out.println("NONE");
        }
    }

    /**
     * Converts a local date and time in the Europe/Zurich zone to an instant.
     *
     * @return the instant corresponding to the given wall-clock time in Zurich
     */
    private static Instant createInstant(Integer year, Integer month, Integer day, Integer hour, Integer minute) {
        LocalDateTime dateTime = LocalDateTime.of(year, month, day, hour, minute);
        return dateTime.atZone(ZoneId.of("Europe/Zurich")).toInstant();
    }
}
Advertisement
Answer
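My problem was related to the Long conversion via Lucene DateTools. I changed my code (the check now also compares the hits against the expected IDs position by position, instead of looping over the expected-ID set, so the printout reflects the order of the hits) and now get proper sorting: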
import org.apache.lucene.document.*; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import java.text.DecimalFormat; import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneId; import java.util.*; public class SearchDemo { record Book(Integer id, String fileName, Instant uploadDate) { } private static Directory directory; private static IndexWriter indexWriter; public static void main(String[] arguments) { try { // Create the index directory = new RAMDirectory(); indexWriter = new IndexWriter(directory, new IndexWriterConfig(new MultiFieldAnalyzer())); // Define a few books List<Book> books = Arrays.asList( new Book(1, "Book1 - The beginning.pdf", createDateString(2000, 1, 1, 10, 0)), new Book(2, "Start Wars 1.pdf", createDateString(1977, 5, 25, 10, 0)), new Book(3, "Start Wars 2.pdf", createDateString(1980, 5, 21, 10, 0)), new Book(4, "Start Wars 3.pdf", createDateString(1983, 5, 25, 10, 0)), new Book(5, "The bible.pdf", createDateString(2020, 1, 1, 10, 0)), new Book(6, "TheUltimateDeveloperGuide.pdf", createDateString(2021, 2, 16, 20, 0)) ); // Add the books for (Book book : books) { addBook(book); } // Search for all books checkSearchResult("id:1", Arrays.asList(1)); checkSearchResult("id:6", Arrays.asList(6)); checkSearchResult("id:1 OR id:2", Arrays.asList(1, 2)); checkSearchResult("id:1 AND id:2", Arrays.asList()); checkSearchResult("fileName:TheUltimateDeveloperGuide.pdf", Arrays.asList(6)); checkSearchResult("id:6 AND fileName:TheUltimateDeveloperGuide.pdf", Arrays.asList(6)); checkSearchResult("uploadDate:[" + 
toLuceneDate(createDateString(2000, 1, 1, 8, 0)) + " TO " + toLuceneDate(createDateString(2000, 1, 1, 12, 0)) + "]", Arrays.asList(1)); checkSearchResult("uploadDate:[" + toLuceneDate(createDateString(1977, 1, 1, 1, 0)) + " TO " + toLuceneDate(createDateString(1983, 5, 26, 1, 0)) + "]", Arrays.asList(4, 3, 2)); checkSearchResult("uploadDate:[" + toLuceneDate(createDateString(1930, 1, 1, 1, 1)) + " TO " + toLuceneDate(createDateString(2030, 1, 1, 1, 1)) + "]", Arrays.asList(6, 5, 1, 4, 3, 2)); checkSearchResult("id:1 uploadDate:[" + toLuceneDate(createDateString(2000, 1, 1, 8, 0)) + " TO " + toLuceneDate(createDateString(2000, 1, 1, 12, 0)) + "]", Arrays.asList(1)); } catch (Exception exception) { exception.printStackTrace(); } } private static void addBook(Book book) throws Exception { Instant uploadDateInstant = book.uploadDate; Long uploadDate = toLuceneDate(uploadDateInstant); Document document = new Document(); document.add(new StringField("id", Integer.toString(book.id), Field.Store.YES)); document.add(new StringField("fileName", book.fileName, Field.Store.YES)); document.add(new LongPoint("uploadDate", uploadDate)); document.add(new StoredField("uploadDate", uploadDate.toString())); document.add(new NumericDocValuesField("uploadDate", uploadDate)); indexWriter.addDocument(document); indexWriter.commit(); } private static List<Book> searchBooks(String searchQuery) throws Exception { // Create the reader and search for the range 101 to 203 IndexReader indexReader = DirectoryReader.open(indexWriter); IndexSearcher indexSearcher = new IndexSearcher(indexReader); StandardQueryParser parser = new StandardQueryParser(); parser.setAnalyzer(indexWriter.getAnalyzer()); PointsConfig pointsConfig = new PointsConfig(new DecimalFormat(), Long.class); Map<String, PointsConfig> pointsConfigMap = new HashMap<>(); pointsConfigMap.put("uploadDate", pointsConfig); parser.setPointsConfigMap(pointsConfigMap); SortField valueSort = new SortedNumericSortField("uploadDate", 
SortField.Type.LONG, true); Sort sorter = new Sort(valueSort); Query query = parser.parse(searchQuery, ""); TopDocs hits = indexSearcher.search(query, 100, sorter); List<Book> books = new ArrayList<>(); for (int i = 0; i < hits.scoreDocs.length; i++) { int docId = hits.scoreDocs[i].doc; Document document = indexSearcher.doc(docId); Integer id = Integer.parseInt(document.get("id")); String fileName = document.get("fileName"); Instant uploadDate = DateTools.stringToDate(document.get("uploadDate")).toInstant(); books.add(new Book(id, fileName, uploadDate)); } return books; } private static Instant createDateString(Integer year, Integer month, Integer day, Integer hour, Integer minute) { LocalDateTime dateTime = LocalDateTime.of(year, month, day, hour, minute); return dateTime.atZone(ZoneId.of("Europe/Zurich")).toInstant(); } private static Long toLuceneDate(Instant instant) { return Long.parseLong(DateTools.dateToString(Date.from(instant), DateTools.Resolution.MINUTE)); } public static void checkSearchResult(String queryText, List<Integer> expectedIds) throws Exception { System.out.println("========== " + queryText + " =========="); List<Book> books = searchBooks(queryText); if (expectedIds.isEmpty()) { // Check for no result System.out.println("EMPTY AS EXPECTED"); } else if (expectedIds.size() != books.size()) { // Check dimension System.out.println("MISMATCH. GOT: " + books); } else { // Check values for (int i = 0; i < expectedIds.size(); i++) { if (expectedIds.get(i).equals(books.get(i).id)) { System.out.println("FOUND: " + books.get(i)); } else { System.out.println("MISSING: " + books.get(i).id); } } } } }
Output:
========== id:1 ========== FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z] ========== id:6 ========== FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z] ========== id:1 OR id:2 ========== FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z] FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z] ========== id:1 AND id:2 ========== EMPTY AS EXPECTED ========== fileName:TheUltimateDeveloperGuide.pdf ========== FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z] ========== id:6 AND fileName:TheUltimateDeveloperGuide.pdf ========== FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z] ========== uploadDate:[200001010700 TO 200001011100] ========== FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z] ========== uploadDate:[197701010000 TO 198305252300] ========== FOUND: Book[id=4, fileName=Start Wars 3.pdf, uploadDate=1983-05-25T08:00:00Z] FOUND: Book[id=3, fileName=Start Wars 2.pdf, uploadDate=1980-05-21T09:00:00Z] FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z] ========== uploadDate:[193001010001 TO 203001010001] ========== FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z] FOUND: Book[id=5, fileName=The bible.pdf, uploadDate=2020-01-01T09:00:00Z] FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z] FOUND: Book[id=4, fileName=Start Wars 3.pdf, uploadDate=1983-05-25T08:00:00Z] FOUND: Book[id=3, fileName=Start Wars 2.pdf, uploadDate=1980-05-21T09:00:00Z] FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z] ========== id:1 uploadDate:[200001010700 TO 200001011100] ========== FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z]