View Javadoc
1   /*
2    * Copyright (c) 2002-2021, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.announce.service.announcesearch;
35  
36  import java.text.ParseException;
37  import java.text.SimpleDateFormat;
38  import java.util.ArrayList;
39  import java.util.Collection;
40  import java.util.Date;
41  import java.util.List;
42  import java.util.Locale;
43  
44  import org.apache.commons.lang3.StringUtils;
45  import org.apache.lucene.document.DateTools;
46  import org.apache.lucene.document.Document;
47  import org.apache.lucene.index.Term;
48  import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
49  import org.apache.lucene.search.BooleanClause;
50  import org.apache.lucene.search.IndexSearcher;
51  import org.apache.lucene.search.PhraseQuery;
52  import org.apache.lucene.search.Query;
53  import org.apache.lucene.search.ScoreDoc;
54  import org.apache.lucene.search.TermQuery;
55  import org.apache.lucene.search.TermRangeQuery;
56  import org.apache.lucene.search.TopDocs;
57  import org.apache.lucene.util.BytesRef;
58  import org.apache.lucene.search.BooleanQuery;
59  import org.apache.lucene.analysis.Analyzer;
60  
61  import fr.paris.lutece.plugins.announce.business.Announce;
62  import fr.paris.lutece.plugins.announce.business.AnnounceHome;
63  import fr.paris.lutece.plugins.announce.business.AnnounceSearchFilter;
64  import fr.paris.lutece.plugins.announce.business.AnnounceSort;
65  import fr.paris.lutece.plugins.announce.service.AnnouncePlugin;
66  import fr.paris.lutece.portal.service.plugin.Plugin;
67  import fr.paris.lutece.portal.service.search.SearchItem;
68  import fr.paris.lutece.portal.service.search.SearchResult;
69  import fr.paris.lutece.portal.service.util.AppLogService;
70  import fr.paris.lutece.portal.service.util.AppPropertiesService;
71  /**
72   * LuceneSearchEngine
73   */
74  public class AnnounceLuceneSearchEngine implements IAnnounceSearchEngine
75  {
76      private static final int NO_CATEGORY = 0;
77      private static final int NO_SECTOR = 0;
78      private final SimpleDateFormat _dayFormat = new SimpleDateFormat( "yyyyMMdd", Locale.US );
79      private static final String PROPERTY_LUCENE_MIN_SCORE = "announce.lucene.minScore";
80      /**
81       * {@inheritDoc}
82       */
83      @Override
84      public int getSearchResults( AnnounceSearchFilter filter, Plugin plugin, List<SearchResult> listSearchResult, int nPage, int nItemsPerPage )
85      {
86          ArrayList<SearchItem> listResults = new ArrayList<>( );
87          IndexSearcher searcher;
88  
89          int nNbResults = 0;
90  
91          try
92          {
93              searcher = AnnounceSearchService.getInstance( ).getSearcher( );
94  
95              Collection<String> queries = new ArrayList<>( );
96              Collection<String> sectors = new ArrayList<>( );
97              Collection<BooleanClause.Occur> flags = new ArrayList<>( );
98  
99              // Category id
100             if ( filter.getIdCategory( ) != NO_CATEGORY )
101             {
102                 Query queryCategoryId = new TermQuery( new Term( AnnounceSearchItem.FIELD_CATEGORY_ID, String.valueOf( filter.getIdCategory( ) ) ) );
103                 queries.add( queryCategoryId.toString( ) );
104                 sectors.add( AnnounceSearchItem.FIELD_CATEGORY_ID );
105                 flags.add( BooleanClause.Occur.MUST );
106             }
107 
108             // Category id
109             if ( filter.getIdSector( ) != NO_SECTOR )
110             {
111                 Query querySectorId = new TermQuery( new Term( AnnounceSearchItem.FIELD_SECTOR_ID, String.valueOf( filter.getIdSector( ) ) ) );
112                 queries.add( querySectorId.toString( ) );
113                 sectors.add( AnnounceSearchItem.FIELD_SECTOR_ID );
114                 flags.add( BooleanClause.Occur.MUST );
115             }
116 
117             // Type (=announce)
118             PhraseQuery.Builder queryTypeBuilder = new PhraseQuery.Builder( );
119             // add character "e" to TYPE because field is not analyzed when added to lucene document, but it's analyzed then in MultiFieldQueryParser.parse
120             // method
121             queryTypeBuilder.add( new Term( SearchItem.FIELD_TYPE, AnnouncePlugin.PLUGIN_NAME + "e" ) );
122             queries.add( queryTypeBuilder.build( ).toString( ) );
123             sectors.add( SearchItem.FIELD_TYPE );
124             flags.add( BooleanClause.Occur.MUST );
125 
126             // Keywords in title or description
127             if ( StringUtils.isNotBlank( filter.getKeywords( ) ) )
128             {
129                 PhraseQuery.Builder queryContentBuilder = new PhraseQuery.Builder( );
130                 queryContentBuilder.add( new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) ) );
131                 queries.add( queryContentBuilder.build( ).toString( ) );
132                 sectors.add( SearchItem.FIELD_CONTENTS );
133                 flags.add( BooleanClause.Occur.MUST );
134             }
135 
136             // contains range date
137             if ( ( filter.getDateMin( ) != null ) || ( filter.getDateMax( ) != null ) )
138             {
139                 Date dateMinToSearch = new Date( 0L );
140                 Date dateMaxToSearch = new Date( );
141                 if ( filter.getDateMin( ) != null )
142                 {
143                     dateMinToSearch = filter.getDateMin( );
144                 }
145 
146                 if ( filter.getDateMax( ) != null )
147                 {
148                     dateMaxToSearch = filter.getDateMax( );
149                 }
150 
151                 // String stringDateMin = DateUtil.
152                 String strLowerTerm = _dayFormat.format( dateMinToSearch );
153                 String strUpperTerm = _dayFormat.format( dateMaxToSearch );
154                 BytesRef bRLowerTerm = new BytesRef( strLowerTerm );
155                 BytesRef bRUpperTerm = new BytesRef( strUpperTerm );
156                 Query queryRangeDate = new TermRangeQuery( SearchItem.FIELD_DATE, bRLowerTerm, bRUpperTerm, true, true );
157                 queries.add( queryRangeDate.toString( ) );
158                 sectors.add( SearchItem.FIELD_DATE );
159                 flags.add( BooleanClause.Occur.MUST );
160             }
161 
162             // contains range price
163             if ( ( filter.getPriceMin( ) > 0 ) || ( filter.getPriceMax( ) > 0 ) )
164             {
165                 int nPriceMin = ( filter.getPriceMin( ) > 0 ) ? filter.getPriceMin( ) : 0;
166                 int nPriceMax = ( filter.getPriceMax( ) > 0 ) ? filter.getPriceMax( ) : Integer.MAX_VALUE;
167                 Query queryRangePrice = new TermRangeQuery( AnnounceSearchItem.FIELD_PRICE,
168                         new BytesRef( AnnounceSearchService.formatPriceForIndexer( nPriceMin ) ),
169                         new BytesRef( AnnounceSearchService.formatPriceForIndexer( nPriceMax ) ), true, true );
170                 queries.add( queryRangePrice.toString( ) );
171                 sectors.add( AnnounceSearchItem.FIELD_PRICE );
172                 flags.add( BooleanClause.Occur.MUST );
173             }
174 
175             Query queryMulti = MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
176                     flags.toArray( new BooleanClause.Occur [ flags.size( )] ), AnnounceSearchService.getInstance( ).getAnalyzer( ) );
177 
178             TopDocs topDocs = searcher.search( queryMulti, 1000000 );
179             ScoreDoc [ ] hits = topDocs.scoreDocs;
180             nNbResults = hits.length;
181 
182             // We only get the documents of the current page
183             int nFrom = ( nPage - 1 ) * nItemsPerPage;
184 
185             if ( nFrom < 0 )
186             {
187                 nFrom = 0;
188             }
189 
190             int nTo = ( nPage * nItemsPerPage );
191 
192             if ( ( nTo == 0 ) || ( nTo > nNbResults ) )
193             {
194                 nTo = nNbResults;
195             }
196 
197             for ( int i = nFrom; i < nTo; i++ )
198             {
199                 int docId = hits [i].doc;
200                 Document document = searcher.doc( docId );
201                 SearchItem si = new SearchItem( document );
202                 listResults.add( si );
203             }
204         }
205         catch( Exception e )
206         {
207             AppLogService.error( e.getMessage( ), e );
208         }
209         convertList( listResults, listSearchResult );
210 
211         return nNbResults;
212     }
213 
214     /**
215      * {@inheritDoc}
216      */
217     @Override
218     public int getSearchResultsBis( AnnounceSearchFilter filter, Plugin plugin, List<Announce> listAnnouncesResult, int nPage, int nItemsPerPage,
219             AnnounceSort anSort )
220     {
221         ArrayList<SearchItem> listResults = new ArrayList<>( );
222         List<Integer> listIdAnnounces = new ArrayList<>( );
223         IndexSearcher searcher;
224 
225         Date dateMinToSearch;
226         Date dateMaxToSearch;
227         int nNbResults = 0;
228         try
229         {
230             searcher = AnnounceSearchService.getInstance( ).getSearcher( );
231             BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
232 
233             // Category id
234             if ( filter.getIdCategory( ) != NO_CATEGORY )
235             {
236                 Query queryCategoryId = new TermQuery( new Term( AnnounceSearchItem.FIELD_CATEGORY_ID, String.valueOf( filter.getIdCategory( ) ) ) );
237 
238                 booleanQueryBuilder.add(queryCategoryId, BooleanClause.Occur.MUST);
239             }
240 
241 
242             // Sector id
243             if ( filter.getIdSector( ) != NO_SECTOR )
244             {
245                 Query querySectorId = new TermQuery( new Term( AnnounceSearchItem.FIELD_SECTOR_ID, String.valueOf( filter.getIdSector( ) ) ) );
246                 booleanQueryBuilder.add(querySectorId, BooleanClause.Occur.MUST);
247             }
248 
249             // Type (=announce)
250             Query queryType = new TermQuery(new Term(SearchItem.FIELD_TYPE, AnnouncePlugin.PLUGIN_NAME));
251             booleanQueryBuilder.add(queryType, BooleanClause.Occur.MUST);
252 
253             // Keywords in title or description
254             if ( StringUtils.isNotBlank( filter.getKeywords( ) ) )
255             {
256                 Analyzer analyzer = AnnounceSearchService.getInstance().getAnalyzer();
257                 MultiFieldQueryParser parser = new MultiFieldQueryParser(
258                         new String[]{SearchItem.FIELD_TITLE, SearchItem.FIELD_SUMMARY, SearchItem.FIELD_CONTENTS},
259                         analyzer
260                 );
261                 Query queryKeywords = parser.parse(filter.getKeywords());
262                 booleanQueryBuilder.add(queryKeywords, BooleanClause.Occur.MUST);
263             }
264 
265             // contains range date
266             if ( ( filter.getDateMin( ) != null ) || ( filter.getDateMax( ) != null ) )
267             {
268                 if ( filter.getDateMin( ) == null )
269                 {
270                     dateMinToSearch = new Date( 0L );
271                 }
272                 else
273                 {
274                     dateMinToSearch = filter.getDateMin( );
275                 }
276 
277                 if ( filter.getDateMax( ) == null )
278                 {
279                     dateMaxToSearch = new Date( );
280                 }
281                 else
282                 {
283                     dateMaxToSearch = filter.getDateMax( );
284                 }
285                 String strLowerTerm = _dayFormat.format(dateMinToSearch);
286                 String strUpperTerm = _dayFormat.format(dateMaxToSearch);
287                 BytesRef bRLowerTerm = new BytesRef(strLowerTerm);
288                 BytesRef bRUpperTerm = new BytesRef(strUpperTerm);
289                 Query queryRangeDate = TermRangeQuery.newStringRange(SearchItem.FIELD_DATE, strLowerTerm, strUpperTerm, true, true);
290                 booleanQueryBuilder.add(queryRangeDate, BooleanClause.Occur.MUST);
291             }
292 
293             // contains range price
294             if ( ( filter.getPriceMin( ) > 0 ) || ( filter.getPriceMax( ) > 0 ) )
295             {
296                 int nPriceMin = (filter.getPriceMin() > 0) ? filter.getPriceMin() : 0;
297                 int nPriceMax = (filter.getPriceMax() > 0) ? filter.getPriceMax() : Integer.MAX_VALUE;
298                 Query queryRangePrice = TermRangeQuery.newStringRange(
299                         AnnounceSearchItem.FIELD_PRICE,
300                         AnnounceSearchService.formatPriceForIndexer(nPriceMin),
301                         AnnounceSearchService.formatPriceForIndexer(nPriceMax),
302                         true,
303                         true
304                 );
305                 booleanQueryBuilder.add(queryRangePrice, BooleanClause.Occur.MUST);
306             }
307 
308             Query queryMulti = booleanQueryBuilder.build();
309 
310             TopDocs topDocs = searcher.search( queryMulti, 1000000 );
311             ScoreDoc [ ] hits = topDocs.scoreDocs;
312             nNbResults = hits.length;
313             // We only get the documents of the current page
314             int nFrom = ( nPage - 1 ) * nItemsPerPage;
315 
316             if ( nFrom < 0 )
317             {
318                 nFrom = 0;
319             }
320 
321             int nTo = ( nPage * nItemsPerPage );
322 
323             if ( ( nTo == 0 ) || ( nTo > nNbResults ) )
324             {
325                 nTo = nNbResults;
326             }
327             // if keywords are not empty, we use lucene score to filter results to sort results
328             if(filter.getKeywords() != null && filter.getKeywords().length() > 0)
329             {
330                 float luceneMinScore = Float.parseFloat( AppPropertiesService.getProperty( PROPERTY_LUCENE_MIN_SCORE, "0.5" ) );
331 
332              for ( int i = nFrom; i < nTo; i++ )
333              {
334                  int docId = hits[i].doc;
335                  float score = hits[i].score;
336                  if( score < luceneMinScore )
337                  {
338                      break;
339                  }
340                  Document document = searcher.doc( docId );
341                  SearchItem si = new SearchItem( document );
342                  listAnnouncesResult.add(AnnounceHome.findByPrimaryKey(Integer.parseInt(si.getId())));
343                  listResults.add( si );
344              }
345               nNbResults = listAnnouncesResult.size();
346             }
347          else
348             {
349              for ( int i = nFrom; i < nTo; i++ )
350              {
351                  int docId = hits[i].doc;
352                  Document document = searcher.doc( docId );
353                  SearchItem si = new SearchItem( document );
354                  listIdAnnounces.add( Integer.parseInt( si.getId( ) ) );
355                  listResults.add( si );
356              }
357              listAnnouncesResult.addAll(AnnounceHome.findByListId( listIdAnnounces, anSort ));
358              nNbResults = listAnnouncesResult.size();
359             }
360         }
361         catch( Exception e )
362         {
363             AppLogService.error( e.getMessage( ), e );
364         }
365 
366         return nNbResults;
367     }
368 
369     /**
370      * Convert the SearchItem list on SearchResult list
371      * 
372      * @param listSource
373      *            The source list
374      * @param listSearchResult
375      *            The result list
376      */
377     private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
378     {
379         for ( SearchItem item : listSource )
380         {
381             SearchResult result = new SearchResult( );
382             result.setId( item.getId( ) );
383 
384             try
385             {
386                 result.setDate( DateTools.stringToDate( item.getDate( ) ) );
387             }
388             catch( ParseException e )
389             {
390                 AppLogService.error( "Bad Date Format for indexed item \"" + item.getTitle( ) + "\" : " + e.getMessage( ) );
391             }
392 
393             result.setUrl( item.getUrl( ) );
394             result.setTitle( item.getTitle( ) );
395             result.setSummary( item.getSummary( ) );
396             result.setType( item.getType( ) );
397             listSearchResult.add( result );
398         }
399     }
400 }