View Javadoc
1   /*
2    * Copyright (c) 2002-2021, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.blog.service.docsearch;
35  
36  import fr.paris.lutece.plugins.blog.business.BlogSearchFilter;
37  import fr.paris.lutece.plugins.blog.business.IndexerAction;
38  import fr.paris.lutece.plugins.blog.business.IndexerActionFilter;
39  import fr.paris.lutece.plugins.blog.business.IndexerActionHome;
40  import fr.paris.lutece.plugins.blog.service.BlogPlugin;
41  import fr.paris.lutece.portal.service.search.LuceneSearchEngine;
42  import fr.paris.lutece.portal.service.search.SearchItem;
43  import fr.paris.lutece.portal.service.search.SearchResult;
44  import fr.paris.lutece.portal.service.spring.SpringContextService;
45  import fr.paris.lutece.portal.service.util.AppException;
46  import fr.paris.lutece.portal.service.util.AppLogService;
47  import fr.paris.lutece.portal.service.util.AppPathService;
48  import fr.paris.lutece.portal.service.util.AppPropertiesService;
49  import fr.paris.lutece.plugins.blog.business.Blog;
50  
51  import org.apache.commons.lang3.StringUtils;
52  import org.apache.lucene.analysis.Analyzer;
53  import org.apache.lucene.index.DirectoryReader;
54  import org.apache.lucene.index.IndexWriter;
55  import org.apache.lucene.index.IndexWriterConfig;
56  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
57  import org.apache.lucene.index.LogDocMergePolicy;
58  import org.apache.lucene.index.LogMergePolicy;
59  import org.apache.lucene.search.IndexSearcher;
60  
61  import org.apache.lucene.store.Directory;
62  import org.apache.lucene.store.FSDirectory;
63  
64  import java.nio.file.Paths;
65  import java.io.IOException;
66  import java.text.ParseException;
67  import java.util.ArrayList;
68  import java.util.Collection;
69  import java.util.Date;
70  import java.util.List;
71  import org.apache.lucene.document.DateTools;
72  import org.apache.lucene.document.Document;
73  import org.apache.lucene.index.Term;
74  import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
75  import org.apache.lucene.search.BooleanClause;
76  import org.apache.lucene.search.Query;
77  import org.apache.lucene.search.ScoreDoc;
78  import org.apache.lucene.search.Sort;
79  import org.apache.lucene.search.SortField;
80  import org.apache.lucene.search.TermQuery;
81  import org.apache.lucene.search.TermRangeQuery;
82  import org.apache.lucene.search.TopDocs;
83  import org.apache.lucene.util.BytesRef;
84  
85  /**
86   * Blog Search Service
87   */
88  public final class BlogSearchService
89  {
90      private static final String PROPERTY_WRITER_MERGE_FACTOR = "blog.internalIndexer.lucene.writer.mergeFactor";
91      private static final String PROPERTY_ANALYSER_CLASS_NAME = "blog.internalIndexer.lucene.analyser.className";
92      private static final String PATH_INDEX = "blog.internalIndexer.lucene.indexPath";
93      private volatile String _strIndex;
94      private static final String WILDCARD = "*";
95  
96      // Default values
97      private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;
98  
99      // Constants corresponding to the variables defined in the lutece.properties file
100     private static BlogSearchService _singleton;
101 
102     private Analyzer _analyzer;
103     private IBlogSearchIndexer _indexer;
104     private int _nWriterMergeFactor;
105 
106     /**
107      * Creates a new instance of DirectorySearchService
108      */
109     private BlogSearchService( )
110     {
111         _strIndex = AppPathService.getPath( PATH_INDEX );
112         if ( _strIndex == null )
113         {
114             throw new AppException( "Index path not defined. Property : blog.internalIndexer.lucene.indexPath in blogs.properties" );
115         }
116 
117         _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR, DEFAULT_WRITER_MERGE_FACTOR );
118 
119         String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
120 
121         if ( ( strAnalyserClassName == null ) || ( strAnalyserClassName.equals( "" ) ) )
122         {
123             throw new AppException( "Analyser class name not found in blogs.properties", null );
124         }
125 
126         _indexer = SpringContextService.getBean( "blog.blogIndexer" );
127 
128         try
129         {
130             _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
131         }
132         catch( ClassNotFoundException | IllegalAccessException | InstantiationException e )
133         {
134             throw new AppException( "Failed to load Lucene Analyzer class", e );
135         }
136     }
137 
138     /**
139      * Get the HelpdeskSearchService instance
140      *
141      * @return The {@link BlogSearchService}
142      */
143     public static BlogSearchService getInstance( )
144     {
145         if ( _singleton == null )
146         {
147             _singleton = new BlogSearchService( );
148         }
149         return _singleton;
150     }
151 
152     /**
153      * Return search results
154      *
155      * @param filter
156      *            The search filter
157      * @param listIdBlog
158      *            Results as a collection of id of blog posts
159      * @return The total number of items found
160      */
161     public int getSearchResults( BlogSearchFilter filter, List<Integer> listIdBlog )
162     {
163         int nNbItems = 0;
164 
165         try
166         {
167             List<SearchResult> listResults = new ArrayList<>( );
168             nNbItems = getSearchResultsByFilter( filter, listResults );
169 
170             for ( SearchResult searchResult : listResults )
171             {
172                 if ( searchResult.getId( ) != null )
173                 {
174                     listIdBlog.add( Integer.parseInt( searchResult.getId( ) ) );
175                 }
176             }
177         }
178         catch( NumberFormatException e )
179         {
180             AppLogService.error( e.getMessage( ), e );
181             // If an error occurred clean result list
182             listIdBlog.clear( );
183         }
184 
185         return nNbItems;
186     }
187 
188     /**
189      * Process indexing
190      *
191      * @param bCreate
192      *            true for start full indexing false for begin incremental indexing
193      * @return the log
194      */
195     public String processIndexing( boolean bCreate )
196     {
197         StringBuilder sbLogs = new StringBuilder( );
198         IndexWriter writer = null;
199         boolean bCreateIndex = bCreate;
200 
201         try
202         {
203             sbLogs.append( "\r\nIndexing all contents ...\r\n" );
204 
205             Directory dir = FSDirectory.open( Paths.get( _strIndex ) );
206 
207             // Nouveau
208             if ( !DirectoryReader.indexExists( dir ) )
209             { // init index
210                 bCreateIndex = true;
211             }
212 
213             IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
214             LogMergePolicy mergePolicy = new LogDocMergePolicy( );
215             mergePolicy.setMergeFactor( _nWriterMergeFactor );
216             conf.setMergePolicy( mergePolicy );
217 
218             if ( bCreateIndex )
219             {
220                 conf.setOpenMode( OpenMode.CREATE );
221             }
222             else
223             {
224                 conf.setOpenMode( OpenMode.APPEND );
225             }
226             writer = new IndexWriter( dir, conf );
227 
228             Date start = new Date( );
229 
230             sbLogs.append( "\r\n<strong>Indexer : " );
231             sbLogs.append( _indexer.getName( ) );
232             sbLogs.append( " - " );
233             sbLogs.append( _indexer.getDescription( ) );
234             sbLogs.append( "</strong>\r\n" );
235             _indexer.processIndexing( writer, bCreateIndex, sbLogs );
236 
237             Date end = new Date( );
238 
239             sbLogs.append( "Duration of the treatment : " );
240             sbLogs.append( end.getTime( ) - start.getTime( ) );
241             sbLogs.append( " milliseconds\r\n" );
242 
243         }
244         catch( Exception e )
245         {
246             sbLogs.append( " caught a " );
247             sbLogs.append( e.getClass( ) );
248             sbLogs.append( "\n with message: " );
249             sbLogs.append( e.getMessage( ) );
250             sbLogs.append( "\r\n" );
251             AppLogService.error( "Indexing error : " + e.getMessage( ), e );
252         }
253         finally
254         {
255             try
256             {
257                 if ( writer != null )
258                 {
259                     writer.close( );
260                 }
261             }
262             catch( IOException e )
263             {
264                 AppLogService.error( e.getMessage( ), e );
265             }
266         }
267 
268         return sbLogs.toString( );
269     }
270     public void updateDocument ( Blog blog) {
271         try ( IndexWriter writer = new IndexWriter( FSDirectory.open( Paths.get( _strIndex ) ), new IndexWriterConfig( _analyzer ) ) )
272         {
273             IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
274             LogMergePolicy mergePolicy = new LogDocMergePolicy( );
275             mergePolicy.setMergeFactor( _nWriterMergeFactor );
276             conf.setMergePolicy( mergePolicy );
277             conf.setOpenMode( OpenMode.CREATE_OR_APPEND );
278             _indexer.updateDocument( writer, blog );
279         }
280         catch( Exception e )
281         {
282             AppLogService.error( "Indexing error : " + e.getMessage( ), e );
283         }
284 
285     }
286 
287     /**
288      * Get search results
289      *
290      * @param filter
291      *            The filter
292      * @param listSearchResult
293      *            The list of results
294      * @return The result count
295      */
296     private int getSearchResultsByFilter( BlogSearchFilter filter, List<SearchResult> listSearchResult )
297     {
298         ArrayList<SearchItem> listResults = new ArrayList<>( );
299 
300         int nNbResults = 0;
301         try ( Directory dir = FSDirectory.open( Paths.get( _strIndex ) ) ; DirectoryReader reader = DirectoryReader.open( dir ) ; )
302         {
303             IndexSearcher searcher = new IndexSearcher( reader );
304 
305             Query queryMulti = prepareQueryForFilter( filter );
306 
307             Sort sorter = new Sort( );
308             String field = BlogSearchItem.FIELD_DATE_UPDATE;
309             SortField.Type type = SortField.Type.LONG;
310             boolean descending = true;
311 
312             SortField sortField = new SortField( field, type, descending );
313 
314             sorter.setSort( sortField );
315 
316             TopDocs topDocs = searcher.search( queryMulti, LuceneSearchEngine.MAX_RESPONSES, sorter );
317             ScoreDoc [ ] hits = topDocs.scoreDocs;
318             nNbResults = hits.length;
319 
320             for ( int i = 0; i < nNbResults; i++ )
321             {
322                 int docId = hits [i].doc;
323                 Document document = searcher.doc( docId );
324                 SearchItem si = new SearchItem( document );
325                 listResults.add( si );
326             }
327             searcher.getIndexReader( ).close( );
328         }
329         catch( Exception e )
330         {
331             AppLogService.error( e.getMessage( ), e );
332         }
333 
334         convertList( listResults, listSearchResult );
335         return nNbResults;
336     }
337 
338     private Query prepareQueryForFilter( BlogSearchFilter filter ) throws org.apache.lucene.queryparser.classic.ParseException
339     {
340         boolean bDateAfter = false;
341         boolean bDateBefore = false;
342         Collection<String> queries = new ArrayList<>( );
343         Collection<String> sectors = new ArrayList<>( );
344         Collection<BooleanClause.Occur> flags = new ArrayList<>( );
345 
346         if ( filter.getKeywords( ) != null && StringUtils.isNotBlank( filter.getKeywords( ) ) )
347         {
348             Term term = new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) );
349             Query termQuery = new TermQuery( term );
350             queries.add( termQuery.toString( ) );
351             sectors.add( SearchItem.FIELD_CONTENTS );
352             flags.add( BooleanClause.Occur.MUST );
353 
354         }
355         if ( filter.getTag( ) != null )
356         {
357             for ( String tag : filter.getTag( ) )
358             {
359                 Term term = new Term( BlogSearchItem.FIELD_TAGS, tag );
360                 Query termQuery = new TermQuery( term );
361                 queries.add( termQuery.toString( ) );
362                 sectors.add( BlogSearchItem.FIELD_TAGS );
363                 flags.add( BooleanClause.Occur.MUST );
364             }
365 
366         }
367         if ( filter.getUser( ) != null )
368         {
369             Term term = new Term( BlogSearchItem.FIELD_USER, filter.getUser( ) + WILDCARD );
370             Query termQuery = new TermQuery( term );
371             queries.add( termQuery.toString( ) );
372             sectors.add( BlogSearchItem.FIELD_USER );
373             flags.add( BooleanClause.Occur.MUST );
374 
375         }
376         if ( filter.getUserEditedBlogVersion( ) != null )
377         {
378             Term term = new Term( BlogSearchItem.FIELD_USERS_EDITED_BLOG, filter.getUserEditedBlogVersion( ) );
379             Query termQuery = new TermQuery( term );
380             queries.add( termQuery.toString( ) );
381             sectors.add( BlogSearchItem.FIELD_USERS_EDITED_BLOG );
382             flags.add( BooleanClause.Occur.MUST );
383 
384         }
385 
386         if ( filter.getUpdateDateAfter( ) != null || filter.getUpdateDateBefor( ) != null )
387         {
388             BytesRef strAfter = null;
389             BytesRef strBefore = null;
390 
391             if ( filter.getUpdateDateAfter( ) != null )
392             {
393                 strAfter = new BytesRef( DateTools.dateToString( filter.getUpdateDateAfter( ), DateTools.Resolution.MINUTE ) );
394                 bDateAfter = true;
395             }
396 
397             if ( filter.getUpdateDateBefor( ) != null )
398             {
399                 Date dateBefore = filter.getUpdateDateBefor( );
400                 strBefore = new BytesRef( DateTools.dateToString( dateBefore, DateTools.Resolution.MINUTE ) );
401                 bDateBefore = true;
402             }
403 
404             Query queryDate = new TermRangeQuery( SearchItem.FIELD_DATE, strAfter, strBefore, bDateAfter, bDateBefore );
405             queries.add( queryDate.toString( ) );
406             sectors.add( SearchItem.FIELD_DATE );
407             flags.add( BooleanClause.Occur.MUST );
408         }
409 
410             Term termIsArchived = new Term( BlogSearchItem.FIELD_ARCHIVED, filter.getIsArchived() ? "true" : "false" );
411             Query termQueryIsArchived = new TermQuery( termIsArchived );
412             queries.add( termQueryIsArchived.toString( ) );
413             sectors.add( BlogSearchItem.FIELD_ARCHIVED );
414             flags.add( BooleanClause.Occur.MUST );
415 
416         if ( !filter.getIsArchived( ))
417         {
418             if ( filter.getIsUnpulished( ) > 0 )
419             {
420                 Term termIsUnpublished = new Term( BlogSearchItem.FIELD_UNPUBLISHED, String.valueOf( filter.getIsUnpulished( ) == 1 ) );
421                 Query termQueryIsUnpublished = new TermQuery( termIsUnpublished );
422                 queries.add( termQueryIsUnpublished.toString( ) );
423                 sectors.add( BlogSearchItem.FIELD_UNPUBLISHED );
424                 flags.add( BooleanClause.Occur.MUST );
425             }
426         }
427 
428         Term term = new Term( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME );
429         Query termQuery = new TermQuery( term );
430         queries.add( termQuery.toString( ) );
431         sectors.add( SearchItem.FIELD_TYPE );
432         flags.add( BooleanClause.Occur.MUST );
433 
434         return MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
435                 flags.toArray( new BooleanClause.Occur [ flags.size( )] ), _analyzer );
436     }
437 
438     /**
439      * Add Indexer Action to perform on a record
440      *
441      * @param nIdBlog
442      *            Blog id
443      * @param nIdTask
444      *            the key of the action to do
445      */
446     public void addIndexerAction( int nIdBlog, int nIdTask )
447     {
448         IndexerActionness/IndexerAction.html#IndexerAction">IndexerAction indexerAction = new IndexerAction( );
449         indexerAction.setIdBlog( nIdBlog );
450         indexerAction.setIdTask( nIdTask );
451         IndexerActionHome.create( indexerAction );
452     }
453 
454     /**
455      * Remove a Indexer Action
456      *
457      * @param nIdAction
458      *            the key of the action to remove
459      */
460     public void removeIndexerAction( int nIdAction )
461     {
462         IndexerActionHome.remove( nIdAction );
463     }
464 
465     /**
466      * return a list of IndexerAction by task key
467      *
468      * @param nIdTask
469      *            the task key
470      * @return a list of IndexerAction
471      */
472     public List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
473     {
474         IndexerActionFilteriness/IndexerActionFilter.html#IndexerActionFilter">IndexerActionFilter filter = new IndexerActionFilter( );
475         filter.setIdTask( nIdTask );
476 
477         return IndexerActionHome.getList( filter );
478     }
479 
480     /**
481      * Convert the SearchItem list on SearchResult list
482      *
483      * @param listSource
484      *            The source list
485      * @param listSearchResult
486      *            The result list
487      */
488     private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
489     {
490         for ( SearchItem item : listSource )
491         {
492             SearchResult result = new SearchResult( );
493             result.setId( item.getId( ) );
494 
495             try
496             {
497                 result.setDate( DateTools.stringToDate( item.getDate( ) ) );
498             }
499             catch( ParseException e )
500             {
501                 AppLogService.error( "Bad Date Format for indexed item \"" + item.getTitle( ) + "\" : " + e.getMessage( ) );
502             }
503 
504             result.setUrl( item.getUrl( ) );
505             result.setTitle( item.getTitle( ) );
506             result.setSummary( item.getSummary( ) );
507             result.setType( item.getType( ) );
508             listSearchResult.add( result );
509         }
510     }
511 
512 }