View Javadoc
1   /*
2    * Copyright (c) 2002-2021, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.blog.service.docsearch;
35  
36  import fr.paris.lutece.plugins.blog.business.BlogSearchFilter;
37  import fr.paris.lutece.plugins.blog.business.IndexerAction;
38  import fr.paris.lutece.plugins.blog.business.IndexerActionFilter;
39  import fr.paris.lutece.plugins.blog.business.IndexerActionHome;
40  import fr.paris.lutece.plugins.blog.service.BlogPlugin;
41  import fr.paris.lutece.portal.service.search.LuceneSearchEngine;
42  import fr.paris.lutece.portal.service.search.SearchItem;
43  import fr.paris.lutece.portal.service.search.SearchResult;
44  import fr.paris.lutece.portal.service.spring.SpringContextService;
45  import fr.paris.lutece.portal.service.util.AppException;
46  import fr.paris.lutece.portal.service.util.AppLogService;
47  import fr.paris.lutece.portal.service.util.AppPathService;
48  import fr.paris.lutece.portal.service.util.AppPropertiesService;
49  
50  import org.apache.commons.lang3.StringUtils;
51  import org.apache.lucene.analysis.Analyzer;
52  import org.apache.lucene.index.DirectoryReader;
53  import org.apache.lucene.index.IndexWriter;
54  import org.apache.lucene.index.IndexWriterConfig;
55  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
56  import org.apache.lucene.index.LogDocMergePolicy;
57  import org.apache.lucene.index.LogMergePolicy;
58  import org.apache.lucene.search.IndexSearcher;
59  
60  import org.apache.lucene.store.Directory;
61  import org.apache.lucene.store.FSDirectory;
62  
63  import java.nio.file.Paths;
64  import java.io.IOException;
65  import java.text.ParseException;
66  import java.util.ArrayList;
67  import java.util.Collection;
68  import java.util.Date;
69  import java.util.List;
70  import org.apache.lucene.document.DateTools;
71  import org.apache.lucene.document.Document;
72  import org.apache.lucene.index.Term;
73  import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
74  import org.apache.lucene.search.BooleanClause;
75  import org.apache.lucene.search.Query;
76  import org.apache.lucene.search.ScoreDoc;
77  import org.apache.lucene.search.Sort;
78  import org.apache.lucene.search.SortField;
79  import org.apache.lucene.search.TermQuery;
80  import org.apache.lucene.search.TermRangeQuery;
81  import org.apache.lucene.search.TopDocs;
82  import org.apache.lucene.util.BytesRef;
83  
84  /**
85   * Blog Search Service
86   */
87  public final class BlogSearchService
88  {
89      private static final String PROPERTY_WRITER_MERGE_FACTOR = "blog.internalIndexer.lucene.writer.mergeFactor";
90      private static final String PROPERTY_ANALYSER_CLASS_NAME = "blog.internalIndexer.lucene.analyser.className";
91      private static final String PATH_INDEX = "blog.internalIndexer.lucene.indexPath";
92      private volatile String _strIndex;
93      private static final String WILDCARD = "*";
94  
95      // Default values
96      private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;
97  
98      // Constants corresponding to the variables defined in the lutece.properties file
99      private static BlogSearchService _singleton;
100 
101     private Analyzer _analyzer;
102     private IBlogSearchIndexer _indexer;
103     private int _nWriterMergeFactor;
104 
105     /**
106      * Creates a new instance of DirectorySearchService
107      */
108     private BlogSearchService( )
109     {
110         _strIndex = AppPathService.getPath( PATH_INDEX );
111         if ( _strIndex == null )
112         {
113             throw new AppException( "Index path not defined. Property : blog.internalIndexer.lucene.indexPath in blogs.properties" );
114         }
115 
116         _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR, DEFAULT_WRITER_MERGE_FACTOR );
117 
118         String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
119 
120         if ( ( strAnalyserClassName == null ) || ( strAnalyserClassName.equals( "" ) ) )
121         {
122             throw new AppException( "Analyser class name not found in blogs.properties", null );
123         }
124 
125         _indexer = SpringContextService.getBean( "blog.blogIndexer" );
126 
127         try
128         {
129             _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
130         }
131         catch( ClassNotFoundException | IllegalAccessException | InstantiationException e )
132         {
133             throw new AppException( "Failed to load Lucene Analyzer class", e );
134         }
135     }
136 
137     /**
138      * Get the HelpdeskSearchService instance
139      *
140      * @return The {@link BlogSearchService}
141      */
142     public static BlogSearchService getInstance( )
143     {
144         if ( _singleton == null )
145         {
146             _singleton = new BlogSearchService( );
147         }
148         return _singleton;
149     }
150 
151     /**
152      * Return search results
153      *
154      * @param filter
155      *            The search filter
156      * @param listIdBlog
157      *            Results as a collection of id of blog posts
158      * @return The total number of items found
159      */
160     public int getSearchResults( BlogSearchFilter filter, List<Integer> listIdBlog )
161     {
162         int nNbItems = 0;
163 
164         try
165         {
166             List<SearchResult> listResults = new ArrayList<>( );
167             nNbItems = getSearchResultsByFilter( filter, listResults );
168 
169             for ( SearchResult searchResult : listResults )
170             {
171                 if ( searchResult.getId( ) != null )
172                 {
173                     listIdBlog.add( Integer.parseInt( searchResult.getId( ) ) );
174                 }
175             }
176         }
177         catch( NumberFormatException e )
178         {
179             AppLogService.error( e.getMessage( ), e );
180             // If an error occurred clean result list
181             listIdBlog.clear( );
182         }
183 
184         return nNbItems;
185     }
186 
187     /**
188      * Process indexing
189      *
190      * @param bCreate
191      *            true for start full indexing false for begin incremental indexing
192      * @return the log
193      */
194     public String processIndexing( boolean bCreate )
195     {
196         StringBuilder sbLogs = new StringBuilder( );
197         IndexWriter writer = null;
198         boolean bCreateIndex = bCreate;
199 
200         try
201         {
202             sbLogs.append( "\r\nIndexing all contents ...\r\n" );
203 
204             Directory dir = FSDirectory.open( Paths.get( _strIndex ) );
205 
206             // Nouveau
207             if ( !DirectoryReader.indexExists( dir ) )
208             { // init index
209                 bCreateIndex = true;
210             }
211 
212             IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
213             LogMergePolicy mergePolicy = new LogDocMergePolicy( );
214             mergePolicy.setMergeFactor( _nWriterMergeFactor );
215             conf.setMergePolicy( mergePolicy );
216 
217             if ( bCreateIndex )
218             {
219                 conf.setOpenMode( OpenMode.CREATE );
220             }
221             else
222             {
223                 conf.setOpenMode( OpenMode.APPEND );
224             }
225             writer = new IndexWriter( dir, conf );
226 
227             Date start = new Date( );
228 
229             sbLogs.append( "\r\n<strong>Indexer : " );
230             sbLogs.append( _indexer.getName( ) );
231             sbLogs.append( " - " );
232             sbLogs.append( _indexer.getDescription( ) );
233             sbLogs.append( "</strong>\r\n" );
234             _indexer.processIndexing( writer, bCreateIndex, sbLogs );
235 
236             Date end = new Date( );
237 
238             sbLogs.append( "Duration of the treatment : " );
239             sbLogs.append( end.getTime( ) - start.getTime( ) );
240             sbLogs.append( " milliseconds\r\n" );
241 
242         }
243         catch( Exception e )
244         {
245             sbLogs.append( " caught a " );
246             sbLogs.append( e.getClass( ) );
247             sbLogs.append( "\n with message: " );
248             sbLogs.append( e.getMessage( ) );
249             sbLogs.append( "\r\n" );
250             AppLogService.error( "Indexing error : " + e.getMessage( ), e );
251         }
252         finally
253         {
254             try
255             {
256                 if ( writer != null )
257                 {
258                     writer.close( );
259                 }
260             }
261             catch( IOException e )
262             {
263                 AppLogService.error( e.getMessage( ), e );
264             }
265         }
266 
267         return sbLogs.toString( );
268     }
269 
270     /**
271      * Get search results
272      *
273      * @param filter
274      *            The filter
275      * @param listSearchResult
276      *            The list of results
277      * @return The result count
278      */
279     private int getSearchResultsByFilter( BlogSearchFilter filter, List<SearchResult> listSearchResult )
280     {
281         ArrayList<SearchItem> listResults = new ArrayList<>( );
282 
283         int nNbResults = 0;
284         try ( Directory dir = FSDirectory.open( Paths.get( _strIndex ) ) ; DirectoryReader reader = DirectoryReader.open( dir ) ; )
285         {
286             IndexSearcher searcher = new IndexSearcher( reader );
287 
288             Query queryMulti = prepareQueryForFilter( filter );
289 
290             Sort sorter = new Sort( );
291             String field = BlogSearchItem.FIELD_DATE_UPDATE;
292             SortField.Type type = SortField.Type.LONG;
293             boolean descending = true;
294 
295             SortField sortField = new SortField( field, type, descending );
296 
297             sorter.setSort( sortField );
298 
299             TopDocs topDocs = searcher.search( queryMulti, LuceneSearchEngine.MAX_RESPONSES, sorter );
300             ScoreDoc [ ] hits = topDocs.scoreDocs;
301             nNbResults = hits.length;
302 
303             for ( int i = 0; i < nNbResults; i++ )
304             {
305                 int docId = hits [i].doc;
306                 Document document = searcher.doc( docId );
307                 SearchItem si = new SearchItem( document );
308                 listResults.add( si );
309             }
310             searcher.getIndexReader( ).close( );
311         }
312         catch( Exception e )
313         {
314             AppLogService.error( e.getMessage( ), e );
315         }
316 
317         convertList( listResults, listSearchResult );
318         return nNbResults;
319     }
320 
321     private Query prepareQueryForFilter( BlogSearchFilter filter ) throws org.apache.lucene.queryparser.classic.ParseException
322     {
323         boolean bDateAfter = false;
324         boolean bDateBefore = false;
325         Collection<String> queries = new ArrayList<>( );
326         Collection<String> sectors = new ArrayList<>( );
327         Collection<BooleanClause.Occur> flags = new ArrayList<>( );
328 
329         if ( filter.getKeywords( ) != null && StringUtils.isNotBlank( filter.getKeywords( ) ) )
330         {
331             Term term = new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) );
332             Query termQuery = new TermQuery( term );
333             queries.add( termQuery.toString( ) );
334             sectors.add( SearchItem.FIELD_CONTENTS );
335             flags.add( BooleanClause.Occur.MUST );
336 
337         }
338         if ( filter.getTag( ) != null )
339         {
340             for ( String tag : filter.getTag( ) )
341             {
342                 Term term = new Term( BlogSearchItem.FIELD_TAGS, tag );
343                 Query termQuery = new TermQuery( term );
344                 queries.add( termQuery.toString( ) );
345                 sectors.add( BlogSearchItem.FIELD_TAGS );
346                 flags.add( BooleanClause.Occur.MUST );
347             }
348 
349         }
350         if ( filter.getUser( ) != null )
351         {
352             Term term = new Term( BlogSearchItem.FIELD_USER, filter.getUser( ) + WILDCARD );
353             Query termQuery = new TermQuery( term );
354             queries.add( termQuery.toString( ) );
355             sectors.add( BlogSearchItem.FIELD_USER );
356             flags.add( BooleanClause.Occur.MUST );
357 
358         }
359         if ( filter.getUserEditedBlogVersion( ) != null )
360         {
361             Term term = new Term( BlogSearchItem.FIELD_USERS_EDITED_BLOG, filter.getUserEditedBlogVersion( ) );
362             Query termQuery = new TermQuery( term );
363             queries.add( termQuery.toString( ) );
364             sectors.add( BlogSearchItem.FIELD_USERS_EDITED_BLOG );
365             flags.add( BooleanClause.Occur.MUST );
366 
367         }
368 
369         if ( filter.getUpdateDateAfter( ) != null || filter.getUpdateDateBefor( ) != null )
370         {
371             BytesRef strAfter = null;
372             BytesRef strBefore = null;
373 
374             if ( filter.getUpdateDateAfter( ) != null )
375             {
376                 strAfter = new BytesRef( DateTools.dateToString( filter.getUpdateDateAfter( ), DateTools.Resolution.MINUTE ) );
377                 bDateAfter = true;
378             }
379 
380             if ( filter.getUpdateDateBefor( ) != null )
381             {
382                 Date dateBefore = filter.getUpdateDateBefor( );
383                 strBefore = new BytesRef( DateTools.dateToString( dateBefore, DateTools.Resolution.MINUTE ) );
384                 bDateBefore = true;
385             }
386 
387             Query queryDate = new TermRangeQuery( SearchItem.FIELD_DATE, strAfter, strBefore, bDateAfter, bDateBefore );
388             queries.add( queryDate.toString( ) );
389             sectors.add( SearchItem.FIELD_DATE );
390             flags.add( BooleanClause.Occur.MUST );
391         }
392 
393         if ( filter.getIsUnpulished( ) > 0 )
394         {
395             Term termIsUnpublished = new Term( BlogSearchItem.FIELD_UNPUBLISHED, String.valueOf( filter.getIsUnpulished( ) == 1 ) );
396             Query termQueryIsUnpublished = new TermQuery( termIsUnpublished );
397             queries.add( termQueryIsUnpublished.toString( ) );
398             sectors.add( BlogSearchItem.FIELD_UNPUBLISHED );
399             flags.add( BooleanClause.Occur.MUST );
400         }
401 
402         Term term = new Term( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME );
403         Query termQuery = new TermQuery( term );
404         queries.add( termQuery.toString( ) );
405         sectors.add( SearchItem.FIELD_TYPE );
406         flags.add( BooleanClause.Occur.MUST );
407 
408         return MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
409                 flags.toArray( new BooleanClause.Occur [ flags.size( )] ), _analyzer );
410     }
411 
412     /**
413      * Add Indexer Action to perform on a record
414      *
415      * @param nIdBlog
416      *            Blog id
417      * @param nIdTask
418      *            the key of the action to do
419      */
420     public void addIndexerAction( int nIdBlog, int nIdTask )
421     {
422         IndexerActionness/IndexerAction.html#IndexerAction">IndexerAction indexerAction = new IndexerAction( );
423         indexerAction.setIdBlog( nIdBlog );
424         indexerAction.setIdTask( nIdTask );
425         IndexerActionHome.create( indexerAction );
426     }
427 
428     /**
429      * Remove a Indexer Action
430      *
431      * @param nIdAction
432      *            the key of the action to remove
433      */
434     public void removeIndexerAction( int nIdAction )
435     {
436         IndexerActionHome.remove( nIdAction );
437     }
438 
439     /**
440      * return a list of IndexerAction by task key
441      *
442      * @param nIdTask
443      *            the task key
444      * @return a list of IndexerAction
445      */
446     public List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
447     {
448         IndexerActionFilteriness/IndexerActionFilter.html#IndexerActionFilter">IndexerActionFilter filter = new IndexerActionFilter( );
449         filter.setIdTask( nIdTask );
450 
451         return IndexerActionHome.getList( filter );
452     }
453 
454     /**
455      * Convert the SearchItem list on SearchResult list
456      *
457      * @param listSource
458      *            The source list
459      * @param listSearchResult
460      *            The result list
461      */
462     private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
463     {
464         for ( SearchItem item : listSource )
465         {
466             SearchResult result = new SearchResult( );
467             result.setId( item.getId( ) );
468 
469             try
470             {
471                 result.setDate( DateTools.stringToDate( item.getDate( ) ) );
472             }
473             catch( ParseException e )
474             {
475                 AppLogService.error( "Bad Date Format for indexed item \"" + item.getTitle( ) + "\" : " + e.getMessage( ) );
476             }
477 
478             result.setUrl( item.getUrl( ) );
479             result.setTitle( item.getTitle( ) );
480             result.setSummary( item.getSummary( ) );
481             result.setType( item.getType( ) );
482             listSearchResult.add( result );
483         }
484     }
485 
486 }