View Javadoc
1   /*
2    * Copyright (c) 2002-2020, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.blog.service.docsearch;
35  
36  import fr.paris.lutece.plugins.blog.business.BlogSearchFilter;
37  import fr.paris.lutece.plugins.blog.business.IndexerAction;
38  import fr.paris.lutece.plugins.blog.business.IndexerActionFilter;
39  import fr.paris.lutece.plugins.blog.business.IndexerActionHome;
40  import fr.paris.lutece.plugins.blog.service.BlogPlugin;
41  import fr.paris.lutece.portal.service.plugin.Plugin;
42  import fr.paris.lutece.portal.service.plugin.PluginService;
43  import fr.paris.lutece.portal.service.search.LuceneSearchEngine;
44  import fr.paris.lutece.portal.service.search.SearchItem;
45  import fr.paris.lutece.portal.service.search.SearchResult;
46  import fr.paris.lutece.portal.service.spring.SpringContextService;
47  import fr.paris.lutece.portal.service.util.AppException;
48  import fr.paris.lutece.portal.service.util.AppLogService;
49  import fr.paris.lutece.portal.service.util.AppPathService;
50  import fr.paris.lutece.portal.service.util.AppPropertiesService;
51  
52  import org.apache.commons.lang.StringUtils;
53  import org.apache.lucene.analysis.Analyzer;
54  import org.apache.lucene.index.DirectoryReader;
55  import org.apache.lucene.index.IndexWriter;
56  import org.apache.lucene.index.IndexWriterConfig;
57  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
58  import org.apache.lucene.index.LogDocMergePolicy;
59  import org.apache.lucene.index.LogMergePolicy;
60  import org.apache.lucene.search.IndexSearcher;
61  
62  import org.apache.lucene.store.Directory;
63  import org.apache.lucene.store.FSDirectory;
64  
65  import java.nio.file.Paths;
66  import java.io.IOException;
67  import java.text.ParseException;
68  import java.util.ArrayList;
69  import java.util.Collection;
70  import java.util.Date;
71  import java.util.List;
72  import org.apache.lucene.document.DateTools;
73  import org.apache.lucene.document.Document;
74  import org.apache.lucene.index.Term;
75  import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
76  import org.apache.lucene.search.BooleanClause;
77  import org.apache.lucene.search.Query;
78  import org.apache.lucene.search.ScoreDoc;
79  import org.apache.lucene.search.Sort;
80  import org.apache.lucene.search.SortField;
81  import org.apache.lucene.search.TermQuery;
82  import org.apache.lucene.search.TermRangeQuery;
83  import org.apache.lucene.search.TopDocs;
84  import org.apache.lucene.util.BytesRef;
85  
86  /**
87   * Blog Search Service
88   */
89  public final class BlogSearchService
90  {
91      private static final String PROPERTY_WRITER_MERGE_FACTOR = "blog.internalIndexer.lucene.writer.mergeFactor";
92      private static final String PROPERTY_ANALYSER_CLASS_NAME = "blog.internalIndexer.lucene.analyser.className";
93      private static final String PATH_INDEX = "blog.internalIndexer.lucene.indexPath";
94      private volatile String _strIndex;
95      private static final String WILDCARD = "*";
96  
97      // Default values
98      private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;
99  
100     // Constants corresponding to the variables defined in the lutece.properties file
101     private static BlogSearchService _singleton;
102     private static Plugin _plugin;
103 
104     private Analyzer _analyzer;
105     private IBlogSearchIndexer _indexer;
106     private int _nWriterMergeFactor;
107 
108     /**
109      * Creates a new instance of DirectorySearchService
110      */
111     private BlogSearchService( )
112     {
113         _strIndex = AppPathService.getPath( PATH_INDEX );
114         if ( _strIndex == null )
115         {
116             throw new AppException( "Index path not defined. Property : blog.internalIndexer.lucene.indexPath in blogs.properties" );
117         }
118 
119         _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR, DEFAULT_WRITER_MERGE_FACTOR );
120 
121         String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
122 
123         if ( ( strAnalyserClassName == null ) || ( strAnalyserClassName.equals( "" ) ) )
124         {
125             throw new AppException( "Analyser class name not found in blogs.properties", null );
126         }
127 
128         _indexer = SpringContextService.getBean( "blog.blogIndexer" );
129 
130         try
131         {
132             _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
133         }
134         catch( ClassNotFoundException | IllegalAccessException | InstantiationException e )
135         {
136             throw new AppException( "Failed to load Lucene Analyzer class", e );
137         }
138     }
139 
140     /**
141      * Get the HelpdeskSearchService instance
142      * 
143      * @return The {@link BlogSearchService}
144      */
145     public static BlogSearchService getInstance( )
146     {
147         if ( _singleton == null )
148         {
149             _singleton = new BlogSearchService( );
150             _plugin = PluginService.getPlugin( BlogPlugin.PLUGIN_NAME );
151         }
152 
153         return _singleton;
154     }
155 
156     /**
157      * Return search results
158      * 
159      * @param filter
160      *            The search filter
161      * @param listIdBlog
162      *            Results as a collection of id of blog posts
163      * @return The total number of items found
164      */
165     public int getSearchResults( BlogSearchFilter filter, List<Integer> listIdBlog )
166     {
167         int nNbItems = 0;
168 
169         try
170         {
171             List<SearchResult> listResults = new ArrayList<>( );
172             nNbItems = getSearchResults( filter, _plugin, listResults );
173 
174             for ( SearchResult searchResult : listResults )
175             {
176                 if ( searchResult.getId( ) != null )
177                 {
178                     listIdBlog.add( Integer.parseInt( searchResult.getId( ) ) );
179                 }
180             }
181         }
182         catch( NumberFormatException e )
183         {
184             AppLogService.error( e.getMessage( ), e );
185             // If an error occurred clean result list
186             listIdBlog.clear( );
187         }
188 
189         return nNbItems;
190     }
191 
192     /**
193      * Process indexing
194      * 
195      * @param bCreate
196      *            true for start full indexing false for begin incremental indexing
197      * @return the log
198      */
199     public String processIndexing( boolean bCreate )
200     {
201         StringBuilder sbLogs = new StringBuilder( );
202         IndexWriter writer = null;
203         boolean bCreateIndex = bCreate;
204 
205         try
206         {
207             sbLogs.append( "\r\nIndexing all contents ...\r\n" );
208 
209             Directory dir = FSDirectory.open( Paths.get( _strIndex ) );
210 
211             // Nouveau
212             if ( !DirectoryReader.indexExists( dir ) )
213             { // init index
214                 bCreateIndex = true;
215             }
216 
217             IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
218             LogMergePolicy mergePolicy = new LogDocMergePolicy( );
219             mergePolicy.setMergeFactor( _nWriterMergeFactor );
220             conf.setMergePolicy( mergePolicy );
221 
222             if ( bCreateIndex )
223             {
224                 conf.setOpenMode( OpenMode.CREATE );
225             }
226             else
227             {
228                 conf.setOpenMode( OpenMode.APPEND );
229             }
230             writer = new IndexWriter( dir, conf );
231 
232             Date start = new Date( );
233 
234             sbLogs.append( "\r\n<strong>Indexer : " );
235             sbLogs.append( _indexer.getName( ) );
236             sbLogs.append( " - " );
237             sbLogs.append( _indexer.getDescription( ) );
238             sbLogs.append( "</strong>\r\n" );
239             _indexer.processIndexing( writer, bCreateIndex, sbLogs );
240 
241             Date end = new Date( );
242 
243             sbLogs.append( "Duration of the treatment : " );
244             sbLogs.append( end.getTime( ) - start.getTime( ) );
245             sbLogs.append( " milliseconds\r\n" );
246 
247         }
248         catch( Exception e )
249         {
250             sbLogs.append( " caught a " );
251             sbLogs.append( e.getClass( ) );
252             sbLogs.append( "\n with message: " );
253             sbLogs.append( e.getMessage( ) );
254             sbLogs.append( "\r\n" );
255             AppLogService.error( "Indexing error : " + e.getMessage( ), e );
256         }
257         finally
258         {
259             try
260             {
261                 if ( writer != null )
262                 {
263                     writer.close( );
264                 }
265             }
266             catch( IOException e )
267             {
268                 AppLogService.error( e.getMessage( ), e );
269             }
270         }
271 
272         return sbLogs.toString( );
273     }
274 
275     /**
276      * Get search results
277      * 
278      * @param filter
279      *            The filter
280      * @param plugin
281      *            The plugin
282      * @param listSearchResult
283      *            The list of results
284      * @return The result count
285      */
286     private int getSearchResults( BlogSearchFilter filter, Plugin plugin, List<SearchResult> listSearchResult )
287     {
288         ArrayList<SearchItem> listResults = new ArrayList<>( );
289         IndexSearcher searcher;
290         boolean bDateAfter = false;
291         boolean bDateBefore = false;
292 
293         int nNbResults = 0;
294         try ( Directory dir = FSDirectory.open( Paths.get( _strIndex ) ) ; DirectoryReader reader = DirectoryReader.open( dir ) ; )
295         {
296             searcher = new IndexSearcher( reader );
297 
298             Collection<String> queries = new ArrayList<>( );
299             Collection<String> sectors = new ArrayList<>( );
300             Collection<BooleanClause.Occur> flags = new ArrayList<>( );
301 
302             if ( filter.getKeywords( ) != null && StringUtils.isNotBlank( filter.getKeywords( ) ) )
303             {
304 
305                 Term term = new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) );
306                 Query termQuery = new TermQuery( term );
307                 queries.add( termQuery.toString( ) );
308                 sectors.add( SearchItem.FIELD_CONTENTS );
309                 flags.add( BooleanClause.Occur.MUST );
310 
311             }
312             if ( filter.getTag( ) != null )
313             {
314                 for ( String tag : filter.getTag( ) )
315                 {
316                     Term term = new Term( BlogSearchItem.FIELD_TAGS, tag );
317                     Query termQuery = new TermQuery( term );
318                     queries.add( termQuery.toString( ) );
319                     sectors.add( BlogSearchItem.FIELD_TAGS );
320                     flags.add( BooleanClause.Occur.MUST );
321                 }
322 
323             }
324             if ( filter.getUser( ) != null )
325             {
326 
327                 Term term = new Term( BlogSearchItem.FIELD_USER, filter.getUser( ) + WILDCARD );
328                 Query termQuery = new TermQuery( term );
329                 queries.add( termQuery.toString( ) );
330                 sectors.add( BlogSearchItem.FIELD_USER );
331                 flags.add( BooleanClause.Occur.MUST );
332 
333             }
334             if ( filter.getUserEditedBlogVersion( ) != null )
335             {
336 
337                 Term term = new Term( BlogSearchItem.FIELD_USERS_EDITED_BLOG, filter.getUserEditedBlogVersion( ) );
338                 Query termQuery = new TermQuery( term );
339                 queries.add( termQuery.toString( ) );
340                 sectors.add( BlogSearchItem.FIELD_USERS_EDITED_BLOG );
341                 flags.add( BooleanClause.Occur.MUST );
342 
343             }
344 
345             if ( filter.getUpdateDateAfter( ) != null || filter.getUpdateDateBefor( ) != null )
346             {
347                 BytesRef strAfter = null;
348                 BytesRef strBefore = null;
349 
350                 if ( filter.getUpdateDateAfter( ) != null )
351                 {
352                     strAfter = new BytesRef( DateTools.dateToString( filter.getUpdateDateAfter( ), DateTools.Resolution.MINUTE ) );
353                     bDateAfter = true;
354                 }
355 
356                 if ( filter.getUpdateDateBefor( ) != null )
357                 {
358                     Date dateBefore = filter.getUpdateDateBefor( );
359                     strBefore = new BytesRef( DateTools.dateToString( dateBefore, DateTools.Resolution.MINUTE ) );
360                     bDateBefore = true;
361                 }
362 
363                 Query queryDate = new TermRangeQuery( SearchItem.FIELD_DATE, strAfter, strBefore, bDateAfter, bDateBefore );
364                 queries.add( queryDate.toString( ) );
365                 sectors.add( SearchItem.FIELD_DATE );
366                 flags.add( BooleanClause.Occur.MUST );
367             }
368 
369             if ( filter.getIsUnpulished( ) )
370             {
371 
372                 Term term = new Term( BlogSearchItem.FIELD_UNPUBLISHED, String.valueOf( filter.getIsUnpulished( ) ) );
373                 Query termQuery = new TermQuery( term );
374                 queries.add( termQuery.toString( ) );
375                 sectors.add( BlogSearchItem.FIELD_UNPUBLISHED );
376                 flags.add( BooleanClause.Occur.MUST );
377 
378             }
379             Term term = new Term( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME );
380             Query termQuery = new TermQuery( term );
381             queries.add( termQuery.toString( ) );
382             sectors.add( SearchItem.FIELD_TYPE );
383             flags.add( BooleanClause.Occur.MUST );
384 
385             Query queryMulti = MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
386                     flags.toArray( new BooleanClause.Occur [ flags.size( )] ), _analyzer );
387 
388             Sort sorter = new Sort( );
389             String field = BlogSearchItem.FIELD_DATE_UPDATE;
390             SortField.Type type = SortField.Type.LONG;
391             boolean descending = true;
392 
393             SortField sortField = new SortField( field, type, descending );
394 
395             sorter.setSort( sortField );
396 
397             TopDocs topDocs = searcher.search( queryMulti, LuceneSearchEngine.MAX_RESPONSES, sorter );
398             ScoreDoc [ ] hits = topDocs.scoreDocs;
399             nNbResults = hits.length;
400 
401             for ( int i = 0; i < nNbResults; i++ )
402             {
403                 int docId = hits [i].doc;
404                 Document document = searcher.doc( docId );
405                 SearchItem si = new SearchItem( document );
406                 listResults.add( si );
407             }
408             searcher.getIndexReader( ).close( );
409         }
410         catch( Exception e )
411         {
412             AppLogService.error( e.getMessage( ), e );
413         }
414 
415         convertList( listResults, listSearchResult );
416 
417         return nNbResults;
418     }
419 
420     /**
421      * Add Indexer Action to perform on a record
422      * 
423      * @param nIdBlog
424      *            Blog id
425      * @param nIdTask
426      *            the key of the action to do
427      */
428     public void addIndexerAction( int nIdBlog, int nIdTask )
429     {
430         IndexerActionness/IndexerAction.html#IndexerAction">IndexerAction indexerAction = new IndexerAction( );
431         indexerAction.setIdBlog( nIdBlog );
432         indexerAction.setIdTask( nIdTask );
433         IndexerActionHome.create( indexerAction );
434     }
435 
436     /**
437      * Remove a Indexer Action
438      * 
439      * @param nIdAction
440      *            the key of the action to remove
441      */
442     public void removeIndexerAction( int nIdAction )
443     {
444         IndexerActionHome.remove( nIdAction );
445     }
446 
447     /**
448      * return a list of IndexerAction by task key
449      * 
450      * @param nIdTask
451      *            the task key
452      * @return a list of IndexerAction
453      */
454     public List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
455     {
456         IndexerActionFilteriness/IndexerActionFilter.html#IndexerActionFilter">IndexerActionFilter filter = new IndexerActionFilter( );
457         filter.setIdTask( nIdTask );
458 
459         return IndexerActionHome.getList( filter );
460     }
461 
462     /**
463      * Convert the SearchItem list on SearchResult list
464      * 
465      * @param listSource
466      *            The source list
467      * @param listSearchResult
468      *            The result list
469      */
470     private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
471     {
472         for ( SearchItem item : listSource )
473         {
474             SearchResult result = new SearchResult( );
475             result.setId( item.getId( ) );
476 
477             try
478             {
479                 result.setDate( DateTools.stringToDate( item.getDate( ) ) );
480             }
481             catch( ParseException e )
482             {
483                 AppLogService.error( "Bad Date Format for indexed item \"" + item.getTitle( ) + "\" : " + e.getMessage( ) );
484             }
485 
486             result.setUrl( item.getUrl( ) );
487             result.setTitle( item.getTitle( ) );
488             result.setSummary( item.getSummary( ) );
489             result.setType( item.getType( ) );
490             listSearchResult.add( result );
491         }
492     }
493 
494 }