View Javadoc
1   /*
2    * Copyright (c) 2002-2022, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.portal.service.search;
35  
36  import java.io.IOException;
37  import java.io.Serializable;
38  import java.nio.file.Paths;
39  import java.util.ArrayList;
40  import java.util.Collection;
41  import java.util.Collections;
42  import java.util.Comparator;
43  import java.util.Date;
44  import java.util.List;
45  import java.util.Map;
46  import java.util.concurrent.ConcurrentHashMap;
47  
48  import org.apache.commons.collections.CollectionUtils;
49  import org.apache.commons.lang3.StringUtils;
50  import org.apache.lucene.analysis.Analyzer;
51  import org.apache.lucene.document.Document;
52  import org.apache.lucene.index.CorruptIndexException;
53  import org.apache.lucene.index.DirectoryReader;
54  import org.apache.lucene.index.IndexWriter;
55  import org.apache.lucene.index.IndexWriterConfig;
56  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
57  import org.apache.lucene.index.Term;
58  import org.apache.lucene.store.Directory;
59  import org.apache.lucene.store.FSDirectory;
60  import org.apache.lucene.util.Version;
61  
62  import fr.paris.lutece.portal.business.indexeraction.IndexerAction;
63  import fr.paris.lutece.portal.business.indexeraction.IndexerActionFilter;
64  import fr.paris.lutece.portal.business.indexeraction.IndexerActionHome;
65  import fr.paris.lutece.portal.service.init.LuteceInitException;
66  import fr.paris.lutece.portal.service.message.SiteMessageException;
67  import fr.paris.lutece.portal.service.util.AppLogService;
68  import fr.paris.lutece.portal.service.util.AppPathService;
69  import fr.paris.lutece.portal.service.util.AppPropertiesService;
70  
71  /**
72   * This class provides management methods for indexing
73   */
74  public final class IndexationService
75  {
76      // Constants corresponding to the variables defined in the lutece.properties file
77      public static final String PATH_INDEX = "search.lucene.indexPath";
78      public static final String PATH_INDEX_IN_WEBAPP = "search.lucene.indexInWebapp";
79      public static final String PARAM_FORCING = "forcing";
80      public static final int ALL_DOCUMENT = -1;
81      public static final Version LUCENE_INDEX_VERSION = Version.LATEST;
82      private static final String PARAM_TYPE_PAGE = "Page";
83      private static final String PROPERTY_ANALYSER_CLASS_NAME = "search.lucene.analyser.className";
84      private static String _strIndex;
85      private static Analyzer _analyzer;
86      private static Map<String, SearchIndexer> _mapIndexers = new ConcurrentHashMap<>( );
87      private static IndexWriter _writer;
88      private static StringBuilder _sbLogs;
89      private static SearchIndexerComparator _comparator = new SearchIndexerComparator( );
90  
91      /**
92       * The private constructor
93       */
94      private IndexationService( )
95      {
96      }
97  
98      /**
99       * Initalizes the service
100      *
101      * @throws LuteceInitException
102      *             If an error occured
103      */
104     public static void init( ) throws LuteceInitException
105     {
106         // Read configuration properties
107         boolean indexInWebapp = AppPropertiesService.getPropertyBoolean( PATH_INDEX_IN_WEBAPP, true );
108 
109         if ( indexInWebapp )
110         {
111             _strIndex = AppPathService.getPath( PATH_INDEX );
112         }
113         else
114         {
115             _strIndex = AppPropertiesService.getProperty( PATH_INDEX );
116         }
117 
118         if ( StringUtils.isEmpty( _strIndex ) )
119         {
120             throw new LuteceInitException( "Lucene index path not found in lucene.properties", null );
121         }
122         String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
123 
124         if ( StringUtils.isEmpty( strAnalyserClassName ) )
125         {
126             throw new LuteceInitException( "Analyser class name not found in lucene.properties", null );
127         }
128 
129         try
130         {
131             _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
132         }
133         catch( Exception e )
134         {
135             throw new LuteceInitException( "Failed to load Lucene Analyzer class", e );
136         }
137     }
138 
139     /**
140      * Register an indexer
141      *
142      * @param indexer
143      *            The indexer to add to the registry
144      */
145     public static void registerIndexer( SearchIndexer indexer )
146     {
147         if ( indexer != null )
148         {
149             _mapIndexers.put( indexer.getName( ), indexer );
150             AppLogService.info( "New search indexer registered : " + indexer.getName( ) );
151         }
152     }
153 
154     /**
155      * Unregister an indexer. The indexer is only removed if its name has not changed
156      * 
157      * @param indexer
158      *            the indexer to remove from the registry
159      */
160     public static void unregisterIndexer( SearchIndexer indexer )
161     {
162         if ( indexer != null )
163         {
164             if ( _mapIndexers.remove( indexer.getName( ), indexer ) )
165             {
166                 AppLogService.info( "Search indexer unregistered : " + indexer.getName( ) );
167             }
168             else
169             {
170                 AppLogService.error( "Search indexer " + indexer.getName( ) + " could not be be unregistered" );
171             }
172         }
173     }
174 
175     /**
176      * Process the indexing
177      *
178      * @param bCreate
179      *            Force creating the index
180      * @return the result log of the indexing
181      */
182     public static synchronized String processIndexing( boolean bCreate )
183     {
184         _sbLogs = new StringBuilder( );
185 
186         _writer = null;
187 
188         boolean bCreateIndex = bCreate;
189 
190         Directory dir = null;
191 
192         try
193         {
194             dir = IndexationService.getDirectoryIndex( );
195 
196             if ( !DirectoryReader.indexExists( dir ) )
197             { // init index
198                 bCreateIndex = true;
199             }
200 
201             Date start = new Date( );
202             IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
203 
204             if ( bCreateIndex )
205             {
206                 conf.setOpenMode( OpenMode.CREATE );
207             }
208             else
209             {
210                 conf.setOpenMode( OpenMode.APPEND );
211             }
212 
213             _writer = new IndexWriter( dir, conf );
214 
215             if ( bCreateIndex )
216             {
217                 processFullIndexing( );
218             }
219             else
220             {
221                 processIncrementalIndexing( );
222             }
223 
224             Date end = new Date( );
225             _sbLogs.append( "Duration of the treatment : " );
226             _sbLogs.append( end.getTime( ) - start.getTime( ) );
227             _sbLogs.append( " milliseconds\r\n" );
228         }
229         catch( Exception e )
230         {
231             error( "Indexing error ", e, "" );
232         }
233         finally
234         {
235             try
236             {
237                 if ( _writer != null )
238                 {
239                     _writer.close( );
240                 }
241             }
242             catch( IOException e )
243             {
244                 AppLogService.error( e.getMessage( ), e );
245             }
246 
247             try
248             {
249                 if ( dir != null )
250                 {
251                     dir.close( );
252                 }
253             }
254             catch( IOException e )
255             {
256                 AppLogService.error( e.getMessage( ), e );
257             }
258         }
259 
260         return _sbLogs.toString( );
261     }
262 
263     /**
264      * Process all contents
265      */
266     private static void processFullIndexing( )
267     {
268         _sbLogs.append( "\r\nIndexing all contents ...\r\n" );
269 
270         for ( SearchIndexer indexer : getIndexerListSortedByName( ) )
271         {
272             // catch any exception coming from an indexer to prevent global indexation to fail
273             try
274             {
275                 if ( indexer.isEnable( ) )
276                 {
277                     _sbLogs.append( "\r\n<strong>Indexer : " );
278                     _sbLogs.append( indexer.getName( ) );
279                     _sbLogs.append( " - " );
280                     _sbLogs.append( indexer.getDescription( ) );
281                     _sbLogs.append( "</strong>\r\n" );
282 
283                     // the indexer will call write(doc)
284                     indexer.indexDocuments( );
285                 }
286             }
287             catch( Exception e )
288             {
289                 error( indexer, e, StringUtils.EMPTY );
290             }
291         }
292 
293         removeAllIndexerAction( );
294     }
295 
296     /**
297      * Process incremental indexing
298      *
299      * @throws CorruptIndexException
300      *             if an error occurs
301      * @throws IOException
302      *             if an error occurs
303      * @throws InterruptedException
304      *             if an error occurs
305      * @throws SiteMessageException
306      *             if an error occurs
307      */
308     private static void processIncrementalIndexing( ) throws IOException, InterruptedException, SiteMessageException
309     {
310         _sbLogs.append( "\r\nIncremental Indexing ...\r\n" );
311 
312         // incremental indexing
313         Collection<IndexerAction> actions = IndexerActionHome.getList( );
314 
315         for ( IndexerAction action : actions )
316         {
317             // catch any exception coming from an indexer to prevent global indexation to fail
318             try
319             {
320                 processIndexAction( action );
321             }
322             catch( Exception e )
323             {
324                 error( action, e, StringUtils.EMPTY );
325             }
326         }
327 
328         // reindexing all pages.
329         _writer.deleteDocuments( new Term( SearchItem.FIELD_TYPE, PARAM_TYPE_PAGE ) );
330         _mapIndexers.get( PageIndexer.INDEXER_NAME ).indexDocuments( );
331     }
332 
333     private static void processIndexAction( IndexerAction action ) throws IOException, InterruptedException, SiteMessageException
334     {
335         SearchIndexer indexer = _mapIndexers.get( action.getIndexerName( ) );
336 
337         if ( action.getIdTask( ) == IndexerAction.TASK_DELETE )
338         {
339             deleteDocument( action );
340         }
341         else
342         {
343             List<Document> luceneDocuments = indexer.getDocuments( action.getIdDocument( ) );
344 
345             if ( CollectionUtils.isNotEmpty( luceneDocuments ) )
346             {
347                 for ( Document doc : luceneDocuments )
348                 {
349                     if ( ( action.getIdPortlet( ) == ALL_DOCUMENT ) || ( ( doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ) != null )
350                             && ( doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ).equals( doc.get( SearchItem.FIELD_UID ) + "&" + action.getIdPortlet( ) ) ) ) )
351                     {
352                         processDocument( action, doc );
353                     }
354                 }
355             }
356         }
357 
358         removeIndexerAction( action.getIdAction( ) );
359     }
360 
361     /**
362      * Delete a document from the index
363      *
364      * @param action
365      *            The current action
366      * @throws CorruptIndexException
367      *             if an error occurs
368      * @throws IOException
369      *             if an error occurs
370      */
371     private static void deleteDocument( IndexerAction action ) throws IOException
372     {
373         if ( action.getIdPortlet( ) != ALL_DOCUMENT )
374         {
375             // delete only the index linked to this portlet
376             _writer.deleteDocuments(
377                     new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID, action.getIdDocument( ) + "&" + Integer.toString( action.getIdPortlet( ) ) ) );
378         }
379         else
380         {
381             // delete all index linked to uid
382             _writer.deleteDocuments( new Term( SearchItem.FIELD_UID, action.getIdDocument( ) ) );
383         }
384 
385         _sbLogs.append( "Deleting #" ).append( action.getIdDocument( ) ).append( "\r\n" );
386     }
387 
388     /**
389      * Create or update the index for a given document
390      *
391      * @param action
392      *            The current action
393      * @param doc
394      *            The document
395      * @throws CorruptIndexException
396      *             if an error occurs
397      * @throws IOException
398      *             if an error occurs
399      */
400     private static void processDocument( IndexerAction action, Document doc ) throws IOException
401     {
402         if ( action.getIdTask( ) == IndexerAction.TASK_CREATE )
403         {
404             _writer.addDocument( doc );
405             logDoc( "Adding ", doc );
406         }
407         else
408             if ( action.getIdTask( ) == IndexerAction.TASK_MODIFY )
409             {
410                 if ( action.getIdPortlet( ) != ALL_DOCUMENT )
411                 {
412                     // delete only the index linked to this portlet
413                     _writer.updateDocument( new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID, doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ) ), doc );
414                 }
415                 else
416                 {
417                     _writer.updateDocument( new Term( SearchItem.FIELD_UID, doc.getField( SearchItem.FIELD_UID ).stringValue( ) ), doc );
418                 }
419 
420                 logDoc( "Updating ", doc );
421             }
422     }
423 
424     /**
425      * Index one document, called by plugin indexers
426      *
427      * @param doc
428      *            the document to index
429      * @throws CorruptIndexException
430      *             corruptIndexException
431      * @throws IOException
432      *             i/o exception
433      */
434     public static void write( Document doc ) throws IOException
435     {
436         _writer.addDocument( doc );
437         logDoc( "Indexing ", doc );
438     }
439 
440     /**
441      * Log an action made on a document
442      * 
443      * @param strAction
444      *            The action
445      * @param doc
446      *            The document
447      */
448     private static void logDoc( String strAction, Document doc )
449     {
450         _sbLogs.append( strAction );
451         _sbLogs.append( doc.get( SearchItem.FIELD_TYPE ) );
452         _sbLogs.append( " #" );
453         _sbLogs.append( doc.get( SearchItem.FIELD_UID ) );
454         _sbLogs.append( " - " );
455         _sbLogs.append( doc.get( SearchItem.FIELD_TITLE ) );
456         _sbLogs.append( "\r\n" );
457     }
458 
459     /**
460      * Log the error for the search indexer.
461      *
462      * @param indexer
463      *            the {@link SearchIndexer}
464      * @param e
465      *            the exception
466      * @param strMessage
467      *            the str message
468      */
469     public static void error( SearchIndexer indexer, Exception e, String strMessage )
470     {
471         String strTitle = "Indexer : " + indexer.getName( );
472         error( strTitle, e, strMessage );
473     }
474 
475     /**
476      * Log the error for the indexer action.
477      *
478      * @param action
479      *            the {@link IndexerAction}
480      * @param e
481      *            the exception
482      * @param strMessage
483      *            the str message
484      */
485     public static void error( IndexerAction action, Exception e, String strMessage )
486     {
487         String strTitle = "Action from indexer : " + action.getIndexerName( );
488         strTitle += ( " Action ID : " + action.getIdAction( ) + " - Document ID : " + action.getIdDocument( ) );
489         error( strTitle, e, strMessage );
490     }
491 
492     /**
493      * Log an exception
494      * 
495      * @param strTitle
496      *            The title of the error
497      * @param e
498      *            The exception to log
499      * @param strMessage
500      *            The message
501      */
502     private static void error( String strTitle, Exception e, String strMessage )
503     {
504         _sbLogs.append( "</pre>\r\n" );
505         _sbLogs.append( "<div class=\"alert alert-danger\">\r\n" );
506         _sbLogs.append( strTitle );
507         _sbLogs.append( " - ERROR : " );
508         _sbLogs.append( "<strong>\r\n" );
509         _sbLogs.append( e.getMessage( ) );
510         _sbLogs.append( "</strong>\r\n" );
511 
512         if ( e.getCause( ) != null )
513         {
514             _sbLogs.append( " : " );
515             _sbLogs.append( "<strong>\r\n" );
516             _sbLogs.append( e.getCause( ).getMessage( ) );
517             _sbLogs.append( "</strong>\r\n" );
518         }
519 
520         if ( StringUtils.isNotBlank( strMessage ) )
521         {
522             _sbLogs.append( " - " );
523             _sbLogs.append( "<strong>\r\n" );
524             _sbLogs.append( strMessage );
525             _sbLogs.append( "</strong>\r\n" );
526         }
527 
528         _sbLogs.append( "</div>\r\n" );
529         _sbLogs.append( "<pre>" );
530 
531         AppLogService.error( "Indexing error : " + e.getMessage( ), e );
532     }
533 
534     /**
535      * Gets the current IndexSearcher.
536      *
537      * @return IndexSearcher
538      * @throws IOException
539      *             Signals that an I/O exception has occurred.
540      */
541     public static Directory getDirectoryIndex( ) throws IOException
542     {
543         return FSDirectory.open( Paths.get( _strIndex ) );
544     }
545 
546     /**
547      * Gets the current analyser
548      *
549      * @return The analyser
550      */
551     public static Analyzer getAnalyser( )
552     {
553         return _analyzer;
554     }
555 
556     /**
557      * Returns all search indexers
558      *
559      * @return A collection of indexers
560      */
561     public static Collection<SearchIndexer> getIndexers( )
562     {
563         return _mapIndexers.values( );
564     }
565 
566     /**
567      * return a list of IndexerAction by task key
568      *
569      * @param nIdTask
570      *            the task kety
571      * @return a list of IndexerAction
572      */
573     public static List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
574     {
575         IndexerActionFilterexeraction/IndexerActionFilter.html#IndexerActionFilter">IndexerActionFilter filter = new IndexerActionFilter( );
576         filter.setIdTask( nIdTask );
577 
578         return IndexerActionHome.getList( filter );
579     }
580 
581     /**
582      * Remove a Indexer Action
583      *
584      * @param nIdAction
585      *            the key of the action to remove
586      *
587      */
588     public static void removeIndexerAction( int nIdAction )
589     {
590         IndexerActionHome.remove( nIdAction );
591     }
592 
593     /**
594      * Remove all Indexer Action
595      *
596      */
597     public static void removeAllIndexerAction( )
598     {
599         IndexerActionHome.removeAll( );
600     }
601 
602     /**
603      * Add Indexer Action to perform on a record
604      *
605      * @param strIdDocument
606      *            the id of the document
607      * @param indexerName
608      *            the name of the indexer
609      * @param nIdTask
610      *            the key of the action to do
611      * @param nIdPortlet
612      *            id of the portlet
613      */
614     public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask, int nIdPortlet )
615     {
616         IndexerActionxeraction/IndexerAction.html#IndexerAction">IndexerAction indexerAction = new IndexerAction( );
617         indexerAction.setIdDocument( strIdDocument );
618         indexerAction.setIdTask( nIdTask );
619         indexerAction.setIndexerName( indexerName );
620         indexerAction.setIdPortlet( nIdPortlet );
621         IndexerActionHome.create( indexerAction );
622     }
623 
624     /**
625      * Add Indexer Action to perform on a record
626      *
627      * @param strIdDocument
628      *            the id of the document
629      * @param indexerName
630      *            the name of the indexer
631      * @param nIdTask
632      *            the key of the action to do
633      */
634     public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask )
635     {
636         addIndexerAction( strIdDocument, indexerName, nIdTask, ALL_DOCUMENT );
637     }
638 
639     /**
640      * Gets a sorted list of registered indexers
641      * 
642      * @return The list
643      */
644     private static List<SearchIndexer> getIndexerListSortedByName( )
645     {
646         List<SearchIndexer> list = new ArrayList<>( _mapIndexers.values( ) );
647         Collections.sort( list, _comparator );
648 
649         return list;
650     }
651 
652     /**
653      * Comparator to sort indexer
654      */
655     private static class SearchIndexerComparator implements Comparator<SearchIndexer>, Serializable
656     {
657         private static final long serialVersionUID = -3800252801777838562L;
658 
659         /**
660          * {@inheritDoc}
661          */
662         @Override
663         public int compare( SearchIndexer/../../../../../fr/paris/lutece/portal/service/search/SearchIndexer.html#SearchIndexer">SearchIndexer si1, SearchIndexer si2 )
664         {
665             return si1.getName( ).compareToIgnoreCase( si2.getName( ) );
666         }
667     }
668 }