View Javadoc
1   /*
2    * Copyright (c) 2002-2014, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.portal.service.search;
35  
36  import fr.paris.lutece.portal.business.indexeraction.IndexerAction;
37  import fr.paris.lutece.portal.business.indexeraction.IndexerActionFilter;
38  import fr.paris.lutece.portal.business.indexeraction.IndexerActionHome;
39  import fr.paris.lutece.portal.service.init.LuteceInitException;
40  import fr.paris.lutece.portal.service.message.SiteMessageException;
41  import fr.paris.lutece.portal.service.util.AppLogService;
42  import fr.paris.lutece.portal.service.util.AppPathService;
43  import fr.paris.lutece.portal.service.util.AppPropertiesService;
44  
45  import org.apache.commons.lang.StringUtils;
46  
47  import org.apache.lucene.analysis.Analyzer;
48  import org.apache.lucene.document.Document;
49  import org.apache.lucene.index.CorruptIndexException;
50  import org.apache.lucene.index.DirectoryReader;
51  import org.apache.lucene.index.IndexWriter;
52  import org.apache.lucene.index.IndexWriterConfig;
53  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
54  import org.apache.lucene.index.Term;
55  import org.apache.lucene.store.Directory;
56  import org.apache.lucene.store.NIOFSDirectory;
57  import org.apache.lucene.util.Version;
58  
59  import java.io.File;
60  import java.io.IOException;
61  import java.io.Serializable;
62  
63  import java.util.ArrayList;
64  import java.util.Collection;
65  import java.util.Collections;
66  import java.util.Comparator;
67  import java.util.Date;
68  import java.util.HashMap;
69  import java.util.List;
70  import java.util.Map;
71  
72  
73  /**
74   * This class provides management methods for indexing
75   */
76  public final class IndexationService
77  {
78      // Constants corresponding to the variables defined in the lutece.properties file
79      public static final String PATH_INDEX = "search.lucene.indexPath";
80      public static final String PATH_INDEX_IN_WEBAPP = "search.lucene.indexInWebapp";
81      public static final String PARAM_FORCING = "forcing";
82      public static final int ALL_DOCUMENT = -1;
83      public static final Version LUCENE_INDEX_VERSION = Version.LUCENE_46;
84      private static final String PARAM_TYPE_PAGE = "Page";
85      private static final String PROPERTY_WRITER_MERGE_FACTOR = "search.lucene.writer.mergeFactor";
86      private static final String PROPERTY_WRITER_MAX_FIELD_LENGTH = "search.lucene.writer.maxFieldLength";
87      private static final String PROPERTY_ANALYSER_CLASS_NAME = "search.lucene.analyser.className";
88      private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;
89      private static final int DEFAULT_WRITER_MAX_FIELD_LENGTH = 1000000;
90      private static String _strIndex;
91      private static int _nWriterMergeFactor;
92      private static int _nWriterMaxFieldLength;
93      private static Analyzer _analyzer;
94      private static Map<String, SearchIndexer> _mapIndexers = new HashMap<String, SearchIndexer>(  );
95      private static IndexWriter _writer;
96      private static StringBuffer _sbLogs;
97      private static SearchIndexerComparator _comparator = new SearchIndexerComparator(  );
98  
99      /**
100      * The private constructor
101      */
102     private IndexationService(  )
103     {
104     }
105 
106     /**
107      * Initalizes the service
108      *
109      * @throws LuteceInitException If an error occured
110      */
111     public static void init(  ) throws LuteceInitException
112     {
113         // Read configuration properties
114         boolean indexInWebapp = AppPropertiesService.getPropertyBoolean( PATH_INDEX_IN_WEBAPP, true );
115 
116         if ( indexInWebapp )
117         {
118             _strIndex = AppPathService.getPath( PATH_INDEX );
119         }
120         else
121         {
122             _strIndex = AppPropertiesService.getProperty( PATH_INDEX );
123         }
124 
125         if ( ( _strIndex == null ) || ( _strIndex.equals( "" ) ) )
126         {
127             throw new LuteceInitException( "Lucene index path not found in lucene.properties", null );
128         }
129 
130         _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR,
131                 DEFAULT_WRITER_MERGE_FACTOR );
132         _nWriterMaxFieldLength = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MAX_FIELD_LENGTH,
133                 DEFAULT_WRITER_MAX_FIELD_LENGTH );
134 
135         String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
136 
137         if ( ( _strIndex == null ) || ( _strIndex.equals( "" ) ) )
138         {
139             throw new LuteceInitException( "Analyser class name not found in lucene.properties", null );
140         }
141 
142         try
143         {
144             _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance(  );
145         }
146         catch ( Exception e )
147         {
148             throw new LuteceInitException( "Failed to load Lucene Analyzer class", e );
149         }
150     }
151 
152     /**
153      * Register an indexer
154      *
155      * @param indexer The indexer to add to the registry
156      */
157     public static void registerIndexer( SearchIndexer indexer )
158     {
159         if ( indexer != null )
160         {
161             _mapIndexers.put( indexer.getName(  ), indexer );
162             AppLogService.info( "New search indexer registered : " + indexer.getName(  ) );
163         }
164     }
165 
166     /**
167      * Process the indexing
168      *
169      * @param bCreate Force creating the index
170      * @return the result log of the indexing
171      */
172     public static synchronized String processIndexing( boolean bCreate )
173     {
174         // String buffer for building the response page;
175         _sbLogs = new StringBuffer(  );
176 
177         _writer = null;
178 
179         boolean bCreateIndex = bCreate;
180         Directory dir = null;
181 
182         try
183         {
184             dir = IndexationService.getDirectoryIndex(  );
185 
186             if ( !DirectoryReader.indexExists( dir ) )
187             { //init index
188                 bCreateIndex = true;
189             }
190 
191             Date start = new Date(  );
192             IndexWriterConfig conf = new IndexWriterConfig( Version.LUCENE_46, _analyzer );
193 
194             if ( bCreateIndex )
195             {
196                 conf.setOpenMode( OpenMode.CREATE );
197             }
198             else
199             {
200                 conf.setOpenMode( OpenMode.APPEND );
201             }
202 
203             _writer = new IndexWriter( dir, conf );
204 
205             if ( bCreateIndex )
206             {
207                 processFullIndexing(  );
208             }
209             else
210             {
211                 processIncrementalIndexing(  );
212             }
213 
214             Date end = new Date(  );
215             _sbLogs.append( "Duration of the treatment : " );
216             _sbLogs.append( end.getTime(  ) - start.getTime(  ) );
217             _sbLogs.append( " milliseconds\r\n" );
218         }
219         catch ( Exception e )
220         {
221             error( "Indexing error ", e, "" );
222         }
223         finally
224         {
225             try
226             {
227                 if ( _writer != null )
228                 {
229                     _writer.close(  );
230                 }
231             }
232             catch ( IOException e )
233             {
234                 AppLogService.error( e.getMessage(  ), e );
235             }
236 
237             try
238             {
239                 if ( dir != null )
240                 {
241                     dir.close(  );
242                 }
243             }
244             catch ( IOException e )
245             {
246                 AppLogService.error( e.getMessage(  ), e );
247             }
248         }
249 
250         return _sbLogs.toString(  );
251     }
252 
253     /**
254      * Process all contents
255      */
256     private static void processFullIndexing(  )
257     {
258         _sbLogs.append( "\r\nIndexing all contents ...\r\n" );
259 
260         for ( SearchIndexer indexer : getIndexerListSortedByName(  ) )
261         {
262             // catch any exception coming from an indexer to prevent global indexation to fail
263             try
264             {
265                 if ( indexer.isEnable(  ) )
266                 {
267                     _sbLogs.append( "\r\n<strong>Indexer : " );
268                     _sbLogs.append( indexer.getName(  ) );
269                     _sbLogs.append( " - " );
270                     _sbLogs.append( indexer.getDescription(  ) );
271                     _sbLogs.append( "</strong>\r\n" );
272 
273                     //the indexer will call write(doc)
274                     indexer.indexDocuments(  );
275                 }
276             }
277             catch ( Exception e )
278             {
279                 error( indexer, e, StringUtils.EMPTY );
280             }
281         }
282 
283         removeAllIndexerAction(  );
284     }
285 
286     /**
287      * Process incremental indexing
288      *
289      * @throws CorruptIndexException if an error occurs
290      * @throws IOException if an error occurs
291      * @throws InterruptedException if an error occurs
292      * @throws SiteMessageException if an error occurs
293      */
294     private static void processIncrementalIndexing(  )
295         throws CorruptIndexException, IOException, InterruptedException, SiteMessageException
296     {
297         _sbLogs.append( "\r\nIncremental Indexing ...\r\n" );
298 
299         //incremental indexing
300         Collection<IndexerAction> actions = IndexerActionHome.getList(  );
301 
302         for ( IndexerAction action : actions )
303         {
304             // catch any exception coming from an indexer to prevent global indexation to fail
305             try
306             {
307                 SearchIndexer indexer = _mapIndexers.get( action.getIndexerName(  ) );
308 
309                 if ( action.getIdTask(  ) == IndexerAction.TASK_DELETE )
310                 {
311                     deleteDocument( action );
312                 }
313                 else
314                 {
315                     List<org.apache.lucene.document.Document> luceneDocuments = indexer.getDocuments( action.getIdDocument(  ) );
316 
317                     if ( ( luceneDocuments != null ) && ( luceneDocuments.size(  ) > 0 ) )
318                     {
319                         for ( org.apache.lucene.document.Document doc : luceneDocuments )
320                         {
321                             if ( ( action.getIdPortlet(  ) == ALL_DOCUMENT ) ||
322                                     ( ( doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ) != null ) &&
323                                     ( doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID )
324                                              .equals( doc.get( SearchItem.FIELD_UID ) + "&" + action.getIdPortlet(  ) ) ) ) )
325                             {
326                                 processDocument( action, doc );
327                             }
328                         }
329                     }
330                 }
331 
332                 removeIndexerAction( action.getIdAction(  ) );
333             }
334             catch ( Exception e )
335             {
336                 error( action, e, StringUtils.EMPTY );
337             }
338         }
339 
340         //reindexing all pages.
341         _writer.deleteDocuments( new Term( SearchItem.FIELD_TYPE, PARAM_TYPE_PAGE ) );
342         _mapIndexers.get( PageIndexer.INDEXER_NAME ).indexDocuments(  );
343     }
344 
345     /**
346      * Delete a document from the index
347      *
348      * @param action The current action
349      * @throws CorruptIndexException if an error occurs
350      * @throws IOException if an error occurs
351      */
352     private static void deleteDocument( IndexerAction action )
353         throws CorruptIndexException, IOException
354     {
355         if ( action.getIdPortlet(  ) != ALL_DOCUMENT )
356         {
357             //delete only the index linked to this portlet
358             _writer.deleteDocuments( new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID,
359                     action.getIdDocument(  ) + "&" + Integer.toString( action.getIdPortlet(  ) ) ) );
360         }
361         else
362         {
363             //delete all index linked to uid
364             _writer.deleteDocuments( new Term( SearchItem.FIELD_UID, action.getIdDocument(  ) ) );
365         }
366 
367         _sbLogs.append( "Deleting #" ).append( action.getIdDocument(  ) ).append( "\r\n" );
368     }
369 
370     /**
371      * Create or update the index for a given document
372      *
373      * @param action The current action
374      * @param doc The document
375      * @throws CorruptIndexException if an error occurs
376      * @throws IOException if an error occurs
377      */
378     private static void processDocument( IndexerAction action, Document doc )
379         throws CorruptIndexException, IOException
380     {
381         if ( action.getIdTask(  ) == IndexerAction.TASK_CREATE )
382         {
383             _writer.addDocument( doc );
384             logDoc( "Adding ", doc );
385         }
386         else if ( action.getIdTask(  ) == IndexerAction.TASK_MODIFY )
387         {
388             if ( action.getIdPortlet(  ) != ALL_DOCUMENT )
389             {
390                 //delete only the index linked to this portlet
391                 _writer.updateDocument( new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID,
392                         doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ) ), doc );
393             }
394             else
395             {
396                 _writer.updateDocument( new Term( SearchItem.FIELD_UID,
397                         doc.getField( SearchItem.FIELD_UID ).stringValue(  ) ), doc );
398             }
399 
400             logDoc( "Updating ", doc );
401         }
402     }
403 
404     /**
405      * Index one document, called by plugin indexers
406      *
407      * @param doc the document to index
408      * @throws CorruptIndexException corruptIndexException
409      * @throws IOException i/o exception
410      */
411     public static void write( Document doc ) throws CorruptIndexException, IOException
412     {
413         _writer.addDocument( doc );
414         logDoc( "Indexing ", doc );
415     }
416 
417     /**
418      * Log an action made on a document
419      * @param strAction The action
420      * @param doc The document
421      */
422     private static void logDoc( String strAction, Document doc )
423     {
424         _sbLogs.append( strAction );
425         _sbLogs.append( doc.get( SearchItem.FIELD_TYPE ) );
426         _sbLogs.append( " #" );
427         _sbLogs.append( doc.get( SearchItem.FIELD_UID ) );
428         _sbLogs.append( " - " );
429         _sbLogs.append( doc.get( SearchItem.FIELD_TITLE ) );
430         _sbLogs.append( "\r\n" );
431     }
432 
433     /**
434      * Log the error for the search indexer.
435      *
436      * @param indexer the {@link SearchIndexer}
437      * @param e the exception
438      * @param strMessage the str message
439      */
440     public static void error( SearchIndexer indexer, Exception e, String strMessage )
441     {
442         String strTitle = "Indexer : " + indexer.getName(  );
443         error( strTitle, e, strMessage );
444     }
445 
446     /**
447      * Log the error for the indexer action.
448      *
449      * @param action the {@link IndexerAction}
450      * @param e the exception
451      * @param strMessage the str message
452      */
453     public static void error( IndexerAction action, Exception e, String strMessage )
454     {
455         String strTitle = "Action from indexer : " + action.getIndexerName(  );
456         strTitle += ( " Action ID : " + action.getIdAction(  ) + " - Document ID : " + action.getIdDocument(  ) );
457         error( strTitle, e, strMessage );
458     }
459 
460     /**
461      * Log an exception
462      * @param strTitle The title of the error
463      * @param e The exception to log
464      * @param strMessage The message
465      */
466     private static void error( String strTitle, Exception e, String strMessage )
467     {
468         _sbLogs.append( "<strong class=\"alert\">" );
469         _sbLogs.append( strTitle );
470         _sbLogs.append( " - ERROR : " );
471         _sbLogs.append( e.getMessage(  ) );
472 
473         if ( e.getCause(  ) != null )
474         {
475             _sbLogs.append( " : " );
476             _sbLogs.append( e.getCause(  ).getMessage(  ) );
477         }
478 
479         if ( StringUtils.isNotBlank( strMessage ) )
480         {
481             _sbLogs.append( " - " ).append( strMessage );
482         }
483 
484         _sbLogs.append( "</strong>\r\n" );
485         AppLogService.error( "Indexing error : " + e.getMessage(  ), e );
486     }
487 
488     /**
489      * Gets the current index
490      *
491      * @return The index
492      * @deprecated use getDirectoryIndex( ) instead
493      */
494     @Deprecated
495     public static String getIndex(  )
496     {
497         return _strIndex;
498     }
499 
500     /**
501      * Gets the current IndexSearcher.
502      *
503      * @return IndexSearcher
504      * @throws IOException Signals that an I/O exception has occurred.
505      */
506     public static Directory getDirectoryIndex(  ) throws IOException
507     {
508         return NIOFSDirectory.open( new File( _strIndex ) );
509     }
510 
511     /**
512      * Gets the current analyser
513      *
514      * @return The analyser
515      */
516     public static Analyzer getAnalyser(  )
517     {
518         return _analyzer;
519     }
520 
521     /**
522      * Returns all search indexers
523      *
524      * @return A collection of indexers
525      */
526     public static Collection<SearchIndexer> getIndexers(  )
527     {
528         return _mapIndexers.values(  );
529     }
530 
531     /**
532      * return a list of IndexerAction by task key
533      *
534      * @param nIdTask the task kety
535      * @return a list of IndexerAction
536      */
537     public static List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
538     {
539         IndexerActionFilter filter = new IndexerActionFilter(  );
540         filter.setIdTask( nIdTask );
541 
542         return IndexerActionHome.getList( filter );
543     }
544 
545     /**
546      * Remove a Indexer Action
547      *
548      * @param nIdAction the key of the action to remove
549      *
550      */
551     public static void removeIndexerAction( int nIdAction )
552     {
553         IndexerActionHome.remove( nIdAction );
554     }
555 
556     /**
557      * Remove all Indexer Action
558      *
559      */
560     public static void removeAllIndexerAction(  )
561     {
562         IndexerActionHome.removeAll(  );
563     }
564 
565     /**
566      * Add Indexer Action to perform on a record
567      *
568      * @param strIdDocument the id of the document
569      * @param indexerName the name of the indexer
570      * @param nIdTask the key of the action to do
571      * @param nIdPortlet id of the portlet
572      */
573     public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask, int nIdPortlet )
574     {
575         IndexerAction indexerAction = new IndexerAction(  );
576         indexerAction.setIdDocument( strIdDocument );
577         indexerAction.setIdTask( nIdTask );
578         indexerAction.setIndexerName( indexerName );
579         indexerAction.setIdPortlet( nIdPortlet );
580         IndexerActionHome.create( indexerAction );
581     }
582 
583     /**
584      * Add Indexer Action to perform on a record
585      *
586      * @param strIdDocument the id of the document
587      * @param indexerName the name of the indexer
588      * @param nIdTask the key of the action to do
589      */
590     public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask )
591     {
592         addIndexerAction( strIdDocument, indexerName, nIdTask, ALL_DOCUMENT );
593     }
594 
595     /**
596      * Gets a sorted list of registered indexers
597      * @return The list
598      */
599     private static List<SearchIndexer> getIndexerListSortedByName(  )
600     {
601         List<SearchIndexer> list = new ArrayList<SearchIndexer>( _mapIndexers.values(  ) );
602         Collections.sort( list, _comparator );
603 
604         return list;
605     }
606 
607     /**
608      * Comparator to sort indexer
609      */
610     private static class SearchIndexerComparator implements Comparator<SearchIndexer>, Serializable
611     {
612         private static final long serialVersionUID = -3800252801777838562L;
613 
614         /**
615          * {@inheritDoc}
616          */
617         @Override
618         public int compare( SearchIndexer si1, SearchIndexer si2 )
619         {
620             return si1.getName(  ).compareToIgnoreCase( si2.getName(  ) );
621         }
622     }
623 }