1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.portal.service.search;
35
36 import fr.paris.lutece.portal.business.indexeraction.IndexerAction;
37 import fr.paris.lutece.portal.business.indexeraction.IndexerActionFilter;
38 import fr.paris.lutece.portal.business.indexeraction.IndexerActionHome;
39 import fr.paris.lutece.portal.service.init.LuteceInitException;
40 import fr.paris.lutece.portal.service.message.SiteMessageException;
41 import fr.paris.lutece.portal.service.util.AppLogService;
42 import fr.paris.lutece.portal.service.util.AppPathService;
43 import fr.paris.lutece.portal.service.util.AppPropertiesService;
44
45 import org.apache.commons.lang.StringUtils;
46
47 import org.apache.lucene.analysis.Analyzer;
48 import org.apache.lucene.document.Document;
49 import org.apache.lucene.index.CorruptIndexException;
50 import org.apache.lucene.index.DirectoryReader;
51 import org.apache.lucene.index.IndexWriter;
52 import org.apache.lucene.index.IndexWriterConfig;
53 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
54 import org.apache.lucene.index.Term;
55 import org.apache.lucene.store.Directory;
56 import org.apache.lucene.store.NIOFSDirectory;
57 import org.apache.lucene.util.Version;
58
59 import java.io.File;
60 import java.io.IOException;
61 import java.io.Serializable;
62
63 import java.util.ArrayList;
64 import java.util.Collection;
65 import java.util.Collections;
66 import java.util.Comparator;
67 import java.util.Date;
68 import java.util.HashMap;
69 import java.util.List;
70 import java.util.Map;
71
72
73
74
75
76 public final class IndexationService
77 {
78
79 public static final String PATH_INDEX = "search.lucene.indexPath";
80 public static final String PATH_INDEX_IN_WEBAPP = "search.lucene.indexInWebapp";
81 public static final String PARAM_FORCING = "forcing";
82 public static final int ALL_DOCUMENT = -1;
83 public static final Version LUCENE_INDEX_VERSION = Version.LUCENE_46;
84 private static final String PARAM_TYPE_PAGE = "Page";
85 private static final String PROPERTY_WRITER_MERGE_FACTOR = "search.lucene.writer.mergeFactor";
86 private static final String PROPERTY_WRITER_MAX_FIELD_LENGTH = "search.lucene.writer.maxFieldLength";
87 private static final String PROPERTY_ANALYSER_CLASS_NAME = "search.lucene.analyser.className";
88 private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;
89 private static final int DEFAULT_WRITER_MAX_FIELD_LENGTH = 1000000;
90 private static String _strIndex;
91 private static int _nWriterMergeFactor;
92 private static int _nWriterMaxFieldLength;
93 private static Analyzer _analyzer;
94 private static Map<String, SearchIndexer> _mapIndexers = new HashMap<String, SearchIndexer>( );
95 private static IndexWriter _writer;
96 private static StringBuffer _sbLogs;
97 private static SearchIndexerComparator _comparator = new SearchIndexerComparator( );
98
99
100
101
102 private IndexationService( )
103 {
104 }
105
106
107
108
109
110
111 public static void init( ) throws LuteceInitException
112 {
113
114 boolean indexInWebapp = AppPropertiesService.getPropertyBoolean( PATH_INDEX_IN_WEBAPP, true );
115
116 if ( indexInWebapp )
117 {
118 _strIndex = AppPathService.getPath( PATH_INDEX );
119 }
120 else
121 {
122 _strIndex = AppPropertiesService.getProperty( PATH_INDEX );
123 }
124
125 if ( ( _strIndex == null ) || ( _strIndex.equals( "" ) ) )
126 {
127 throw new LuteceInitException( "Lucene index path not found in lucene.properties", null );
128 }
129
130 _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR,
131 DEFAULT_WRITER_MERGE_FACTOR );
132 _nWriterMaxFieldLength = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MAX_FIELD_LENGTH,
133 DEFAULT_WRITER_MAX_FIELD_LENGTH );
134
135 String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
136
137 if ( ( _strIndex == null ) || ( _strIndex.equals( "" ) ) )
138 {
139 throw new LuteceInitException( "Analyser class name not found in lucene.properties", null );
140 }
141
142 try
143 {
144 _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
145 }
146 catch ( Exception e )
147 {
148 throw new LuteceInitException( "Failed to load Lucene Analyzer class", e );
149 }
150 }
151
152
153
154
155
156
157 public static void registerIndexer( SearchIndexer indexer )
158 {
159 if ( indexer != null )
160 {
161 _mapIndexers.put( indexer.getName( ), indexer );
162 AppLogService.info( "New search indexer registered : " + indexer.getName( ) );
163 }
164 }
165
166
167
168
169
170
171
172 public static synchronized String processIndexing( boolean bCreate )
173 {
174
175 _sbLogs = new StringBuffer( );
176
177 _writer = null;
178
179 boolean bCreateIndex = bCreate;
180 Directory dir = null;
181
182 try
183 {
184 dir = IndexationService.getDirectoryIndex( );
185
186 if ( !DirectoryReader.indexExists( dir ) )
187 {
188 bCreateIndex = true;
189 }
190
191 Date start = new Date( );
192 IndexWriterConfig conf = new IndexWriterConfig( Version.LUCENE_46, _analyzer );
193
194 if ( bCreateIndex )
195 {
196 conf.setOpenMode( OpenMode.CREATE );
197 }
198 else
199 {
200 conf.setOpenMode( OpenMode.APPEND );
201 }
202
203 _writer = new IndexWriter( dir, conf );
204
205 if ( bCreateIndex )
206 {
207 processFullIndexing( );
208 }
209 else
210 {
211 processIncrementalIndexing( );
212 }
213
214 Date end = new Date( );
215 _sbLogs.append( "Duration of the treatment : " );
216 _sbLogs.append( end.getTime( ) - start.getTime( ) );
217 _sbLogs.append( " milliseconds\r\n" );
218 }
219 catch ( Exception e )
220 {
221 error( "Indexing error ", e, "" );
222 }
223 finally
224 {
225 try
226 {
227 if ( _writer != null )
228 {
229 _writer.close( );
230 }
231 }
232 catch ( IOException e )
233 {
234 AppLogService.error( e.getMessage( ), e );
235 }
236
237 try
238 {
239 if ( dir != null )
240 {
241 dir.close( );
242 }
243 }
244 catch ( IOException e )
245 {
246 AppLogService.error( e.getMessage( ), e );
247 }
248 }
249
250 return _sbLogs.toString( );
251 }
252
253
254
255
256 private static void processFullIndexing( )
257 {
258 _sbLogs.append( "\r\nIndexing all contents ...\r\n" );
259
260 for ( SearchIndexer indexer : getIndexerListSortedByName( ) )
261 {
262
263 try
264 {
265 if ( indexer.isEnable( ) )
266 {
267 _sbLogs.append( "\r\n<strong>Indexer : " );
268 _sbLogs.append( indexer.getName( ) );
269 _sbLogs.append( " - " );
270 _sbLogs.append( indexer.getDescription( ) );
271 _sbLogs.append( "</strong>\r\n" );
272
273
274 indexer.indexDocuments( );
275 }
276 }
277 catch ( Exception e )
278 {
279 error( indexer, e, StringUtils.EMPTY );
280 }
281 }
282
283 removeAllIndexerAction( );
284 }
285
286
287
288
289
290
291
292
293
294 private static void processIncrementalIndexing( )
295 throws CorruptIndexException, IOException, InterruptedException, SiteMessageException
296 {
297 _sbLogs.append( "\r\nIncremental Indexing ...\r\n" );
298
299
300 Collection<IndexerAction> actions = IndexerActionHome.getList( );
301
302 for ( IndexerAction action : actions )
303 {
304
305 try
306 {
307 SearchIndexer indexer = _mapIndexers.get( action.getIndexerName( ) );
308
309 if ( action.getIdTask( ) == IndexerAction.TASK_DELETE )
310 {
311 deleteDocument( action );
312 }
313 else
314 {
315 List<org.apache.lucene.document.Document> luceneDocuments = indexer.getDocuments( action.getIdDocument( ) );
316
317 if ( ( luceneDocuments != null ) && ( luceneDocuments.size( ) > 0 ) )
318 {
319 for ( org.apache.lucene.document.Document doc : luceneDocuments )
320 {
321 if ( ( action.getIdPortlet( ) == ALL_DOCUMENT ) ||
322 ( ( doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ) != null ) &&
323 ( doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID )
324 .equals( doc.get( SearchItem.FIELD_UID ) + "&" + action.getIdPortlet( ) ) ) ) )
325 {
326 processDocument( action, doc );
327 }
328 }
329 }
330 }
331
332 removeIndexerAction( action.getIdAction( ) );
333 }
334 catch ( Exception e )
335 {
336 error( action, e, StringUtils.EMPTY );
337 }
338 }
339
340
341 _writer.deleteDocuments( new Term( SearchItem.FIELD_TYPE, PARAM_TYPE_PAGE ) );
342 _mapIndexers.get( PageIndexer.INDEXER_NAME ).indexDocuments( );
343 }
344
345
346
347
348
349
350
351
352 private static void deleteDocument( IndexerAction action )
353 throws CorruptIndexException, IOException
354 {
355 if ( action.getIdPortlet( ) != ALL_DOCUMENT )
356 {
357
358 _writer.deleteDocuments( new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID,
359 action.getIdDocument( ) + "&" + Integer.toString( action.getIdPortlet( ) ) ) );
360 }
361 else
362 {
363
364 _writer.deleteDocuments( new Term( SearchItem.FIELD_UID, action.getIdDocument( ) ) );
365 }
366
367 _sbLogs.append( "Deleting #" ).append( action.getIdDocument( ) ).append( "\r\n" );
368 }
369
370
371
372
373
374
375
376
377
378 private static void processDocument( IndexerAction action, Document doc )
379 throws CorruptIndexException, IOException
380 {
381 if ( action.getIdTask( ) == IndexerAction.TASK_CREATE )
382 {
383 _writer.addDocument( doc );
384 logDoc( "Adding ", doc );
385 }
386 else if ( action.getIdTask( ) == IndexerAction.TASK_MODIFY )
387 {
388 if ( action.getIdPortlet( ) != ALL_DOCUMENT )
389 {
390
391 _writer.updateDocument( new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID,
392 doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ) ), doc );
393 }
394 else
395 {
396 _writer.updateDocument( new Term( SearchItem.FIELD_UID,
397 doc.getField( SearchItem.FIELD_UID ).stringValue( ) ), doc );
398 }
399
400 logDoc( "Updating ", doc );
401 }
402 }
403
404
405
406
407
408
409
410
411 public static void write( Document doc ) throws CorruptIndexException, IOException
412 {
413 _writer.addDocument( doc );
414 logDoc( "Indexing ", doc );
415 }
416
417
418
419
420
421
422 private static void logDoc( String strAction, Document doc )
423 {
424 _sbLogs.append( strAction );
425 _sbLogs.append( doc.get( SearchItem.FIELD_TYPE ) );
426 _sbLogs.append( " #" );
427 _sbLogs.append( doc.get( SearchItem.FIELD_UID ) );
428 _sbLogs.append( " - " );
429 _sbLogs.append( doc.get( SearchItem.FIELD_TITLE ) );
430 _sbLogs.append( "\r\n" );
431 }
432
433
434
435
436
437
438
439
440 public static void error( SearchIndexer indexer, Exception e, String strMessage )
441 {
442 String strTitle = "Indexer : " + indexer.getName( );
443 error( strTitle, e, strMessage );
444 }
445
446
447
448
449
450
451
452
453 public static void error( IndexerAction action, Exception e, String strMessage )
454 {
455 String strTitle = "Action from indexer : " + action.getIndexerName( );
456 strTitle += ( " Action ID : " + action.getIdAction( ) + " - Document ID : " + action.getIdDocument( ) );
457 error( strTitle, e, strMessage );
458 }
459
460
461
462
463
464
465
466 private static void error( String strTitle, Exception e, String strMessage )
467 {
468 _sbLogs.append( "<strong class=\"alert\">" );
469 _sbLogs.append( strTitle );
470 _sbLogs.append( " - ERROR : " );
471 _sbLogs.append( e.getMessage( ) );
472
473 if ( e.getCause( ) != null )
474 {
475 _sbLogs.append( " : " );
476 _sbLogs.append( e.getCause( ).getMessage( ) );
477 }
478
479 if ( StringUtils.isNotBlank( strMessage ) )
480 {
481 _sbLogs.append( " - " ).append( strMessage );
482 }
483
484 _sbLogs.append( "</strong>\r\n" );
485 AppLogService.error( "Indexing error : " + e.getMessage( ), e );
486 }
487
488
489
490
491
492
493
494 @Deprecated
495 public static String getIndex( )
496 {
497 return _strIndex;
498 }
499
500
501
502
503
504
505
506 public static Directory getDirectoryIndex( ) throws IOException
507 {
508 return NIOFSDirectory.open( new File( _strIndex ) );
509 }
510
511
512
513
514
515
516 public static Analyzer getAnalyser( )
517 {
518 return _analyzer;
519 }
520
521
522
523
524
525
526 public static Collection<SearchIndexer> getIndexers( )
527 {
528 return _mapIndexers.values( );
529 }
530
531
532
533
534
535
536
537 public static List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
538 {
539 IndexerActionFilter filter = new IndexerActionFilter( );
540 filter.setIdTask( nIdTask );
541
542 return IndexerActionHome.getList( filter );
543 }
544
545
546
547
548
549
550
551 public static void removeIndexerAction( int nIdAction )
552 {
553 IndexerActionHome.remove( nIdAction );
554 }
555
556
557
558
559
560 public static void removeAllIndexerAction( )
561 {
562 IndexerActionHome.removeAll( );
563 }
564
565
566
567
568
569
570
571
572
573 public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask, int nIdPortlet )
574 {
575 IndexerAction indexerAction = new IndexerAction( );
576 indexerAction.setIdDocument( strIdDocument );
577 indexerAction.setIdTask( nIdTask );
578 indexerAction.setIndexerName( indexerName );
579 indexerAction.setIdPortlet( nIdPortlet );
580 IndexerActionHome.create( indexerAction );
581 }
582
583
584
585
586
587
588
589
590 public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask )
591 {
592 addIndexerAction( strIdDocument, indexerName, nIdTask, ALL_DOCUMENT );
593 }
594
595
596
597
598
599 private static List<SearchIndexer> getIndexerListSortedByName( )
600 {
601 List<SearchIndexer> list = new ArrayList<SearchIndexer>( _mapIndexers.values( ) );
602 Collections.sort( list, _comparator );
603
604 return list;
605 }
606
607
608
609
610 private static class SearchIndexerComparator implements Comparator<SearchIndexer>, Serializable
611 {
612 private static final long serialVersionUID = -3800252801777838562L;
613
614
615
616
617 @Override
618 public int compare( SearchIndexer si1, SearchIndexer si2 )
619 {
620 return si1.getName( ).compareToIgnoreCase( si2.getName( ) );
621 }
622 }
623 }