1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.document.service.docsearch;
35
36 import fr.paris.lutece.plugins.document.business.DocumentHome;
37 import fr.paris.lutece.plugins.document.business.DocumentType;
38 import fr.paris.lutece.plugins.document.business.IndexerAction;
39 import fr.paris.lutece.plugins.document.business.IndexerActionFilter;
40 import fr.paris.lutece.plugins.document.business.IndexerActionHome;
41 import fr.paris.lutece.plugins.document.business.spaces.DocumentSpace;
42 import fr.paris.lutece.plugins.document.service.spaces.DocumentSpacesService;
43 import fr.paris.lutece.portal.business.user.AdminUser;
44 import fr.paris.lutece.portal.service.search.IndexationService;
45 import fr.paris.lutece.portal.service.spring.SpringContextService;
46 import fr.paris.lutece.portal.service.util.AppException;
47 import fr.paris.lutece.portal.service.util.AppLogService;
48 import fr.paris.lutece.portal.service.util.AppPathService;
49 import fr.paris.lutece.portal.service.util.AppPropertiesService;
50
51 import org.apache.commons.lang3.StringUtils;
52
53 import org.apache.lucene.analysis.Analyzer;
54 import org.apache.lucene.analysis.core.KeywordAnalyzer;
55 import org.apache.lucene.document.Document;
56 import org.apache.lucene.index.DirectoryReader;
57 import org.apache.lucene.index.IndexReader;
58 import org.apache.lucene.index.IndexWriter;
59 import org.apache.lucene.index.IndexWriterConfig;
60 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
61 import org.apache.lucene.index.Term;
62 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
63 import org.apache.lucene.queryparser.classic.QueryParser;
64 import org.apache.lucene.search.BooleanClause;
65 import org.apache.lucene.search.BooleanQuery;
66 import org.apache.lucene.search.IndexSearcher;
67 import org.apache.lucene.search.Query;
68 import org.apache.lucene.search.ScoreDoc;
69 import org.apache.lucene.search.TermQuery;
70 import org.apache.lucene.search.TopDocs;
71 import org.apache.lucene.store.Directory;
72 import org.apache.lucene.store.NIOFSDirectory;
73 import org.apache.lucene.util.Version;
74
75 import java.io.File;
76 import java.io.IOException;
77 import java.nio.file.Paths;
78
79 import java.text.DateFormat;
80 import java.text.ParseException;
81 import java.text.SimpleDateFormat;
82
83 import java.util.ArrayList;
84 import java.util.Collection;
85 import java.util.Date;
86 import java.util.Iterator;
87 import java.util.List;
88 import java.util.Locale;
89
90
91
92
93
94 public class DocSearchService
95 {
96
97 public static final String PATH_INDEX = "document.docsearch.lucene.indexPath";
98 public static final String PARAM_FORCING = "forcing";
99 public static final String PATTERN_DATE = "dd/MM/yy";
100 private static final String PROPERTY_ANALYSER_CLASS_NAME = "document.docsearch.lucene.analyser.className";
101 private static final int MAX_RESPONSES = 1000000;
102 private static String _strIndex;
103 private static Analyzer _analyzer;
104 private static IndexSearcher _searcher;
105 private static DocSearchService _singleton;
106 private static IDocSearchIndexer _indexer;
107
108
109 private DocSearchService( )
110 {
111
112 _strIndex = AppPathService.getPath( PATH_INDEX );
113
114 if ( ( _strIndex == null ) || ( _strIndex.equals( StringUtils.EMPTY ) ) )
115 {
116 throw new AppException( "Lucene index path not found in document.properties", null );
117 }
118
119 String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
120
121 if ( ( strAnalyserClassName == null ) || ( strAnalyserClassName.equals( StringUtils.EMPTY ) ) )
122 {
123 throw new AppException( "Analyser class name not found in lucene.properties", null );
124 }
125
126 _indexer = SpringContextService.getBean( "document.docSearchIndexer" );
127
128 try
129 {
130 _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
131 }
132 catch ( Exception e )
133 {
134 throw new AppException( "Failed to load Lucene Analyzer class", e );
135 }
136 }
137
138
139
140
141
142 public static DocSearchService getInstance( )
143 {
144 if ( _singleton == null )
145 {
146 _singleton = new DocSearchService( );
147 }
148
149 return _singleton;
150 }
151
152
153
154
155
156
157 public String processIndexing( boolean bCreate )
158 {
159 StringBuilder sbLogs = new StringBuilder( );
160
161 IndexWriter writer = null;
162 boolean bCreateIndex = bCreate;
163
164 try
165 {
166 sbLogs.append( "\r\nIndexing all contents ...\r\n" );
167
168 Directory dir = NIOFSDirectory.open( Paths.get( _strIndex ) );
169
170 if ( !DirectoryReader.indexExists( dir ) )
171 {
172 bCreateIndex = true;
173 }
174
175 Date start = new Date( );
176 IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
177
178 if ( bCreateIndex )
179 {
180 conf.setOpenMode( OpenMode.CREATE );
181 }
182 else
183 {
184 conf.setOpenMode( OpenMode.APPEND );
185 }
186
187 writer = new IndexWriter( dir, conf );
188
189 if ( !bCreateIndex )
190 {
191
192
193
194 for ( IndexerAction action : getAllIndexerActionByTask( IndexerAction.TASK_CREATE ) )
195 {
196 ArrayList<Integer> luceneDocumentId = new ArrayList<Integer>( );
197 try
198 {
199 luceneDocumentId.add( action.getIdDocument( ) );
200
201 List<org.apache.lucene.document.Document> luceneDocument = _indexer.getDocuments( luceneDocumentId );
202
203 if ( ( luceneDocument != null ) && ( luceneDocument.size( ) > 0 ) )
204 {
205 Iterator<org.apache.lucene.document.Document> it = luceneDocument.iterator( );
206
207 while ( it.hasNext( ) )
208 {
209 org.apache.lucene.document.Document doc = it.next( );
210 writer.addDocument( doc );
211 sbLogs.append( "Adding " );
212 sbLogs.append( doc.get( DocSearchItem.FIELD_TYPE ) );
213 sbLogs.append( " #" );
214 sbLogs.append( doc.get( DocSearchItem.FIELD_UID ) );
215 sbLogs.append( " - " );
216 sbLogs.append( doc.get( DocSearchItem.FIELD_TITLE ) );
217 sbLogs.append( "\r\n" );
218 }
219 }
220 }
221 catch ( Exception e )
222 {
223 sbLogs.append( "Indexing DocId " + luceneDocumentId + " Error durign document indexation parsing.\r\n" );
224 sbLogs.append( "Caught a " );
225 sbLogs.append( e.getClass( ) );
226 sbLogs.append( "\r\n with message: " );
227 sbLogs.append( e.getMessage( ) );
228 AppLogService.error( "Indexing error : ", e );
229 }
230
231 removeIndexerAction( action.getIdAction( ) );
232 }
233
234
235 for ( IndexerAction action : getAllIndexerActionByTask( IndexerAction.TASK_MODIFY ) )
236 {
237 ArrayList<Integer> luceneDocumentId = new ArrayList<Integer>( );
238 try
239 {
240 luceneDocumentId.add( action.getIdDocument( ) );
241
242 List<org.apache.lucene.document.Document> luceneDocument = _indexer.getDocuments( luceneDocumentId );
243
244 if ( ( luceneDocument != null ) && ( luceneDocument.size( ) > 0 ) )
245 {
246 Iterator<org.apache.lucene.document.Document> it = luceneDocument.iterator( );
247
248 while ( it.hasNext( ) )
249 {
250 org.apache.lucene.document.Document doc = it.next( );
251 writer.updateDocument( new Term( DocSearchItem.FIELD_UID,
252 Integer.toString( action.getIdDocument( ) ) ), doc );
253 sbLogs.append( "Updating " );
254 sbLogs.append( doc.get( DocSearchItem.FIELD_TYPE ) );
255 sbLogs.append( " #" );
256 sbLogs.append( doc.get( DocSearchItem.FIELD_UID ) );
257 sbLogs.append( " - " );
258 sbLogs.append( doc.get( DocSearchItem.FIELD_TITLE ) );
259 sbLogs.append( "\r\n" );
260 }
261 }
262 }
263 catch ( Exception e )
264 {
265 sbLogs.append( "Indexing DocId " + luceneDocumentId + " Error durign document indexation parsing.\r\n" );
266 sbLogs.append( "Caught a " );
267 sbLogs.append( e.getClass( ) );
268 sbLogs.append( "\r\n with message: " );
269 sbLogs.append( e.getMessage( ) );
270 AppLogService.error( "Indexing error : ", e );
271 }
272
273 removeIndexerAction( action.getIdAction( ) );
274 }
275
276
277 for ( IndexerAction action : getAllIndexerActionByTask( IndexerAction.TASK_DELETE ) )
278 {
279 writer.deleteDocuments( new Term( DocSearchItem.FIELD_UID,
280 Integer.toString( action.getIdDocument( ) ) ) );
281 sbLogs.append( "Deleting " );
282 sbLogs.append( " #" );
283 sbLogs.append( action.getIdDocument( ) );
284 sbLogs.append( "\r\n" );
285
286 removeIndexerAction( action.getIdAction( ) );
287 }
288 }
289 else
290 {
291
292 removeAllIndexerAction( );
293
294 Collection<Integer> listIdDocuments = DocumentHome.findAllPrimaryKeys( );
295
296 for ( Integer nIdDocument : listIdDocuments )
297 {
298
299 ArrayList<Integer> luceneDocumentId = new ArrayList<Integer>( );
300 try
301 {
302 luceneDocumentId.add( nIdDocument );
303
304 List<Document> listDocuments = _indexer.getDocuments( luceneDocumentId );
305
306 for ( Document doc : listDocuments )
307 {
308 writer.addDocument( doc );
309 sbLogs.append( "Indexing " );
310 sbLogs.append( doc.get( DocSearchItem.FIELD_TYPE ) );
311 sbLogs.append( " #" );
312 sbLogs.append( doc.get( DocSearchItem.FIELD_UID ) );
313 sbLogs.append( " - " );
314 sbLogs.append( doc.get( DocSearchItem.FIELD_TITLE ) );
315 sbLogs.append( "\r\n" );
316 }
317 }
318 catch ( Exception e )
319 {
320 sbLogs.append( "Indexing DocId " + luceneDocumentId + " Error durign document indexation parsing.\r\n" );
321 sbLogs.append( "Caught a " );
322 sbLogs.append( e.getClass( ) );
323 sbLogs.append( "\r\n with message: " );
324 sbLogs.append( e.getMessage( ) );
325 AppLogService.error( "Indexing error : ", e );
326 }
327 }
328 }
329
330 Date end = new Date( );
331 sbLogs.append( "Duration of the treatment : " );
332 sbLogs.append( end.getTime( ) - start.getTime( ) );
333 sbLogs.append( " milliseconds\r\n" );
334 }
335 catch ( Exception e )
336 {
337 sbLogs.append( " caught a " );
338 sbLogs.append( e.getClass( ) );
339 sbLogs.append( "\n with message: " );
340 sbLogs.append( e.getMessage( ) );
341 sbLogs.append( "\r\n" );
342 AppLogService.error( "Indexing error : " + e.getMessage( ), e );
343 }
344 finally
345 {
346 try
347 {
348 if ( writer != null )
349 {
350 writer.close( );
351 }
352 }
353 catch ( IOException e )
354 {
355 AppLogService.error( e.getMessage( ), e );
356 }
357 }
358
359 return sbLogs.toString( );
360 }
361
362
363
364
365
366
367
368
369 public List<DocSearchItem> getSearchResults( String strQuery, int nStartIndex, AdminUser user )
370 {
371 ArrayList<DocSearchItem> listResults = new ArrayList<DocSearchItem>( );
372
373 try( Directory directory = NIOFSDirectory.open( Paths.get( _strIndex ) ) ; IndexReader ir = DirectoryReader.open( directory ) ; )
374 {
375 _searcher = new IndexSearcher( ir );
376
377 QueryParser parser = new QueryParser( DocSearchItem.FIELD_CONTENTS,
378 _analyzer );
379 Query query = parser.parse( ( StringUtils.isNotBlank( strQuery ) ) ? strQuery : "*:*" );
380
381 List<DocumentSpace> listSpaces = DocumentSpacesService.getInstance( ).getUserAllowedSpaces( user );
382 Query[] filters = new Query[listSpaces.size( )];
383 int nIndex = 0;
384
385 for ( DocumentSpace space : listSpaces )
386 {
387 Query querySpace = new TermQuery( new Term( DocSearchItem.FIELD_SPACE, "s" + space.getId( ) ) );
388 filters[nIndex++] = querySpace;
389 }
390
391 BooleanQuery.Builder booleanQueryBuilderFilters = new BooleanQuery.Builder( );
392 for (Query filter: filters) {
393 booleanQueryBuilderFilters.add( filter , BooleanClause.Occur.SHOULD );
394 }
395 Query allFilters = booleanQueryBuilderFilters.build( );
396
397 BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder( );
398 booleanQueryBuilder.add( allFilters, BooleanClause.Occur.FILTER );
399 booleanQueryBuilder.add( query, BooleanClause.Occur.MUST );
400
401
402 TopDocs topDocs = _searcher.search( booleanQueryBuilder.build( ) , MAX_RESPONSES );
403 ScoreDoc[] hits = topDocs.scoreDocs;
404
405 for ( ScoreDoc hit : hits )
406 {
407 int docId = hit.doc;
408 Document document = _searcher.doc( docId );
409 DocSearchItems/document/service/docsearch/DocSearchItem.html#DocSearchItem">DocSearchItem si = new DocSearchItem( document );
410 listResults.add( si );
411 }
412 }
413 catch ( Exception e )
414 {
415 AppLogService.error( e.getMessage( ), e );
416 }
417
418 return listResults;
419 }
420
421
422
423
424
425
426
427
428
429
430 public List<DocSearchItem> getSearchResults( String strQuery, boolean bTitle, boolean bSummary, String date,
431 DocumentType documentType )
432 {
433 ArrayList<DocSearchItem> listResults = new ArrayList<DocSearchItem>( );
434
435 try( Directory directory = NIOFSDirectory.open( Paths.get( _strIndex ) ) ; IndexReader ir = DirectoryReader.open( directory ) ; )
436 {
437 _searcher = new IndexSearcher( ir );
438
439 Collection<String> queries = new ArrayList<String>( );
440 Collection<String> fields = new ArrayList<String>( );
441 Collection<BooleanClause.Occur> flags = new ArrayList<BooleanClause.Occur>( );
442
443 if ( bTitle )
444 {
445 Query queryTitle = new TermQuery( new Term( DocSearchItem.FIELD_TITLE, strQuery ) );
446 queries.add( queryTitle.toString( ) );
447 fields.add( DocSearchItem.FIELD_TITLE );
448 flags.add( BooleanClause.Occur.SHOULD );
449 }
450
451 if ( bSummary )
452 {
453 Query querySummary = new TermQuery( new Term( DocSearchItem.FIELD_SUMMARY, strQuery ) );
454 queries.add( querySummary.toString( ) );
455 fields.add( DocSearchItem.FIELD_SUMMARY );
456 flags.add( BooleanClause.Occur.SHOULD );
457 }
458
459 if ( !( bTitle ) && !( bSummary ) && !( strQuery.equals( StringUtils.EMPTY ) ) )
460 {
461 Query queryContents = new TermQuery( new Term( DocSearchItem.FIELD_CONTENTS, strQuery ) );
462 queries.add( queryContents.toString( ) );
463 fields.add( DocSearchItem.FIELD_CONTENTS );
464 flags.add( BooleanClause.Occur.SHOULD );
465 }
466
467 Query queryMulti = null;
468
469 if ( strQuery.equals( StringUtils.EMPTY ) )
470 {
471 if ( documentType != null )
472 {
473 Query queryType = new TermQuery( new Term( DocSearchItem.FIELD_TYPE,
474 "\"" + documentType.getName( ) + "\"" ) );
475 queries.add( queryType.toString( ) );
476 fields.add( DocSearchItem.FIELD_TYPE );
477 flags.add( BooleanClause.Occur.SHOULD );
478 }
479
480 if ( ( date != null ) && ( !date.equals( StringUtils.EMPTY ) ) )
481 {
482 String formatedDate = formatDate( date );
483
484 Query queryDate = new TermQuery( new Term( DocSearchItem.FIELD_DATE, formatedDate ) );
485 queries.add( queryDate.toString( ) );
486 fields.add( DocSearchItem.FIELD_DATE );
487 flags.add( BooleanClause.Occur.SHOULD );
488 }
489
490 KeywordAnalyzer analyzer = new KeywordAnalyzer( );
491
492 queryMulti = MultiFieldQueryParser.parse(
493 queries.toArray( new String[queries.size( )] ), fields.toArray( new String[fields.size( )] ),
494 flags.toArray( new BooleanClause.Occur[flags.size( )] ), analyzer );
495 }
496 else
497 {
498 queryMulti = MultiFieldQueryParser.parse(
499 queries.toArray( new String[queries.size( )] ), fields.toArray( new String[fields.size( )] ),
500 flags.toArray( new BooleanClause.Occur[flags.size( )] ), IndexationService.getAnalyser( ) );
501 }
502
503 List<Query> filterList = new ArrayList<Query>( );
504
505 if ( documentType != null )
506 {
507 Query queryType = new TermQuery( new Term( DocSearchItem.FIELD_TYPE, documentType.getName( ) ) );
508 filterList.add( queryType );
509 }
510
511 if ( ( date != null ) && ( !date.equals( StringUtils.EMPTY ) ) )
512 {
513 String formatedDate = formatDate( date );
514 Query queryDate = new TermQuery( new Term( DocSearchItem.FIELD_DATE, formatedDate ) );
515 filterList.add( queryDate );
516 }
517
518 TopDocs topDocs = null;
519
520 if ( filterList.size( ) > 0 )
521 {
522 BooleanQuery.Builder booleanQueryBuilderFilters = new BooleanQuery.Builder( );
523 for (Query filter: filterList) {
524 booleanQueryBuilderFilters.add( filter , BooleanClause.Occur.MUST );
525 }
526 Query allFilters = booleanQueryBuilderFilters.build( );
527
528 BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder( );
529 booleanQueryBuilder.add( allFilters , BooleanClause.Occur.FILTER );
530 booleanQueryBuilder.add( queryMulti , BooleanClause.Occur.MUST );
531 topDocs = _searcher.search( booleanQueryBuilder.build( ), MAX_RESPONSES );
532 }
533 else
534 {
535 topDocs = _searcher.search( queryMulti, MAX_RESPONSES );
536 }
537
538 ScoreDoc[] hits = topDocs.scoreDocs;
539
540 for ( ScoreDoc hit : hits )
541 {
542 int docId = hit.doc;
543 Document document = _searcher.doc( docId );
544 listResults.add( new DocSearchItem( document ) );
545 }
546 }
547 catch ( Exception e )
548 {
549 AppLogService.error( e.getMessage( ), e );
550 }
551
552 return listResults;
553 }
554
555
556
557
558
559
560 public List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
561 {
562 IndexerActionFilter/business/IndexerActionFilter.html#IndexerActionFilter">IndexerActionFilter filter = new IndexerActionFilter( );
563 filter.setIdTask( nIdTask );
564
565 return IndexerActionHome.getList( filter );
566 }
567
568
569
570
571
572 public void removeIndexerAction( int nIdAction )
573 {
574 IndexerActionHome.remove( nIdAction );
575 }
576
577
578
579
580
581 public static void removeAllIndexerAction( )
582 {
583 IndexerActionHome.removeAll( );
584 }
585
586
587
588
589
590
591 public void addIndexerAction( int nIdDocument, int nIdTask )
592 {
593 IndexerActionbusiness/IndexerAction.html#IndexerAction">IndexerAction indexerAction = new IndexerAction( );
594 indexerAction.setIdDocument( nIdDocument );
595 indexerAction.setIdTask( nIdTask );
596 IndexerActionHome.create( indexerAction );
597 }
598
599
600
601
602
603
604 private String formatDate( String date )
605 {
606 DateFormat dateFormat = new SimpleDateFormat( PATTERN_DATE, Locale.FRENCH );
607 dateFormat.setLenient( false );
608
609 Date formatedDate;
610
611 try
612 {
613 formatedDate = dateFormat.parse( date.trim( ) );
614 }
615 catch ( ParseException e )
616 {
617 AppLogService.error( e );
618
619 return null;
620 }
621
622 return dateFormat.format( formatedDate );
623 }
624 }