1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.blog.service.docsearch;
35
36 import fr.paris.lutece.plugins.blog.business.BlogSearchFilter;
37 import fr.paris.lutece.plugins.blog.business.IndexerAction;
38 import fr.paris.lutece.plugins.blog.business.IndexerActionFilter;
39 import fr.paris.lutece.plugins.blog.business.IndexerActionHome;
40 import fr.paris.lutece.plugins.blog.service.BlogPlugin;
41 import fr.paris.lutece.portal.service.search.LuceneSearchEngine;
42 import fr.paris.lutece.portal.service.search.SearchItem;
43 import fr.paris.lutece.portal.service.search.SearchResult;
44 import fr.paris.lutece.portal.service.spring.SpringContextService;
45 import fr.paris.lutece.portal.service.util.AppException;
46 import fr.paris.lutece.portal.service.util.AppLogService;
47 import fr.paris.lutece.portal.service.util.AppPathService;
48 import fr.paris.lutece.portal.service.util.AppPropertiesService;
49 import fr.paris.lutece.plugins.blog.business.Blog;
50
51 import org.apache.commons.lang3.StringUtils;
52 import org.apache.lucene.analysis.Analyzer;
53 import org.apache.lucene.index.DirectoryReader;
54 import org.apache.lucene.index.IndexWriter;
55 import org.apache.lucene.index.IndexWriterConfig;
56 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
57 import org.apache.lucene.index.LogDocMergePolicy;
58 import org.apache.lucene.index.LogMergePolicy;
59 import org.apache.lucene.search.IndexSearcher;
60
61 import org.apache.lucene.store.Directory;
62 import org.apache.lucene.store.FSDirectory;
63
64 import java.nio.file.Paths;
65 import java.io.IOException;
66 import java.text.ParseException;
67 import java.util.ArrayList;
68 import java.util.Collection;
69 import java.util.Date;
70 import java.util.List;
71 import org.apache.lucene.document.DateTools;
72 import org.apache.lucene.document.Document;
73 import org.apache.lucene.index.Term;
74 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
75 import org.apache.lucene.search.BooleanClause;
76 import org.apache.lucene.search.Query;
77 import org.apache.lucene.search.ScoreDoc;
78 import org.apache.lucene.search.Sort;
79 import org.apache.lucene.search.SortField;
80 import org.apache.lucene.search.TermQuery;
81 import org.apache.lucene.search.TermRangeQuery;
82 import org.apache.lucene.search.TopDocs;
83 import org.apache.lucene.util.BytesRef;
84
85
86
87
88 public final class BlogSearchService
89 {
90 private static final String PROPERTY_WRITER_MERGE_FACTOR = "blog.internalIndexer.lucene.writer.mergeFactor";
91 private static final String PROPERTY_ANALYSER_CLASS_NAME = "blog.internalIndexer.lucene.analyser.className";
92 private static final String PATH_INDEX = "blog.internalIndexer.lucene.indexPath";
93 private volatile String _strIndex;
94 private static final String WILDCARD = "*";
95
96
97 private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;
98
99
100 private static BlogSearchService _singleton;
101
102 private Analyzer _analyzer;
103 private IBlogSearchIndexer _indexer;
104 private int _nWriterMergeFactor;
105
106
107
108
109 private BlogSearchService( )
110 {
111 _strIndex = AppPathService.getPath( PATH_INDEX );
112 if ( _strIndex == null )
113 {
114 throw new AppException( "Index path not defined. Property : blog.internalIndexer.lucene.indexPath in blogs.properties" );
115 }
116
117 _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR, DEFAULT_WRITER_MERGE_FACTOR );
118
119 String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
120
121 if ( ( strAnalyserClassName == null ) || ( strAnalyserClassName.equals( "" ) ) )
122 {
123 throw new AppException( "Analyser class name not found in blogs.properties", null );
124 }
125
126 _indexer = SpringContextService.getBean( "blog.blogIndexer" );
127
128 try
129 {
130 _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
131 }
132 catch( ClassNotFoundException | IllegalAccessException | InstantiationException e )
133 {
134 throw new AppException( "Failed to load Lucene Analyzer class", e );
135 }
136 }
137
138
139
140
141
142
143 public static BlogSearchService getInstance( )
144 {
145 if ( _singleton == null )
146 {
147 _singleton = new BlogSearchService( );
148 }
149 return _singleton;
150 }
151
152
153
154
155
156
157
158
159
160
161 public int getSearchResults( BlogSearchFilter filter, List<Integer> listIdBlog )
162 {
163 int nNbItems = 0;
164
165 try
166 {
167 List<SearchResult> listResults = new ArrayList<>( );
168 nNbItems = getSearchResultsByFilter( filter, listResults );
169
170 for ( SearchResult searchResult : listResults )
171 {
172 if ( searchResult.getId( ) != null )
173 {
174 listIdBlog.add( Integer.parseInt( searchResult.getId( ) ) );
175 }
176 }
177 }
178 catch( NumberFormatException e )
179 {
180 AppLogService.error( e.getMessage( ), e );
181
182 listIdBlog.clear( );
183 }
184
185 return nNbItems;
186 }
187
188
189
190
191
192
193
194
195 public String processIndexing( boolean bCreate )
196 {
197 StringBuilder sbLogs = new StringBuilder( );
198 IndexWriter writer = null;
199 boolean bCreateIndex = bCreate;
200
201 try
202 {
203 sbLogs.append( "\r\nIndexing all contents ...\r\n" );
204
205 Directory dir = FSDirectory.open( Paths.get( _strIndex ) );
206
207
208 if ( !DirectoryReader.indexExists( dir ) )
209 {
210 bCreateIndex = true;
211 }
212
213 IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
214 LogMergePolicy mergePolicy = new LogDocMergePolicy( );
215 mergePolicy.setMergeFactor( _nWriterMergeFactor );
216 conf.setMergePolicy( mergePolicy );
217
218 if ( bCreateIndex )
219 {
220 conf.setOpenMode( OpenMode.CREATE );
221 }
222 else
223 {
224 conf.setOpenMode( OpenMode.APPEND );
225 }
226 writer = new IndexWriter( dir, conf );
227
228 Date start = new Date( );
229
230 sbLogs.append( "\r\n<strong>Indexer : " );
231 sbLogs.append( _indexer.getName( ) );
232 sbLogs.append( " - " );
233 sbLogs.append( _indexer.getDescription( ) );
234 sbLogs.append( "</strong>\r\n" );
235 _indexer.processIndexing( writer, bCreateIndex, sbLogs );
236
237 Date end = new Date( );
238
239 sbLogs.append( "Duration of the treatment : " );
240 sbLogs.append( end.getTime( ) - start.getTime( ) );
241 sbLogs.append( " milliseconds\r\n" );
242
243 }
244 catch( Exception e )
245 {
246 sbLogs.append( " caught a " );
247 sbLogs.append( e.getClass( ) );
248 sbLogs.append( "\n with message: " );
249 sbLogs.append( e.getMessage( ) );
250 sbLogs.append( "\r\n" );
251 AppLogService.error( "Indexing error : " + e.getMessage( ), e );
252 }
253 finally
254 {
255 try
256 {
257 if ( writer != null )
258 {
259 writer.close( );
260 }
261 }
262 catch( IOException e )
263 {
264 AppLogService.error( e.getMessage( ), e );
265 }
266 }
267
268 return sbLogs.toString( );
269 }
270 public void updateDocument ( Blog blog) {
271 try ( IndexWriter writer = new IndexWriter( FSDirectory.open( Paths.get( _strIndex ) ), new IndexWriterConfig( _analyzer ) ) )
272 {
273 IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
274 LogMergePolicy mergePolicy = new LogDocMergePolicy( );
275 mergePolicy.setMergeFactor( _nWriterMergeFactor );
276 conf.setMergePolicy( mergePolicy );
277 conf.setOpenMode( OpenMode.CREATE_OR_APPEND );
278 _indexer.updateDocument( writer, blog );
279 }
280 catch( Exception e )
281 {
282 AppLogService.error( "Indexing error : " + e.getMessage( ), e );
283 }
284
285 }
286
287
288
289
290
291
292
293
294
295
296 private int getSearchResultsByFilter( BlogSearchFilter filter, List<SearchResult> listSearchResult )
297 {
298 ArrayList<SearchItem> listResults = new ArrayList<>( );
299
300 int nNbResults = 0;
301 try ( Directory dir = FSDirectory.open( Paths.get( _strIndex ) ) ; DirectoryReader reader = DirectoryReader.open( dir ) ; )
302 {
303 IndexSearcher searcher = new IndexSearcher( reader );
304
305 Query queryMulti = prepareQueryForFilter( filter );
306
307 Sort sorter = new Sort( );
308 String field = BlogSearchItem.FIELD_DATE_UPDATE;
309 SortField.Type type = SortField.Type.LONG;
310 boolean descending = true;
311
312 SortField sortField = new SortField( field, type, descending );
313
314 sorter.setSort( sortField );
315
316 TopDocs topDocs = searcher.search( queryMulti, LuceneSearchEngine.MAX_RESPONSES, sorter );
317 ScoreDoc [ ] hits = topDocs.scoreDocs;
318 nNbResults = hits.length;
319
320 for ( int i = 0; i < nNbResults; i++ )
321 {
322 int docId = hits [i].doc;
323 Document document = searcher.doc( docId );
324 SearchItem si = new SearchItem( document );
325 listResults.add( si );
326 }
327 searcher.getIndexReader( ).close( );
328 }
329 catch( Exception e )
330 {
331 AppLogService.error( e.getMessage( ), e );
332 }
333
334 convertList( listResults, listSearchResult );
335 return nNbResults;
336 }
337
338 private Query prepareQueryForFilter( BlogSearchFilter filter ) throws org.apache.lucene.queryparser.classic.ParseException
339 {
340 boolean bDateAfter = false;
341 boolean bDateBefore = false;
342 Collection<String> queries = new ArrayList<>( );
343 Collection<String> sectors = new ArrayList<>( );
344 Collection<BooleanClause.Occur> flags = new ArrayList<>( );
345
346 if ( filter.getKeywords( ) != null && StringUtils.isNotBlank( filter.getKeywords( ) ) )
347 {
348 Term term = new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) );
349 Query termQuery = new TermQuery( term );
350 queries.add( termQuery.toString( ) );
351 sectors.add( SearchItem.FIELD_CONTENTS );
352 flags.add( BooleanClause.Occur.MUST );
353
354 }
355 if ( filter.getTag( ) != null )
356 {
357 for ( String tag : filter.getTag( ) )
358 {
359 Term term = new Term( BlogSearchItem.FIELD_TAGS, tag );
360 Query termQuery = new TermQuery( term );
361 queries.add( termQuery.toString( ) );
362 sectors.add( BlogSearchItem.FIELD_TAGS );
363 flags.add( BooleanClause.Occur.MUST );
364 }
365
366 }
367 if ( filter.getUser( ) != null )
368 {
369 Term term = new Term( BlogSearchItem.FIELD_USER, filter.getUser( ) + WILDCARD );
370 Query termQuery = new TermQuery( term );
371 queries.add( termQuery.toString( ) );
372 sectors.add( BlogSearchItem.FIELD_USER );
373 flags.add( BooleanClause.Occur.MUST );
374
375 }
376 if ( filter.getUserEditedBlogVersion( ) != null )
377 {
378 Term term = new Term( BlogSearchItem.FIELD_USERS_EDITED_BLOG, filter.getUserEditedBlogVersion( ) );
379 Query termQuery = new TermQuery( term );
380 queries.add( termQuery.toString( ) );
381 sectors.add( BlogSearchItem.FIELD_USERS_EDITED_BLOG );
382 flags.add( BooleanClause.Occur.MUST );
383
384 }
385
386 if ( filter.getUpdateDateAfter( ) != null || filter.getUpdateDateBefor( ) != null )
387 {
388 BytesRef strAfter = null;
389 BytesRef strBefore = null;
390
391 if ( filter.getUpdateDateAfter( ) != null )
392 {
393 strAfter = new BytesRef( DateTools.dateToString( filter.getUpdateDateAfter( ), DateTools.Resolution.MINUTE ) );
394 bDateAfter = true;
395 }
396
397 if ( filter.getUpdateDateBefor( ) != null )
398 {
399 Date dateBefore = filter.getUpdateDateBefor( );
400 strBefore = new BytesRef( DateTools.dateToString( dateBefore, DateTools.Resolution.MINUTE ) );
401 bDateBefore = true;
402 }
403
404 Query queryDate = new TermRangeQuery( SearchItem.FIELD_DATE, strAfter, strBefore, bDateAfter, bDateBefore );
405 queries.add( queryDate.toString( ) );
406 sectors.add( SearchItem.FIELD_DATE );
407 flags.add( BooleanClause.Occur.MUST );
408 }
409
410 Term termIsArchived = new Term( BlogSearchItem.FIELD_ARCHIVED, filter.getIsArchived() ? "true" : "false" );
411 Query termQueryIsArchived = new TermQuery( termIsArchived );
412 queries.add( termQueryIsArchived.toString( ) );
413 sectors.add( BlogSearchItem.FIELD_ARCHIVED );
414 flags.add( BooleanClause.Occur.MUST );
415
416 if ( !filter.getIsArchived( ))
417 {
418 if ( filter.getIsUnpulished( ) > 0 )
419 {
420 Term termIsUnpublished = new Term( BlogSearchItem.FIELD_UNPUBLISHED, String.valueOf( filter.getIsUnpulished( ) == 1 ) );
421 Query termQueryIsUnpublished = new TermQuery( termIsUnpublished );
422 queries.add( termQueryIsUnpublished.toString( ) );
423 sectors.add( BlogSearchItem.FIELD_UNPUBLISHED );
424 flags.add( BooleanClause.Occur.MUST );
425 }
426 }
427
428 Term term = new Term( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME );
429 Query termQuery = new TermQuery( term );
430 queries.add( termQuery.toString( ) );
431 sectors.add( SearchItem.FIELD_TYPE );
432 flags.add( BooleanClause.Occur.MUST );
433
434 return MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
435 flags.toArray( new BooleanClause.Occur [ flags.size( )] ), _analyzer );
436 }
437
438
439
440
441
442
443
444
445
446 public void addIndexerAction( int nIdBlog, int nIdTask )
447 {
448 IndexerActionness/IndexerAction.html#IndexerAction">IndexerAction indexerAction = new IndexerAction( );
449 indexerAction.setIdBlog( nIdBlog );
450 indexerAction.setIdTask( nIdTask );
451 IndexerActionHome.create( indexerAction );
452 }
453
454
455
456
457
458
459
460 public void removeIndexerAction( int nIdAction )
461 {
462 IndexerActionHome.remove( nIdAction );
463 }
464
465
466
467
468
469
470
471
472 public List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
473 {
474 IndexerActionFilteriness/IndexerActionFilter.html#IndexerActionFilter">IndexerActionFilter filter = new IndexerActionFilter( );
475 filter.setIdTask( nIdTask );
476
477 return IndexerActionHome.getList( filter );
478 }
479
480
481
482
483
484
485
486
487
488 private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
489 {
490 for ( SearchItem item : listSource )
491 {
492 SearchResult result = new SearchResult( );
493 result.setId( item.getId( ) );
494
495 try
496 {
497 result.setDate( DateTools.stringToDate( item.getDate( ) ) );
498 }
499 catch( ParseException e )
500 {
501 AppLogService.error( "Bad Date Format for indexed item \"" + item.getTitle( ) + "\" : " + e.getMessage( ) );
502 }
503
504 result.setUrl( item.getUrl( ) );
505 result.setTitle( item.getTitle( ) );
506 result.setSummary( item.getSummary( ) );
507 result.setType( item.getType( ) );
508 listSearchResult.add( result );
509 }
510 }
511
512 }