1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.blog.service.docsearch;
35
36 import fr.paris.lutece.plugins.blog.business.BlogSearchFilter;
37 import fr.paris.lutece.plugins.blog.business.IndexerAction;
38 import fr.paris.lutece.plugins.blog.business.IndexerActionFilter;
39 import fr.paris.lutece.plugins.blog.business.IndexerActionHome;
40 import fr.paris.lutece.plugins.blog.service.BlogPlugin;
41 import fr.paris.lutece.portal.service.search.LuceneSearchEngine;
42 import fr.paris.lutece.portal.service.search.SearchItem;
43 import fr.paris.lutece.portal.service.search.SearchResult;
44 import fr.paris.lutece.portal.service.spring.SpringContextService;
45 import fr.paris.lutece.portal.service.util.AppException;
46 import fr.paris.lutece.portal.service.util.AppLogService;
47 import fr.paris.lutece.portal.service.util.AppPathService;
48 import fr.paris.lutece.portal.service.util.AppPropertiesService;
49
50 import org.apache.commons.lang3.StringUtils;
51 import org.apache.lucene.analysis.Analyzer;
52 import org.apache.lucene.index.DirectoryReader;
53 import org.apache.lucene.index.IndexWriter;
54 import org.apache.lucene.index.IndexWriterConfig;
55 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
56 import org.apache.lucene.index.LogDocMergePolicy;
57 import org.apache.lucene.index.LogMergePolicy;
58 import org.apache.lucene.search.IndexSearcher;
59
60 import org.apache.lucene.store.Directory;
61 import org.apache.lucene.store.FSDirectory;
62
63 import java.nio.file.Paths;
64 import java.io.IOException;
65 import java.text.ParseException;
66 import java.util.ArrayList;
67 import java.util.Collection;
68 import java.util.Date;
69 import java.util.List;
70 import org.apache.lucene.document.DateTools;
71 import org.apache.lucene.document.Document;
72 import org.apache.lucene.index.Term;
73 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
74 import org.apache.lucene.search.BooleanClause;
75 import org.apache.lucene.search.Query;
76 import org.apache.lucene.search.ScoreDoc;
77 import org.apache.lucene.search.Sort;
78 import org.apache.lucene.search.SortField;
79 import org.apache.lucene.search.TermQuery;
80 import org.apache.lucene.search.TermRangeQuery;
81 import org.apache.lucene.search.TopDocs;
82 import org.apache.lucene.util.BytesRef;
83
84
85
86
87 public final class BlogSearchService
88 {
89 private static final String PROPERTY_WRITER_MERGE_FACTOR = "blog.internalIndexer.lucene.writer.mergeFactor";
90 private static final String PROPERTY_ANALYSER_CLASS_NAME = "blog.internalIndexer.lucene.analyser.className";
91 private static final String PATH_INDEX = "blog.internalIndexer.lucene.indexPath";
92 private volatile String _strIndex;
93 private static final String WILDCARD = "*";
94
95
96 private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;
97
98
99 private static BlogSearchService _singleton;
100
101 private Analyzer _analyzer;
102 private IBlogSearchIndexer _indexer;
103 private int _nWriterMergeFactor;
104
105
106
107
108 private BlogSearchService( )
109 {
110 _strIndex = AppPathService.getPath( PATH_INDEX );
111 if ( _strIndex == null )
112 {
113 throw new AppException( "Index path not defined. Property : blog.internalIndexer.lucene.indexPath in blogs.properties" );
114 }
115
116 _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR, DEFAULT_WRITER_MERGE_FACTOR );
117
118 String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );
119
120 if ( ( strAnalyserClassName == null ) || ( strAnalyserClassName.equals( "" ) ) )
121 {
122 throw new AppException( "Analyser class name not found in blogs.properties", null );
123 }
124
125 _indexer = SpringContextService.getBean( "blog.blogIndexer" );
126
127 try
128 {
129 _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).newInstance( );
130 }
131 catch( ClassNotFoundException | IllegalAccessException | InstantiationException e )
132 {
133 throw new AppException( "Failed to load Lucene Analyzer class", e );
134 }
135 }
136
137
138
139
140
141
142 public static BlogSearchService getInstance( )
143 {
144 if ( _singleton == null )
145 {
146 _singleton = new BlogSearchService( );
147 }
148 return _singleton;
149 }
150
151
152
153
154
155
156
157
158
159
160 public int getSearchResults( BlogSearchFilter filter, List<Integer> listIdBlog )
161 {
162 int nNbItems = 0;
163
164 try
165 {
166 List<SearchResult> listResults = new ArrayList<>( );
167 nNbItems = getSearchResultsByFilter( filter, listResults );
168
169 for ( SearchResult searchResult : listResults )
170 {
171 if ( searchResult.getId( ) != null )
172 {
173 listIdBlog.add( Integer.parseInt( searchResult.getId( ) ) );
174 }
175 }
176 }
177 catch( NumberFormatException e )
178 {
179 AppLogService.error( e.getMessage( ), e );
180
181 listIdBlog.clear( );
182 }
183
184 return nNbItems;
185 }
186
187
188
189
190
191
192
193
194 public String processIndexing( boolean bCreate )
195 {
196 StringBuilder sbLogs = new StringBuilder( );
197 IndexWriter writer = null;
198 boolean bCreateIndex = bCreate;
199
200 try
201 {
202 sbLogs.append( "\r\nIndexing all contents ...\r\n" );
203
204 Directory dir = FSDirectory.open( Paths.get( _strIndex ) );
205
206
207 if ( !DirectoryReader.indexExists( dir ) )
208 {
209 bCreateIndex = true;
210 }
211
212 IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
213 LogMergePolicy mergePolicy = new LogDocMergePolicy( );
214 mergePolicy.setMergeFactor( _nWriterMergeFactor );
215 conf.setMergePolicy( mergePolicy );
216
217 if ( bCreateIndex )
218 {
219 conf.setOpenMode( OpenMode.CREATE );
220 }
221 else
222 {
223 conf.setOpenMode( OpenMode.APPEND );
224 }
225 writer = new IndexWriter( dir, conf );
226
227 Date start = new Date( );
228
229 sbLogs.append( "\r\n<strong>Indexer : " );
230 sbLogs.append( _indexer.getName( ) );
231 sbLogs.append( " - " );
232 sbLogs.append( _indexer.getDescription( ) );
233 sbLogs.append( "</strong>\r\n" );
234 _indexer.processIndexing( writer, bCreateIndex, sbLogs );
235
236 Date end = new Date( );
237
238 sbLogs.append( "Duration of the treatment : " );
239 sbLogs.append( end.getTime( ) - start.getTime( ) );
240 sbLogs.append( " milliseconds\r\n" );
241
242 }
243 catch( Exception e )
244 {
245 sbLogs.append( " caught a " );
246 sbLogs.append( e.getClass( ) );
247 sbLogs.append( "\n with message: " );
248 sbLogs.append( e.getMessage( ) );
249 sbLogs.append( "\r\n" );
250 AppLogService.error( "Indexing error : " + e.getMessage( ), e );
251 }
252 finally
253 {
254 try
255 {
256 if ( writer != null )
257 {
258 writer.close( );
259 }
260 }
261 catch( IOException e )
262 {
263 AppLogService.error( e.getMessage( ), e );
264 }
265 }
266
267 return sbLogs.toString( );
268 }
269
270
271
272
273
274
275
276
277
278
279 private int getSearchResultsByFilter( BlogSearchFilter filter, List<SearchResult> listSearchResult )
280 {
281 ArrayList<SearchItem> listResults = new ArrayList<>( );
282
283 int nNbResults = 0;
284 try ( Directory dir = FSDirectory.open( Paths.get( _strIndex ) ) ; DirectoryReader reader = DirectoryReader.open( dir ) ; )
285 {
286 IndexSearcher searcher = new IndexSearcher( reader );
287
288 Query queryMulti = prepareQueryForFilter( filter );
289
290 Sort sorter = new Sort( );
291 String field = BlogSearchItem.FIELD_DATE_UPDATE;
292 SortField.Type type = SortField.Type.LONG;
293 boolean descending = true;
294
295 SortField sortField = new SortField( field, type, descending );
296
297 sorter.setSort( sortField );
298
299 TopDocs topDocs = searcher.search( queryMulti, LuceneSearchEngine.MAX_RESPONSES, sorter );
300 ScoreDoc [ ] hits = topDocs.scoreDocs;
301 nNbResults = hits.length;
302
303 for ( int i = 0; i < nNbResults; i++ )
304 {
305 int docId = hits [i].doc;
306 Document document = searcher.doc( docId );
307 SearchItem si = new SearchItem( document );
308 listResults.add( si );
309 }
310 searcher.getIndexReader( ).close( );
311 }
312 catch( Exception e )
313 {
314 AppLogService.error( e.getMessage( ), e );
315 }
316
317 convertList( listResults, listSearchResult );
318 return nNbResults;
319 }
320
321 private Query prepareQueryForFilter( BlogSearchFilter filter ) throws org.apache.lucene.queryparser.classic.ParseException
322 {
323 boolean bDateAfter = false;
324 boolean bDateBefore = false;
325 Collection<String> queries = new ArrayList<>( );
326 Collection<String> sectors = new ArrayList<>( );
327 Collection<BooleanClause.Occur> flags = new ArrayList<>( );
328
329 if ( filter.getKeywords( ) != null && StringUtils.isNotBlank( filter.getKeywords( ) ) )
330 {
331 Term term = new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) );
332 Query termQuery = new TermQuery( term );
333 queries.add( termQuery.toString( ) );
334 sectors.add( SearchItem.FIELD_CONTENTS );
335 flags.add( BooleanClause.Occur.MUST );
336
337 }
338 if ( filter.getTag( ) != null )
339 {
340 for ( String tag : filter.getTag( ) )
341 {
342 Term term = new Term( BlogSearchItem.FIELD_TAGS, tag );
343 Query termQuery = new TermQuery( term );
344 queries.add( termQuery.toString( ) );
345 sectors.add( BlogSearchItem.FIELD_TAGS );
346 flags.add( BooleanClause.Occur.MUST );
347 }
348
349 }
350 if ( filter.getUser( ) != null )
351 {
352 Term term = new Term( BlogSearchItem.FIELD_USER, filter.getUser( ) + WILDCARD );
353 Query termQuery = new TermQuery( term );
354 queries.add( termQuery.toString( ) );
355 sectors.add( BlogSearchItem.FIELD_USER );
356 flags.add( BooleanClause.Occur.MUST );
357
358 }
359 if ( filter.getUserEditedBlogVersion( ) != null )
360 {
361 Term term = new Term( BlogSearchItem.FIELD_USERS_EDITED_BLOG, filter.getUserEditedBlogVersion( ) );
362 Query termQuery = new TermQuery( term );
363 queries.add( termQuery.toString( ) );
364 sectors.add( BlogSearchItem.FIELD_USERS_EDITED_BLOG );
365 flags.add( BooleanClause.Occur.MUST );
366
367 }
368
369 if ( filter.getUpdateDateAfter( ) != null || filter.getUpdateDateBefor( ) != null )
370 {
371 BytesRef strAfter = null;
372 BytesRef strBefore = null;
373
374 if ( filter.getUpdateDateAfter( ) != null )
375 {
376 strAfter = new BytesRef( DateTools.dateToString( filter.getUpdateDateAfter( ), DateTools.Resolution.MINUTE ) );
377 bDateAfter = true;
378 }
379
380 if ( filter.getUpdateDateBefor( ) != null )
381 {
382 Date dateBefore = filter.getUpdateDateBefor( );
383 strBefore = new BytesRef( DateTools.dateToString( dateBefore, DateTools.Resolution.MINUTE ) );
384 bDateBefore = true;
385 }
386
387 Query queryDate = new TermRangeQuery( SearchItem.FIELD_DATE, strAfter, strBefore, bDateAfter, bDateBefore );
388 queries.add( queryDate.toString( ) );
389 sectors.add( SearchItem.FIELD_DATE );
390 flags.add( BooleanClause.Occur.MUST );
391 }
392
393 if ( filter.getIsUnpulished( ) > 0 )
394 {
395 Term termIsUnpublished = new Term( BlogSearchItem.FIELD_UNPUBLISHED, String.valueOf( filter.getIsUnpulished( ) == 1 ) );
396 Query termQueryIsUnpublished = new TermQuery( termIsUnpublished );
397 queries.add( termQueryIsUnpublished.toString( ) );
398 sectors.add( BlogSearchItem.FIELD_UNPUBLISHED );
399 flags.add( BooleanClause.Occur.MUST );
400 }
401
402 Term term = new Term( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME );
403 Query termQuery = new TermQuery( term );
404 queries.add( termQuery.toString( ) );
405 sectors.add( SearchItem.FIELD_TYPE );
406 flags.add( BooleanClause.Occur.MUST );
407
408 return MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
409 flags.toArray( new BooleanClause.Occur [ flags.size( )] ), _analyzer );
410 }
411
412
413
414
415
416
417
418
419
420 public void addIndexerAction( int nIdBlog, int nIdTask )
421 {
422 IndexerActionness/IndexerAction.html#IndexerAction">IndexerAction indexerAction = new IndexerAction( );
423 indexerAction.setIdBlog( nIdBlog );
424 indexerAction.setIdTask( nIdTask );
425 IndexerActionHome.create( indexerAction );
426 }
427
428
429
430
431
432
433
434 public void removeIndexerAction( int nIdAction )
435 {
436 IndexerActionHome.remove( nIdAction );
437 }
438
439
440
441
442
443
444
445
446 public List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
447 {
448 IndexerActionFilteriness/IndexerActionFilter.html#IndexerActionFilter">IndexerActionFilter filter = new IndexerActionFilter( );
449 filter.setIdTask( nIdTask );
450
451 return IndexerActionHome.getList( filter );
452 }
453
454
455
456
457
458
459
460
461
462 private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
463 {
464 for ( SearchItem item : listSource )
465 {
466 SearchResult result = new SearchResult( );
467 result.setId( item.getId( ) );
468
469 try
470 {
471 result.setDate( DateTools.stringToDate( item.getDate( ) ) );
472 }
473 catch( ParseException e )
474 {
475 AppLogService.error( "Bad Date Format for indexed item \"" + item.getTitle( ) + "\" : " + e.getMessage( ) );
476 }
477
478 result.setUrl( item.getUrl( ) );
479 result.setTitle( item.getTitle( ) );
480 result.setSummary( item.getSummary( ) );
481 result.setType( item.getType( ) );
482 listSearchResult.add( result );
483 }
484 }
485
486 }