1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.blog.service.docsearch;
35
36 import java.io.ByteArrayInputStream;
37 import java.io.IOException;
38 import java.util.ArrayList;
39 import java.util.Date;
40 import java.util.List;
41
42 import org.apache.lucene.document.DateTools;
43 import org.apache.lucene.document.Field;
44 import org.apache.lucene.document.FieldType;
45 import org.apache.lucene.document.NumericDocValuesField;
46 import org.apache.lucene.document.StringField;
47 import org.apache.lucene.document.TextField;
48 import org.apache.lucene.document.IntPoint;
49 import org.apache.lucene.index.CorruptIndexException;
50 import org.apache.lucene.index.IndexWriter;
51 import org.apache.lucene.index.Term;
52 import org.apache.tika.exception.TikaException;
53 import org.apache.tika.metadata.Metadata;
54 import org.apache.tika.parser.ParseContext;
55 import org.apache.tika.parser.html.HtmlParser;
56 import org.apache.tika.sax.BodyContentHandler;
57 import org.xml.sax.ContentHandler;
58 import org.xml.sax.SAXException;
59
60 import fr.paris.lutece.plugins.blog.business.Blog;
61 import fr.paris.lutece.plugins.blog.business.BlogHome;
62 import fr.paris.lutece.plugins.blog.business.IndexerAction;
63 import fr.paris.lutece.plugins.blog.business.Tag;
64 import fr.paris.lutece.plugins.blog.service.BlogPlugin;
65 import fr.paris.lutece.plugins.blog.service.BlogService;
66 import fr.paris.lutece.plugins.blog.utils.BlogUtils;
67 import fr.paris.lutece.portal.service.message.SiteMessageException;
68 import fr.paris.lutece.portal.service.plugin.PluginService;
69 import fr.paris.lutece.portal.service.search.SearchItem;
70 import fr.paris.lutece.portal.service.util.AppException;
71 import fr.paris.lutece.portal.service.util.AppPropertiesService;
72 import org.apache.lucene.document.Document;
73
74
75
76
77 public class DefaultBlogIndexer implements IBlogSearchIndexer
78 {
79 private static final String PROPERTY_INDEXER_NAME = "blog.indexer.name";
80 private static final String ENABLE_VALUE_TRUE = "1";
81 private static final String PROPERTY_INDEXER_DESCRIPTION = "blog.indexer.description";
82 private static final String PROPERTY_INDEXER_VERSION = "blog.indexer.version";
83 private static final String PROPERTY_INDEXER_ENABLE = "blog.indexer.enable";
84 private static final String BLANK_SPACE = " ";
85
86
87
88
89 @Override
90 public String getDescription( )
91 {
92 return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
93 }
94
95
96
97
98
99
100
101
102
103
104
105
106
107 private void indexListBlog( IndexWriter indexWriter, List<Integer> listIdBlog ) throws IOException
108 {
109 for ( Integer nBlogId : listIdBlog )
110 {
111 Blog blog = BlogService.getInstance( ).findByPrimaryKeyWithoutBinaries( nBlogId );
112 if ( blog != null )
113 {
114 Document doc = getDocument( blog );
115 indexWriter.addDocument( doc );
116 }
117 }
118 }
119
120
121
122
123 @Override
124 public void updateDocument( IndexWriter indexWriter, Blog blog ) throws IOException
125 {
126 Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( blog.getId( ) ) );
127 Term [ ] terms = {
128 term
129 };
130
131 indexWriter.deleteDocuments( terms );
132 Document doc = getDocument( blog );
133 indexWriter.addDocument( doc );
134 }
135
136
137
138
139 @Override
140 public synchronized void processIndexing( IndexWriter indexWriter, boolean bCreate, StringBuilder sbLogs )
141 throws IOException, InterruptedException, SiteMessageException
142 {
143 List<Integer> listIdBlog = new ArrayList<>( );
144
145 if ( !bCreate )
146 {
147
148
149 for ( fr.paris.lutece.plugins.blog.business.IndexerAction action : BlogSearchService.getInstance( )
150 .getAllIndexerActionByTask( IndexerAction.TASK_DELETE ) )
151 {
152 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_DELETE );
153
154 Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( action.getIdBlog( ) ) );
155 Term [ ] terms = {
156 term
157 };
158
159 indexWriter.deleteDocuments( terms );
160 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
161 }
162
163
164 for ( IndexerAction action : BlogSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_MODIFY ) )
165 {
166 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_MODIFY );
167
168 Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( action.getIdBlog( ) ) );
169 Term [ ] terms = {
170 term
171 };
172
173 indexWriter.deleteDocuments( terms );
174 listIdBlog = new ArrayList<>( );
175 listIdBlog.add( action.getIdBlog( ) );
176 this.indexListBlog( indexWriter, listIdBlog );
177 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
178 }
179
180 listIdBlog = new ArrayList<>( );
181
182
183 for ( IndexerAction action : BlogSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_CREATE ) )
184 {
185 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_CREATE );
186 listIdBlog.add( action.getIdBlog( ) );
187
188 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
189 }
190
191 indexListBlog( indexWriter, listIdBlog );
192 }
193 else
194 {
195 for ( Blog doc : BlogHome.getBlogsList( ) )
196 {
197
198 sbLogs.append( "Indexing Blog" );
199 sbLogs.append( "\r\n" );
200
201 sbLogBlog( sbLogs, doc.getId( ), IndexerAction.TASK_CREATE );
202
203 listIdBlog.add( doc.getId( ) );
204
205 }
206
207 indexListBlog( indexWriter, listIdBlog );
208 }
209
210 indexWriter.commit( );
211 }
212
213
214
215
216
217
218
219
220
221
222 public static org.apache.lucene.document.Document getDocument( Blog blog ) throws IOException
223 {
224
225 org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
226
227 doc.add( new StringField( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( blog.getId( ) ), Field.Store.YES ) );
228
229 doc.add( new StringField( BlogSearchItem.FIELD_USER, blog.getUserCreator( ).toLowerCase( ), Field.Store.YES ) );
230
231 doc.add( new TextField( BlogSearchItem.FIELD_TAGS, getTagToIndex( blog ), Field.Store.YES ) );
232 doc.add( new TextField( BlogSearchItem.FIELD_USERS_EDITED_BLOG, getUsersEditedBlogVersions( blog ), Field.Store.YES ) );
233
234 FieldType ft = new FieldType( StringField.TYPE_STORED );
235 ft.setOmitNorms( false );
236 doc.add( new Field( SearchItem.FIELD_DATE, DateTools.timeToString( blog.getUpdateDate( ).getTime( ), DateTools.Resolution.MINUTE ), ft ) );
237 doc.add( new NumericDocValuesField( BlogSearchItem.FIELD_DATE_UPDATE, blog.getUpdateDate( ).getTime( ) ) );
238
239 Date today = new Date( );
240 boolean isPublished = blog.getBlogPublication( ).stream( )
241 .anyMatch( publication -> today.after( publication.getDateBeginPublishing( ) ) && today.before( publication.getDateEndPublishing( ) ) );
242 doc.add( new TextField( BlogSearchItem.FIELD_UNPUBLISHED, ( isPublished ) ? "false" : "true", Field.Store.YES ) );
243
244 doc.add( new TextField( BlogSearchItem.FIELD_ARCHIVED, blog.isArchived( ) ? "true" : "false", Field.Store.YES ) );
245
246
247
248 String strIdAnnounce = String.valueOf( blog.getId( ) );
249 doc.add( new StringField( SearchItem.FIELD_UID, strIdAnnounce, Field.Store.YES ) );
250
251 String strContentToIndex = getContentToIndex( blog );
252
253 ContentHandler handler = new BodyContentHandler( -1 );
254 Metadata metadata = new Metadata( );
255
256 try
257 {
258 new HtmlParser( ).parse( new ByteArrayInputStream( strContentToIndex.getBytes( ) ), handler, metadata, new ParseContext( ) );
259 }
260 catch( TikaException | SAXException e )
261 {
262 throw new AppException( "Error during blog parsing. blog Id: " + blog.getId( ), e );
263 }
264
265 String strContent = handler.toString( );
266
267
268
269 doc.add( new TextField( SearchItem.FIELD_CONTENTS, strContent, Field.Store.NO ) );
270
271 doc.add( new TextField( SearchItem.FIELD_SUMMARY, blog.getHtmlContent( ), Field.Store.YES ) );
272
273
274 doc.add( new StringField( SearchItem.FIELD_TITLE, blog.getName( ), Field.Store.YES ) );
275
276 doc.add( new StringField( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME, Field.Store.YES ) );
277
278
279 return doc;
280 }
281
282
283
284
285
286
287
288
289 private static String getContentToIndex( Blog blog )
290 {
291 StringBuilder sbContentToIndex = new StringBuilder( );
292
293 sbContentToIndex.append( blog.getName( ) );
294 sbContentToIndex.append( BLANK_SPACE );
295 sbContentToIndex.append( blog.getDescription( ) );
296 sbContentToIndex.append( BLANK_SPACE );
297 sbContentToIndex.append( blog.getHtmlContent( ) );
298 sbContentToIndex.append( BLANK_SPACE );
299 sbContentToIndex.append( blog.getId( ) );
300
301 return sbContentToIndex.toString( );
302 }
303
304
305
306
307
308
309
310
311 private static String getTagToIndex( Blog blog )
312 {
313 StringBuilder sbContentToIndex = new StringBuilder( );
314
315 for ( Tag tg : blog.getTag( ) )
316 {
317 sbContentToIndex.append( BLANK_SPACE );
318 sbContentToIndex.append( tg.getIdTag( ) );
319 }
320
321 return sbContentToIndex.toString( );
322 }
323
324
325
326
327
328
329
330
331 private static String getUsersEditedBlogVersions( Blog blog )
332 {
333 StringBuilder sbContentToIndex = new StringBuilder( );
334 List<String> usersList = BlogHome.getUsersEditedBlogVersions( blog.getId( ) );
335
336 for ( String user : usersList )
337 {
338 sbContentToIndex.append( BLANK_SPACE );
339 sbContentToIndex.append( user );
340 }
341
342 return sbContentToIndex.toString( );
343 }
344
345
346
347
348 @Override
349 public String getName( )
350 {
351 return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
352 }
353
354
355
356
357 @Override
358 public String getVersion( )
359 {
360 return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
361 }
362
363
364
365
366 @Override
367 public boolean isEnable( )
368 {
369 boolean bReturn = false;
370 String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
371
372 if ( ( strEnable != null ) && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
373 && PluginService.isPluginEnable( BlogPlugin.PLUGIN_NAME ) )
374 {
375 bReturn = true;
376 }
377
378 return bReturn;
379 }
380
381
382
383
384
385
386
387
388
389
390
391 private void sbLogBlog( StringBuilder sbLogs, int nIdBlog, int nAction )
392 {
393 sbLogs.append( "Indexing Blogs:" );
394
395 switch( nAction )
396 {
397 case IndexerAction.TASK_CREATE:
398 sbLogs.append( "Insert " );
399
400 break;
401
402 case IndexerAction.TASK_MODIFY:
403 sbLogs.append( "Modify " );
404
405 break;
406
407 case IndexerAction.TASK_DELETE:
408 sbLogs.append( "Delete " );
409
410 break;
411
412 default:
413 break;
414 }
415
416 if ( nIdBlog != BlogUtils.CONSTANT_ID_NULL )
417 {
418 sbLogs.append( "id_blog=" );
419 sbLogs.append( nIdBlog );
420 }
421
422 sbLogs.append( "\r\n" );
423 }
424
425 }