1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.blog.service.docsearch;
35
36 import java.io.ByteArrayInputStream;
37 import java.io.IOException;
38 import java.util.ArrayList;
39 import java.util.Date;
40 import java.util.List;
41
42 import org.apache.lucene.document.DateTools;
43 import org.apache.lucene.document.Field;
44 import org.apache.lucene.document.FieldType;
45 import org.apache.lucene.document.NumericDocValuesField;
46 import org.apache.lucene.document.StringField;
47 import org.apache.lucene.document.TextField;
48 import org.apache.lucene.index.CorruptIndexException;
49 import org.apache.lucene.index.IndexWriter;
50 import org.apache.lucene.index.Term;
51 import org.apache.tika.exception.TikaException;
52 import org.apache.tika.metadata.Metadata;
53 import org.apache.tika.parser.ParseContext;
54 import org.apache.tika.parser.html.HtmlParser;
55 import org.apache.tika.sax.BodyContentHandler;
56 import org.xml.sax.ContentHandler;
57 import org.xml.sax.SAXException;
58
59 import fr.paris.lutece.plugins.blog.business.Blog;
60 import fr.paris.lutece.plugins.blog.business.BlogHome;
61 import fr.paris.lutece.plugins.blog.business.IndexerAction;
62 import fr.paris.lutece.plugins.blog.business.Tag;
63 import fr.paris.lutece.plugins.blog.service.BlogPlugin;
64 import fr.paris.lutece.plugins.blog.service.BlogService;
65 import fr.paris.lutece.plugins.blog.utils.BlogUtils;
66 import fr.paris.lutece.portal.service.message.SiteMessageException;
67 import fr.paris.lutece.portal.service.plugin.PluginService;
68 import fr.paris.lutece.portal.service.search.SearchItem;
69 import fr.paris.lutece.portal.service.util.AppException;
70 import fr.paris.lutece.portal.service.util.AppPropertiesService;
71 import org.apache.lucene.document.Document;
72
73
74
75
76 public class DefaultBlogIndexer implements IBlogSearchIndexer
77 {
78 private static final String PROPERTY_INDEXER_NAME = "blog.indexer.name";
79 private static final String ENABLE_VALUE_TRUE = "1";
80 private static final String PROPERTY_INDEXER_DESCRIPTION = "blog.indexer.description";
81 private static final String PROPERTY_INDEXER_VERSION = "blog.indexer.version";
82 private static final String PROPERTY_INDEXER_ENABLE = "blog.indexer.enable";
83 private static final String BLANK_SPACE = " ";
84
85
86
87
88 @Override
89 public String getDescription( )
90 {
91 return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
92 }
93
94
95
96
97
98
99
100
101
102
103
104
105
106 private void indexListBlog( IndexWriter indexWriter, List<Integer> listIdBlog ) throws IOException
107 {
108 for ( Integer nBlogId : listIdBlog )
109 {
110 Blog blog = BlogService.getInstance( ).findByPrimaryKeyWithoutBinaries( nBlogId );
111 if ( blog != null )
112 {
113 Document doc = getDocument( blog );
114 indexWriter.addDocument( doc );
115 }
116 }
117 }
118
119
120
121
122 @Override
123 public synchronized void processIndexing( IndexWriter indexWriter, boolean bCreate, StringBuilder sbLogs )
124 throws IOException, InterruptedException, SiteMessageException
125 {
126 List<Integer> listIdBlog = new ArrayList<>( );
127
128 if ( !bCreate )
129 {
130
131
132 for ( fr.paris.lutece.plugins.blog.business.IndexerAction action : BlogSearchService.getInstance( )
133 .getAllIndexerActionByTask( IndexerAction.TASK_DELETE ) )
134 {
135 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_DELETE );
136
137 Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( action.getIdBlog( ) ) );
138 Term [ ] terms = {
139 term
140 };
141
142 indexWriter.deleteDocuments( terms );
143 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
144 }
145
146
147 for ( IndexerAction action : BlogSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_MODIFY ) )
148 {
149 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_MODIFY );
150
151 Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( action.getIdBlog( ) ) );
152 Term [ ] terms = {
153 term
154 };
155
156 indexWriter.deleteDocuments( terms );
157 listIdBlog = new ArrayList<>( );
158 listIdBlog.add( action.getIdBlog( ) );
159 this.indexListBlog( indexWriter, listIdBlog );
160 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
161 }
162
163 listIdBlog = new ArrayList<>( );
164
165
166 for ( IndexerAction action : BlogSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_CREATE ) )
167 {
168 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_CREATE );
169 listIdBlog.add( action.getIdBlog( ) );
170
171 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
172 }
173
174 indexListBlog( indexWriter, listIdBlog );
175 }
176 else
177 {
178 for ( Blog doc : BlogHome.getBlogsList( ) )
179 {
180
181 sbLogs.append( "Indexing Blog" );
182 sbLogs.append( "\r\n" );
183
184 sbLogBlog( sbLogs, doc.getId( ), IndexerAction.TASK_CREATE );
185
186 listIdBlog.add( doc.getId( ) );
187
188 }
189
190 indexListBlog( indexWriter, listIdBlog );
191 }
192
193 indexWriter.commit( );
194 }
195
196
197
198
199
200
201
202
203
204
205 public static org.apache.lucene.document.Document getDocument( Blog blog ) throws IOException
206 {
207
208 org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
209
210 doc.add( new StringField( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( blog.getId( ) ), Field.Store.YES ) );
211
212 doc.add( new StringField( BlogSearchItem.FIELD_USER, blog.getUserCreator( ).toLowerCase( ), Field.Store.YES ) );
213
214 doc.add( new TextField( BlogSearchItem.FIELD_TAGS, getTagToIndex( blog ), Field.Store.YES ) );
215 doc.add( new TextField( BlogSearchItem.FIELD_USERS_EDITED_BLOG, getUsersEditedBlogVersions( blog ), Field.Store.YES ) );
216
217 FieldType ft = new FieldType( StringField.TYPE_STORED );
218 ft.setOmitNorms( false );
219 doc.add( new Field( SearchItem.FIELD_DATE, DateTools.timeToString( blog.getUpdateDate( ).getTime( ), DateTools.Resolution.MINUTE ), ft ) );
220 doc.add( new NumericDocValuesField( BlogSearchItem.FIELD_DATE_UPDATE, blog.getUpdateDate( ).getTime( ) ) );
221
222 Date today = new Date( );
223 boolean isPublished = blog.getBlogPublication( ).stream( )
224 .anyMatch( publication -> today.after( publication.getDateBeginPublishing( ) ) && today.before( publication.getDateEndPublishing( ) ) );
225 doc.add( new TextField( BlogSearchItem.FIELD_UNPUBLISHED, ( isPublished ) ? "false" : "true", Field.Store.YES ) );
226
227
228
229
230 String strIdAnnounce = String.valueOf( blog.getId( ) );
231 doc.add( new StringField( SearchItem.FIELD_UID, strIdAnnounce, Field.Store.YES ) );
232
233 String strContentToIndex = getContentToIndex( blog );
234
235 ContentHandler handler = new BodyContentHandler( -1 );
236 Metadata metadata = new Metadata( );
237
238 try
239 {
240 new HtmlParser( ).parse( new ByteArrayInputStream( strContentToIndex.getBytes( ) ), handler, metadata, new ParseContext( ) );
241 }
242 catch( TikaException | SAXException e )
243 {
244 throw new AppException( "Error during blog parsing. blog Id: " + blog.getId( ), e );
245 }
246
247 String strContent = handler.toString( );
248
249
250
251 doc.add( new TextField( SearchItem.FIELD_CONTENTS, strContent, Field.Store.NO ) );
252
253 doc.add( new TextField( SearchItem.FIELD_SUMMARY, blog.getHtmlContent( ), Field.Store.YES ) );
254
255
256 doc.add( new StringField( SearchItem.FIELD_TITLE, blog.getName( ), Field.Store.YES ) );
257
258 doc.add( new StringField( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME, Field.Store.YES ) );
259
260
261 return doc;
262 }
263
264
265
266
267
268
269
270
271 private static String getContentToIndex( Blog blog )
272 {
273 StringBuilder sbContentToIndex = new StringBuilder( );
274
275 sbContentToIndex.append( blog.getName( ) );
276 sbContentToIndex.append( BLANK_SPACE );
277 sbContentToIndex.append( blog.getDescription( ) );
278 sbContentToIndex.append( BLANK_SPACE );
279 sbContentToIndex.append( blog.getHtmlContent( ) );
280 sbContentToIndex.append( BLANK_SPACE );
281 sbContentToIndex.append( blog.getId( ) );
282
283 return sbContentToIndex.toString( );
284 }
285
286
287
288
289
290
291
292
293 private static String getTagToIndex( Blog blog )
294 {
295 StringBuilder sbContentToIndex = new StringBuilder( );
296
297 for ( Tag tg : blog.getTag( ) )
298 {
299 sbContentToIndex.append( BLANK_SPACE );
300 sbContentToIndex.append( tg.getIdTag( ) );
301 }
302
303 return sbContentToIndex.toString( );
304 }
305
306
307
308
309
310
311
312
313 private static String getUsersEditedBlogVersions( Blog blog )
314 {
315 StringBuilder sbContentToIndex = new StringBuilder( );
316 List<String> usersList = BlogHome.getUsersEditedBlogVersions( blog.getId( ) );
317
318 for ( String user : usersList )
319 {
320 sbContentToIndex.append( BLANK_SPACE );
321 sbContentToIndex.append( user );
322 }
323
324 return sbContentToIndex.toString( );
325 }
326
327
328
329
330 @Override
331 public String getName( )
332 {
333 return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
334 }
335
336
337
338
339 @Override
340 public String getVersion( )
341 {
342 return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
343 }
344
345
346
347
348 @Override
349 public boolean isEnable( )
350 {
351 boolean bReturn = false;
352 String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
353
354 if ( ( strEnable != null ) && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
355 && PluginService.isPluginEnable( BlogPlugin.PLUGIN_NAME ) )
356 {
357 bReturn = true;
358 }
359
360 return bReturn;
361 }
362
363
364
365
366
367
368
369
370
371
372
373 private void sbLogBlog( StringBuilder sbLogs, int nIdBlog, int nAction )
374 {
375 sbLogs.append( "Indexing Blogs:" );
376
377 switch( nAction )
378 {
379 case IndexerAction.TASK_CREATE:
380 sbLogs.append( "Insert " );
381
382 break;
383
384 case IndexerAction.TASK_MODIFY:
385 sbLogs.append( "Modify " );
386
387 break;
388
389 case IndexerAction.TASK_DELETE:
390 sbLogs.append( "Delete " );
391
392 break;
393
394 default:
395 break;
396 }
397
398 if ( nIdBlog != BlogUtils.CONSTANT_ID_NULL )
399 {
400 sbLogs.append( "id_blog=" );
401 sbLogs.append( nIdBlog );
402 }
403
404 sbLogs.append( "\r\n" );
405 }
406
407 }