View Javadoc
1   /*
2    * Copyright (c) 2002-2021, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.blog.service.docsearch;
35  
36  import java.io.ByteArrayInputStream;
37  import java.io.IOException;
38  import java.util.ArrayList;
39  import java.util.Date;
40  import java.util.List;
41  
42  import org.apache.lucene.document.DateTools;
43  import org.apache.lucene.document.Field;
44  import org.apache.lucene.document.FieldType;
45  import org.apache.lucene.document.NumericDocValuesField;
46  import org.apache.lucene.document.StringField;
47  import org.apache.lucene.document.TextField;
48  import org.apache.lucene.document.IntPoint;
49  import org.apache.lucene.index.CorruptIndexException;
50  import org.apache.lucene.index.IndexWriter;
51  import org.apache.lucene.index.Term;
52  import org.apache.tika.exception.TikaException;
53  import org.apache.tika.metadata.Metadata;
54  import org.apache.tika.parser.ParseContext;
55  import org.apache.tika.parser.html.HtmlParser;
56  import org.apache.tika.sax.BodyContentHandler;
57  import org.xml.sax.ContentHandler;
58  import org.xml.sax.SAXException;
59  
60  import fr.paris.lutece.plugins.blog.business.Blog;
61  import fr.paris.lutece.plugins.blog.business.BlogHome;
62  import fr.paris.lutece.plugins.blog.business.IndexerAction;
63  import fr.paris.lutece.plugins.blog.business.Tag;
64  import fr.paris.lutece.plugins.blog.service.BlogPlugin;
65  import fr.paris.lutece.plugins.blog.service.BlogService;
66  import fr.paris.lutece.plugins.blog.utils.BlogUtils;
67  import fr.paris.lutece.portal.service.message.SiteMessageException;
68  import fr.paris.lutece.portal.service.plugin.PluginService;
69  import fr.paris.lutece.portal.service.search.SearchItem;
70  import fr.paris.lutece.portal.service.util.AppException;
71  import fr.paris.lutece.portal.service.util.AppPropertiesService;
72  import org.apache.lucene.document.Document;
73  
74  /**
 * Default blog indexer: builds the Lucene documents for blog posts and applies the pending indexing actions.
76   */
77  public class DefaultBlogIndexer implements IBlogSearchIndexer
78  {
79      private static final String PROPERTY_INDEXER_NAME = "blog.indexer.name";
80      private static final String ENABLE_VALUE_TRUE = "1";
81      private static final String PROPERTY_INDEXER_DESCRIPTION = "blog.indexer.description";
82      private static final String PROPERTY_INDEXER_VERSION = "blog.indexer.version";
83      private static final String PROPERTY_INDEXER_ENABLE = "blog.indexer.enable";
84      private static final String BLANK_SPACE = " ";
85  
86      /**
87       * {@inheritDoc}
88       */
89      @Override
90      public String getDescription( )
91      {
92          return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
93      }
94  
95      /**
96       * Index given list of record
97       * 
98       * @param indexWriter
99       *            the indexWriter
100      * @param listIdBlog
101      *            The list of id blog
102      * @throws CorruptIndexException
103      *             If the index is corrupted
104      * @throws IOException
105      *             If an IO Exception occurred
106      */
107     private void indexListBlog( IndexWriter indexWriter, List<Integer> listIdBlog ) throws IOException
108     {
109         for ( Integer nBlogId : listIdBlog )
110         {
111             Blog blog = BlogService.getInstance( ).findByPrimaryKeyWithoutBinaries( nBlogId );
112             if ( blog != null )
113             {
114                 Document doc = getDocument( blog );
115                 indexWriter.addDocument( doc );
116             }
117         }
118     }
119 
120     /**
121      * Update isArchived field in the index
122      */
123 @Override
124     public void updateDocument( IndexWriter indexWriter, Blog blog ) throws IOException
125     {
126         Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( blog.getId( ) ) );
127         Term [ ] terms = {
128                 term
129         };
130 
131         indexWriter.deleteDocuments( terms );
132         Document doc = getDocument( blog );
133         indexWriter.addDocument( doc );
134     }
135 
136     /**
137      * {@inheritDoc}
138      */
139     @Override
140     public synchronized void processIndexing( IndexWriter indexWriter, boolean bCreate, StringBuilder sbLogs )
141             throws IOException, InterruptedException, SiteMessageException
142     {
143         List<Integer> listIdBlog = new ArrayList<>( );
144 
145         if ( !bCreate )
146         {
147             // incremental indexing
148             // delete all record which must be deleted
149             for ( fr.paris.lutece.plugins.blog.business.IndexerAction action : BlogSearchService.getInstance( )
150                     .getAllIndexerActionByTask( IndexerAction.TASK_DELETE ) )
151             {
152                 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_DELETE );
153 
154                 Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( action.getIdBlog( ) ) );
155                 Term [ ] terms = {
156                         term
157                 };
158 
159                 indexWriter.deleteDocuments( terms );
160                 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
161             }
162 
163             // Update all record which must be updated
164             for ( IndexerAction action : BlogSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_MODIFY ) )
165             {
166                 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_MODIFY );
167 
168                 Term term = new Term( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( action.getIdBlog( ) ) );
169                 Term [ ] terms = {
170                         term
171                 };
172 
173                 indexWriter.deleteDocuments( terms );
174                 listIdBlog = new ArrayList<>( );
175                 listIdBlog.add( action.getIdBlog( ) );
176                 this.indexListBlog( indexWriter, listIdBlog );
177                 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
178             }
179 
180             listIdBlog = new ArrayList<>( );
181 
182             // add all record which must be added
183             for ( IndexerAction action : BlogSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_CREATE ) )
184             {
185                 sbLogBlog( sbLogs, action.getIdBlog( ), IndexerAction.TASK_CREATE );
186                 listIdBlog.add( action.getIdBlog( ) );
187 
188                 BlogSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ) );
189             }
190 
191             indexListBlog( indexWriter, listIdBlog );
192         }
193         else
194         {
195             for ( Blog doc : BlogHome.getBlogsList( ) )
196             {
197 
198                 sbLogs.append( "Indexing Blog" );
199                 sbLogs.append( "\r\n" );
200 
201                 sbLogBlog( sbLogs, doc.getId( ), IndexerAction.TASK_CREATE );
202 
203                 listIdBlog.add( doc.getId( ) );
204 
205             }
206 
207             indexListBlog( indexWriter, listIdBlog );
208         }
209 
210         indexWriter.commit( );
211     }
212 
213     /**
214      * Builds a document which will be used by Lucene during the indexing of the announces list
215      * 
216      * @param blog
217      *            The blog post
218      * @throws IOException
219      *             If an IO Exception occurred
220      * @return the document
221      */
222     public static org.apache.lucene.document.Document getDocument( Blog blog ) throws IOException
223     {
224         // make a new, empty document
225         org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
226 
227         doc.add( new StringField( BlogSearchItem.FIELD_ID_HTML_DOC, Integer.toString( blog.getId( ) ), Field.Store.YES ) );
228         // Add the user firstName as a field, so that index can be incrementally maintained.
229         doc.add( new StringField( BlogSearchItem.FIELD_USER, blog.getUserCreator( ).toLowerCase( ), Field.Store.YES ) );
230 
231         doc.add( new TextField( BlogSearchItem.FIELD_TAGS, getTagToIndex( blog ), Field.Store.YES ) );
232         doc.add( new TextField( BlogSearchItem.FIELD_USERS_EDITED_BLOG, getUsersEditedBlogVersions( blog ), Field.Store.YES ) );
233 
234         FieldType ft = new FieldType( StringField.TYPE_STORED );
235         ft.setOmitNorms( false );
236         doc.add( new Field( SearchItem.FIELD_DATE, DateTools.timeToString( blog.getUpdateDate( ).getTime( ), DateTools.Resolution.MINUTE ), ft ) );
237         doc.add( new NumericDocValuesField( BlogSearchItem.FIELD_DATE_UPDATE, blog.getUpdateDate( ).getTime( ) ) );
238         // is document published TODAY
239         Date today = new Date( );
240         boolean isPublished = blog.getBlogPublication( ).stream( )
241                 .anyMatch( publication -> today.after( publication.getDateBeginPublishing( ) ) && today.before( publication.getDateEndPublishing( ) ) );
242         doc.add( new TextField( BlogSearchItem.FIELD_UNPUBLISHED, ( isPublished ) ? "false" : "true", Field.Store.YES ) );
243        // add isArchived field
244         doc.add( new TextField( BlogSearchItem.FIELD_ARCHIVED, blog.isArchived( ) ? "true" : "false", Field.Store.YES ) );
245         // Add the uid as a field, so that index can be incrementally maintained.
246         // This field is not stored with question/answer, it is indexed, but it is not
247         // tokenized prior to indexing.
248         String strIdAnnounce = String.valueOf( blog.getId( ) );
249         doc.add( new StringField( SearchItem.FIELD_UID, strIdAnnounce, Field.Store.YES ) );
250 
251         String strContentToIndex = getContentToIndex( blog );
252         // NOUVEAU
253         ContentHandler handler = new BodyContentHandler( -1 );
254         Metadata metadata = new Metadata( );
255 
256         try
257         {
258             new HtmlParser( ).parse( new ByteArrayInputStream( strContentToIndex.getBytes( ) ), handler, metadata, new ParseContext( ) );
259         }
260         catch( TikaException | SAXException e )
261         {
262             throw new AppException( "Error during blog parsing. blog Id: " + blog.getId( ), e );
263         }
264 
265         String strContent = handler.toString( );
266 
267         // Add the tag-stripped contents as a Reader-valued Text field so it will
268         // get tokenized and indexed.
269         doc.add( new TextField( SearchItem.FIELD_CONTENTS, strContent, Field.Store.NO ) );
270 
271         doc.add( new TextField( SearchItem.FIELD_SUMMARY, blog.getHtmlContent( ), Field.Store.YES ) );
272         // Add the subject name as a separate Text field, so that it can be searched
273         // separately.
274         doc.add( new StringField( SearchItem.FIELD_TITLE, blog.getName( ), Field.Store.YES ) );
275 
276         doc.add( new StringField( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME, Field.Store.YES ) );
277 
278         // return the document
279         return doc;
280     }
281 
282     /**
283      * Set the Content to index
284      * 
285      * @param blog
286      *            The {@link blog} to index
287      * @return The content to index
288      */
289     private static String getContentToIndex( Blog blog )
290     {
291         StringBuilder sbContentToIndex = new StringBuilder( );
292         // Do not index question here
293         sbContentToIndex.append( blog.getName( ) );
294         sbContentToIndex.append( BLANK_SPACE );
295         sbContentToIndex.append( blog.getDescription( ) );
296         sbContentToIndex.append( BLANK_SPACE );
297         sbContentToIndex.append( blog.getHtmlContent( ) );
298         sbContentToIndex.append( BLANK_SPACE );
299         sbContentToIndex.append( blog.getId( ) );
300 
301         return sbContentToIndex.toString( );
302     }
303 
304     /**
305      * Set the tag to index
306      * 
307      * @param blog
308      *            The {@link blog} to index
309      * @return The tag to index
310      */
311     private static String getTagToIndex( Blog blog )
312     {
313         StringBuilder sbContentToIndex = new StringBuilder( );
314 
315         for ( Tag tg : blog.getTag( ) )
316         {
317             sbContentToIndex.append( BLANK_SPACE );
318             sbContentToIndex.append( tg.getIdTag( ) );
319         }
320 
321         return sbContentToIndex.toString( );
322     }
323 
324     /**
325      * Set the user list edited the blog
326      * 
327      * @param blog
328      *            The blog to index
329      * @return The list of users
330      */
331     private static String getUsersEditedBlogVersions( Blog blog )
332     {
333         StringBuilder sbContentToIndex = new StringBuilder( );
334         List<String> usersList = BlogHome.getUsersEditedBlogVersions( blog.getId( ) );
335 
336         for ( String user : usersList )
337         {
338             sbContentToIndex.append( BLANK_SPACE );
339             sbContentToIndex.append( user );
340         }
341 
342         return sbContentToIndex.toString( );
343     }
344 
345     /**
346      * {@inheritDoc}
347      */
348     @Override
349     public String getName( )
350     {
351         return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
352     }
353 
354     /**
355      * {@inheritDoc}
356      */
357     @Override
358     public String getVersion( )
359     {
360         return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
361     }
362 
363     /**
364      * {@inheritDoc}
365      */
366     @Override
367     public boolean isEnable( )
368     {
369         boolean bReturn = false;
370         String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
371 
372         if ( ( strEnable != null ) && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
373                 && PluginService.isPluginEnable( BlogPlugin.PLUGIN_NAME ) )
374         {
375             bReturn = true;
376         }
377 
378         return bReturn;
379     }
380 
381     /**
382      * Indexing action performed on the recording
383      * 
384      * @param sbLogs
385      *            the buffer log
386      * @param nIdBlog
387      *            the id of the Blog
388      * @param nAction
389      *            the indexer action key performed
390      */
391     private void sbLogBlog( StringBuilder sbLogs, int nIdBlog, int nAction )
392     {
393         sbLogs.append( "Indexing Blogs:" );
394 
395         switch( nAction )
396         {
397             case IndexerAction.TASK_CREATE:
398                 sbLogs.append( "Insert " );
399 
400                 break;
401 
402             case IndexerAction.TASK_MODIFY:
403                 sbLogs.append( "Modify " );
404 
405                 break;
406 
407             case IndexerAction.TASK_DELETE:
408                 sbLogs.append( "Delete " );
409 
410                 break;
411 
412             default:
413                 break;
414         }
415 
416         if ( nIdBlog != BlogUtils.CONSTANT_ID_NULL )
417         {
418             sbLogs.append( "id_blog=" );
419             sbLogs.append( nIdBlog );
420         }
421 
422         sbLogs.append( "\r\n" );
423     }
424 
425 }