View Javadoc
1   /*
2    * Copyright (c) 2002-2020, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.announce.service.announcesearch;
35  
36  import java.io.ByteArrayInputStream;
37  import java.io.IOException;
38  import java.sql.Timestamp;
39  import java.util.ArrayList;
40  import java.util.Iterator;
41  import java.util.List;
42  
43  import org.apache.lucene.document.DateTools;
44  import org.apache.lucene.document.Document;
45  import org.apache.lucene.document.Field;
46  import org.apache.lucene.document.StoredField;
47  import org.apache.lucene.document.StringField;
48  import org.apache.lucene.document.TextField;
49  import org.apache.lucene.index.IndexWriter;
50  import org.apache.lucene.index.Term;
51  import org.apache.tika.exception.TikaException;
52  import org.apache.tika.metadata.Metadata;
53  import org.apache.tika.parser.ParseContext;
54  import org.apache.tika.parser.html.HtmlParser;
55  import org.apache.tika.sax.BodyContentHandler;
56  import org.xml.sax.ContentHandler;
57  import org.xml.sax.SAXException;
58  
59  import fr.paris.lutece.plugins.announce.business.Announce;
60  import fr.paris.lutece.plugins.announce.business.AnnounceHome;
61  import fr.paris.lutece.plugins.announce.business.AnnounceSort;
62  import fr.paris.lutece.plugins.announce.business.IndexerAction;
63  import fr.paris.lutece.plugins.announce.service.AnnouncePlugin;
64  import fr.paris.lutece.plugins.announce.utils.AnnounceUtils;
65  import fr.paris.lutece.portal.service.content.XPageAppService;
66  import fr.paris.lutece.portal.service.message.SiteMessageException;
67  import fr.paris.lutece.portal.service.plugin.Plugin;
68  import fr.paris.lutece.portal.service.plugin.PluginService;
69  import fr.paris.lutece.portal.service.search.IndexationService;
70  import fr.paris.lutece.portal.service.search.SearchItem;
71  import fr.paris.lutece.portal.service.util.AppException;
72  import fr.paris.lutece.portal.service.util.AppPathService;
73  import fr.paris.lutece.portal.service.util.AppPropertiesService;
74  import fr.paris.lutece.util.url.UrlItem;
75  
76  /**
77   * DefaultAnnounceIndexer
78   */
79  public class DefaultAnnounceIndexer implements IAnnounceSearchIndexer
80  {
81      private static final String PROPERTY_INDEXER_NAME = "announce.indexer.name";
82      private static final String PARAMETER_ANNOUNCE_ID = "announce_id";
83      private static final String ENABLE_VALUE_TRUE = "1";
84      private static final String PROPERTY_INDEXER_DESCRIPTION = "announce.indexer.description";
85      private static final String PROPERTY_INDEXER_VERSION = "announce.indexer.version";
86      private static final String PROPERTY_INDEXER_ENABLE = "announce.indexer.enable";
87      private static final String BLANK_SPACE = " ";
88  
89      /**
90       * {@inheritDoc}
91       */
92      @Override
93      public String getDescription( )
94      {
95          return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
96      }
97  
98      /**
99       * Index given list of record
100      * 
101      * @param indexWriter
102      *            the indexWriter
103      * @param listIdAnounce
104      *            The list of id announce
105      * @param plugin
106      *            the plugin
107      * @throws IOException
108      *             If an IO Exception occurred
109      */
110     private void indexListAnnounce( IndexWriter indexWriter, List<Integer> listIdAnounce, Plugin plugin ) throws IOException
111     {
112         String strPortalUrl = AppPathService.getPortalUrl( );
113         Iterator<Integer> it = listIdAnounce.iterator( );
114 
115         while ( it.hasNext( ) )
116         {
117             Integer nAnnounceId = it.next( );
118             Announce announce = AnnounceHome.findByPrimaryKey( nAnnounceId );
119 
120             UrlItem urlAnnounce = new UrlItem( strPortalUrl );
121             urlAnnounce.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( AnnounceUtils.PARAMETER_PAGE_ANNOUNCE ) ); // FIXME
122             urlAnnounce.addParameter( PARAMETER_ANNOUNCE_ID, announce.getId( ) );
123 
124             indexWriter.addDocument( getDocument( announce, urlAnnounce.getUrl( ), plugin ) );
125         }
126     }
127 
128     /**
129      * {@inheritDoc}
130      */
131     @Override
132     public synchronized void processIndexing( IndexWriter indexWriter, boolean bCreate, StringBuffer sbLogs )
133             throws IOException, InterruptedException, SiteMessageException
134     {
135         Plugin plugin = PluginService.getPlugin( AnnouncePlugin.PLUGIN_NAME );
136         List<Integer> listIdAnnounce = new ArrayList<>( );
137 
138         if ( !bCreate )
139         {
140             // incremental indexing
141             // delete all record which must be deleted
142             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_DELETE, plugin ) )
143             {
144                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_DELETE );
145 
146                 Term term = new Term( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( action.getIdAnnounce( ) ) );
147                 Term [ ] terms = {
148                         term
149                 };
150 
151                 indexWriter.deleteDocuments( terms );
152                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
153             }
154 
155             // Update all record which must be updated
156             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_MODIFY, plugin ) )
157             {
158                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_MODIFY );
159 
160                 Term term = new Term( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( action.getIdAnnounce( ) ) );
161                 Term [ ] terms = {
162                         term
163                 };
164 
165                 indexWriter.deleteDocuments( terms );
166 
167                 listIdAnnounce.add( action.getIdAnnounce( ) );
168 
169                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
170             }
171 
172             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
173 
174             listIdAnnounce = new ArrayList<>( );
175 
176             // add all record which must be added
177             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_CREATE, plugin ) )
178             {
179                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_CREATE );
180                 listIdAnnounce.add( action.getIdAnnounce( ) );
181 
182                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
183             }
184 
185             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
186         }
187         else
188         {
189             for ( Announce announce : AnnounceHome.findAllPublished( AnnounceSort.DEFAULT_SORT ) )
190             {
191                 if ( !announce.getSuspended( ) && !announce.getSuspendedByUser( ) )
192                 {
193                     sbLogs.append( "Indexing Announce" );
194                     sbLogs.append( "\r\n" );
195 
196                     sbLogAnnounce( sbLogs, announce.getId( ), IndexerAction.TASK_CREATE );
197 
198                     listIdAnnounce.add( announce.getId( ) );
199                 }
200             }
201 
202             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
203         }
204 
205         indexWriter.commit( );
206     }
207 
208     /**
209      * Get the subject document
210      * 
211      * @param strDocument
212      *            id of the subject to index
213      * @return The list of lucene documents
214      * @throws IOException
215      *             If an IO Exception occurred
216      */
217     public static List<Document> getDocuments( String strDocument ) throws IOException
218     {
219         List<org.apache.lucene.document.Document> listDocs = new ArrayList<>( );
220         String strPortalUrl = AppPathService.getPortalUrl( );
221         Plugin plugin = PluginService.getPlugin( AnnouncePlugin.PLUGIN_NAME );
222 
223         for ( Announce announce : AnnounceHome.findAllPublished( AnnounceSort.DEFAULT_SORT ) )
224         {
225             if ( !announce.getSuspended( ) && !announce.getSuspendedByUser( ) )
226             {
227                 UrlItem urlAnnounce = new UrlItem( strPortalUrl );
228                 urlAnnounce.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( AnnounceUtils.PARAMETER_PAGE_ANNOUNCE ) ); // FIXME
229                 urlAnnounce.addParameter( PARAMETER_ANNOUNCE_ID, announce.getId( ) );
230 
231                 org.apache.lucene.document.Document docAnnounce = getDocument( announce, urlAnnounce.getUrl( ), plugin );
232                 listDocs.add( docAnnounce );
233                 if ( docAnnounce != null )
234                 {
235                     IndexationService.write( docAnnounce );
236                 }
237             }
238         }
239 
240         return listDocs;
241     }
242 
243     /**
244      * Builds a document which will be used by Lucene during the indexing of the announces list
245      * 
246      * @param announce
247      *            the announce
248      * @param strUrl
249      *            the url
250      * @param plugin
251      *            the plugin
252      * @throws IOException
253      *             If an IO Exception occurred
254      * @return the document
255      */
256     public static org.apache.lucene.document.Document getDocument( Announce announce, String strUrl, Plugin plugin ) throws IOException
257     {
258         // make a new, empty document
259         org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
260         doc.add( new Field( AnnounceSearchItem.FIELD_SECTOR_ID, String.valueOf( announce.getCategory( ).getIdSector( ) ), TextField.TYPE_STORED ) );
261 
262         doc.add( new Field( AnnounceSearchItem.FIELD_CATEGORY_ID, String.valueOf( announce.getCategory( ).getId( ) ), TextField.TYPE_STORED ) );
263         doc.add( new Field( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( announce.getId( ) ), TextField.TYPE_STORED ) );
264 
265         doc.add( new Field( AnnounceSearchItem.FIELD_TAGS, announce.getTags( ), TextField.TYPE_STORED ) );
266 
267         // Add the url as a field named "url". Use an UnIndexed field, so
268         // that the url is just stored with the question/answer, but is not searchable.
269         doc.add( new Field( SearchItem.FIELD_URL, strUrl, TextField.TYPE_STORED ) );
270 
271         // Add the uid as a field, so that index can be incrementally maintained.
272         // This field is not stored with question/answer, it is indexed, but it is not
273         // tokenized prior to indexing.
274         String strIdAnnounce = String.valueOf( announce.getId( ) );
275         doc.add( new Field( SearchItem.FIELD_UID, strIdAnnounce, TextField.TYPE_STORED ) );
276 
277         // Add the last modified date of the file a field named "modified".
278         // Use a field that is indexed (i.e. searchable), but don't tokenize
279         // the field into words.
280         String strDate = DateTools.dateToString(
281                 ( announce.getTimePublication( ) > 0 ) ? new Timestamp( announce.getTimePublication( ) ) : announce.getDateCreation( ),
282                 DateTools.Resolution.DAY );
283         doc.add( new Field( SearchItem.FIELD_DATE, strDate, TextField.TYPE_STORED ) );
284 
285         if ( announce.getPrice( ) != 0.0 )
286         {
287             double dPrice = announce.getPrice( );
288             // Add the price of the announce
289             doc.add( new Field( AnnounceSearchItem.FIELD_PRICE, AnnounceSearchService.formatPriceForIndexer( dPrice ), TextField.TYPE_STORED ) );
290         }
291 
292         String strContentToIndex = getContentToIndex( announce );
293 
294         // NOUVEAU
295         ContentHandler handler = new BodyContentHandler( );
296         Metadata metadata = new Metadata( );
297 
298         try
299         {
300             new HtmlParser( ).parse( new ByteArrayInputStream( strContentToIndex.getBytes( ) ), handler, metadata, new ParseContext( ) );
301         }
302         catch( SAXException | TikaException e )
303         {
304             throw new AppException( "Error during announce parsing." );
305         }
306 
307         String strContent = handler.toString( );
308 
309         // Add the tag-stripped contents as a Reader-valued Text field so it will
310         // get tokenized and indexed.
311         doc.add( new Field( SearchItem.FIELD_CONTENTS, strContent, TextField.TYPE_NOT_STORED ) );
312 
313         // Add the subject name as a separate Text field, so that it can be searched
314         // separately.
315         doc.add( new StoredField( SearchItem.FIELD_TITLE, announce.getTitle( ) ) );
316 
317         doc.add( new Field( SearchItem.FIELD_TYPE, AnnouncePlugin.PLUGIN_NAME, StringField.TYPE_STORED ) );
318 
319         // return the document
320         return doc;
321     }
322 
323     /**
324      * Set the Content to index
325      * 
326      * @param announce
327      *            The {@link Announce} to index
328      * @param plugin
329      *            The {@link Plugin}
330      * @return The content to index
331      */
332     private static String getContentToIndex( Announce announce )
333     {
334         StringBuffer sbContentToIndex = new StringBuffer( );
335         // Do not index question here
336         sbContentToIndex.append( announce.getTitle( ) );
337         sbContentToIndex.append( BLANK_SPACE );
338         sbContentToIndex.append( announce.getDescription( ) );
339         sbContentToIndex.append( BLANK_SPACE );
340         sbContentToIndex.append( announce.getTags( ) );
341 
342         return sbContentToIndex.toString( );
343     }
344 
345     /**
346      * {@inheritDoc}
347      */
348     @Override
349     public String getName( )
350     {
351         return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
352     }
353 
354     /**
355      * {@inheritDoc}
356      */
357     @Override
358     public String getVersion( )
359     {
360         return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
361     }
362 
363     /**
364      * {@inheritDoc}
365      */
366     @Override
367     public boolean isEnable( )
368     {
369         boolean bReturn = false;
370         String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
371 
372         if ( ( strEnable != null ) && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
373                 && PluginService.isPluginEnable( AnnouncePlugin.PLUGIN_NAME ) )
374         {
375             bReturn = true;
376         }
377 
378         return bReturn;
379     }
380 
381     /**
382      * Indexing action performed on the recording
383      * 
384      * @param sbLogs
385      *            the buffer log
386      * @param nIdAnnounce
387      *            the id of the announce
388      * @param nAction
389      *            the indexer action key performed
390      */
391     private void sbLogAnnounce( StringBuffer sbLogs, int nIdAnnounce, int nAction )
392     {
393         sbLogs.append( "Indexing Announce:" );
394 
395         switch( nAction )
396         {
397             case IndexerAction.TASK_CREATE:
398                 sbLogs.append( "Insert " );
399 
400                 break;
401 
402             case IndexerAction.TASK_MODIFY:
403                 sbLogs.append( "Modify " );
404 
405                 break;
406 
407             case IndexerAction.TASK_DELETE:
408                 sbLogs.append( "Delete " );
409 
410                 break;
411 
412             default:
413                 break;
414         }
415 
416         if ( nIdAnnounce != AnnounceUtils.CONSTANT_ID_NULL )
417         {
418             sbLogs.append( "id_announce=" );
419             sbLogs.append( nIdAnnounce );
420         }
421 
422         sbLogs.append( "\r\n" );
423     }
424 }