View Javadoc
1   /*
2    * Copyright (c) 2002-2021, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.announce.service.announcesearch;
35  
36  import java.io.ByteArrayInputStream;
37  import java.io.IOException;
38  import java.sql.Timestamp;
39  import java.util.ArrayList;
40  import java.util.Iterator;
41  import java.util.List;
42  import java.util.stream.Collectors;
43  
44  import org.apache.commons.collections.CollectionUtils;
45  import org.apache.commons.lang.StringUtils;
46  import org.apache.lucene.document.DateTools;
47  import org.apache.lucene.document.Document;
48  import org.apache.lucene.document.Field;
49  import org.apache.lucene.document.StoredField;
50  import org.apache.lucene.document.StringField;
51  import org.apache.lucene.document.TextField;
52  import org.apache.lucene.index.IndexWriter;
53  import org.apache.lucene.index.Term;
54  import org.apache.tika.exception.TikaException;
55  import org.apache.tika.metadata.Metadata;
56  import org.apache.tika.parser.ParseContext;
57  import org.apache.tika.parser.html.HtmlParser;
58  import org.apache.tika.sax.BodyContentHandler;
59  import org.xml.sax.ContentHandler;
60  import org.xml.sax.SAXException;
61  
62  import fr.paris.lutece.plugins.announce.business.Announce;
63  import fr.paris.lutece.plugins.announce.business.AnnounceHome;
64  import fr.paris.lutece.plugins.announce.business.AnnounceSort;
65  import fr.paris.lutece.plugins.announce.business.IndexerAction;
66  import fr.paris.lutece.plugins.announce.service.AnnouncePlugin;
67  import fr.paris.lutece.plugins.announce.utils.AnnounceUtils;
68  import fr.paris.lutece.plugins.genericattributes.business.Response;
69  import fr.paris.lutece.portal.service.content.XPageAppService;
70  import fr.paris.lutece.portal.service.message.SiteMessageException;
71  import fr.paris.lutece.portal.service.plugin.Plugin;
72  import fr.paris.lutece.portal.service.plugin.PluginService;
73  import fr.paris.lutece.portal.service.search.IndexationService;
74  import fr.paris.lutece.portal.service.search.SearchItem;
75  import fr.paris.lutece.portal.service.util.AppException;
76  import fr.paris.lutece.portal.service.util.AppPathService;
77  import fr.paris.lutece.portal.service.util.AppPropertiesService;
78  import fr.paris.lutece.util.url.UrlItem;
79  
80  /**
81   * DefaultAnnounceIndexer
82   */
83  public class DefaultAnnounceIndexer implements IAnnounceSearchIndexer
84  {
85      private static final String PROPERTY_INDEXER_NAME = "announce.indexer.name";
86      private static final String PARAMETER_ANNOUNCE_ID = "announce_id";
87      private static final String ENABLE_VALUE_TRUE = "1";
88      private static final String PROPERTY_INDEXER_DESCRIPTION = "announce.indexer.description";
89      private static final String PROPERTY_INDEXER_VERSION = "announce.indexer.version";
90      private static final String PROPERTY_INDEXER_ENABLE = "announce.indexer.enable";
91      private static final String BLANK_SPACE = " ";
92  
93      /**
94       * {@inheritDoc}
95       */
96      @Override
97      public String getDescription( )
98      {
99          return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
100     }
101 
102     /**
103      * Index given list of record
104      * 
105      * @param indexWriter
106      *            the indexWriter
107      * @param listIdAnounce
108      *            The list of id announce
109      * @param plugin
110      *            the plugin
111      * @throws IOException
112      *             If an IO Exception occurred
113      */
114     private void indexListAnnounce( IndexWriter indexWriter, List<Integer> listIdAnounce, Plugin plugin ) throws IOException
115     {
116         String strPortalUrl = AppPathService.getPortalUrl( );
117         Iterator<Integer> it = listIdAnounce.iterator( );
118 
119         while ( it.hasNext( ) )
120         {
121             Integer nAnnounceId = it.next( );
122             Announce announce = AnnounceHome.findByPrimaryKey( nAnnounceId );
123             
124             List<Response> listResponses = AnnounceHome.findListResponse( nAnnounceId, false );
125             announce.setListResponse(listResponses);
126 
127             UrlItem urlAnnounce = new UrlItem( strPortalUrl );
128             urlAnnounce.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( AnnounceUtils.PARAMETER_PAGE_ANNOUNCE ) ); // FIXME
129             urlAnnounce.addParameter( PARAMETER_ANNOUNCE_ID, announce.getId( ) );
130 
131             indexWriter.addDocument( getDocument( announce, urlAnnounce.getUrl( ), plugin ) );
132         }
133     }
134 
135     /**
136      * {@inheritDoc}
137      */
138     @Override
139     public synchronized void processIndexing( IndexWriter indexWriter, boolean bCreate, StringBuffer sbLogs )
140             throws IOException, InterruptedException, SiteMessageException
141     {
142         Plugin plugin = PluginService.getPlugin( AnnouncePlugin.PLUGIN_NAME );
143         List<Integer> listIdAnnounce = new ArrayList<>( );
144 
145         if ( !bCreate )
146         {
147             // incremental indexing
148             // delete all record which must be deleted
149             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_DELETE, plugin ) )
150             {
151                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_DELETE );
152 
153                 Term term = new Term( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( action.getIdAnnounce( ) ) );
154                 Term [ ] terms = {
155                         term
156                 };
157 
158                 indexWriter.deleteDocuments( terms );
159                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
160             }
161 
162             // Update all record which must be updated
163             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_MODIFY, plugin ) )
164             {
165                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_MODIFY );
166 
167                 Term term = new Term( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( action.getIdAnnounce( ) ) );
168                 Term [ ] terms = {
169                         term
170                 };
171 
172                 indexWriter.deleteDocuments( terms );
173 
174                 listIdAnnounce.add( action.getIdAnnounce( ) );
175 
176                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
177             }
178 
179             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
180 
181             listIdAnnounce = new ArrayList<>( );
182 
183             // add all record which must be added
184             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_CREATE, plugin ) )
185             {
186                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_CREATE );
187                 listIdAnnounce.add( action.getIdAnnounce( ) );
188 
189                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
190             }
191 
192             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
193         }
194         else
195         {
196             for ( Announce announce : AnnounceHome.findAllPublished( AnnounceSort.DEFAULT_SORT ) )
197             {
198                 if ( !announce.getSuspended( ) && !announce.getSuspendedByUser( ) )
199                 {
200                     sbLogs.append( "Indexing Announce" );
201                     sbLogs.append( "\r\n" );
202 
203                     sbLogAnnounce( sbLogs, announce.getId( ), IndexerAction.TASK_CREATE );
204 
205                     listIdAnnounce.add( announce.getId( ) );
206                 }
207             }
208 
209             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
210         }
211 
212         indexWriter.commit( );
213     }
214 
215     /**
216      * Get the subject document
217      * 
218      * @param strDocument
219      *            id of the subject to index
220      * @return The list of lucene documents
221      * @throws IOException
222      *             If an IO Exception occurred
223      */
224     public static List<Document> getDocuments( String strDocument ) throws IOException
225     {
226         List<org.apache.lucene.document.Document> listDocs = new ArrayList<>( );
227         String strPortalUrl = AppPathService.getPortalUrl( );
228         Plugin plugin = PluginService.getPlugin( AnnouncePlugin.PLUGIN_NAME );
229 
230         for ( Announce announce : AnnounceHome.findAllPublished( AnnounceSort.DEFAULT_SORT ) )
231         {
232             if ( !announce.getSuspended( ) && !announce.getSuspendedByUser( ) )
233             {
234                 UrlItem urlAnnounce = new UrlItem( strPortalUrl );
235                 urlAnnounce.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( AnnounceUtils.PARAMETER_PAGE_ANNOUNCE ) ); // FIXME
236                 urlAnnounce.addParameter( PARAMETER_ANNOUNCE_ID, announce.getId( ) );
237 
238                 org.apache.lucene.document.Document docAnnounce = getDocument( announce, urlAnnounce.getUrl( ), plugin );
239                 listDocs.add( docAnnounce );
240                 if ( docAnnounce != null )
241                 {
242                     IndexationService.write( docAnnounce );
243                 }
244             }
245         }
246 
247         return listDocs;
248     }
249 
250     /**
251      * Builds a document which will be used by Lucene during the indexing of the announces list
252      * 
253      * @param announce
254      *            the announce
255      * @param strUrl
256      *            the url
257      * @param plugin
258      *            the plugin
259      * @throws IOException
260      *             If an IO Exception occurred
261      * @return the document
262      */
263     public static org.apache.lucene.document.Document getDocument( Announce announce, String strUrl, Plugin plugin ) throws IOException
264     {
265         // make a new, empty document
266         org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
267         doc.add( new Field( AnnounceSearchItem.FIELD_SECTOR_ID, String.valueOf( announce.getCategory( ).getIdSector( ) ), TextField.TYPE_STORED ) );
268 
269         doc.add( new Field( AnnounceSearchItem.FIELD_CATEGORY_ID, String.valueOf( announce.getCategory( ).getId( ) ), TextField.TYPE_STORED ) );
270         doc.add( new Field( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( announce.getId( ) ), TextField.TYPE_STORED ) );
271 
272         doc.add( new Field( AnnounceSearchItem.FIELD_TAGS, announce.getTags( ), TextField.TYPE_STORED ) );
273 
274         // Add the url as a field named "url". Use an UnIndexed field, so
275         // that the url is just stored with the question/answer, but is not searchable.
276         doc.add( new Field( SearchItem.FIELD_URL, strUrl, TextField.TYPE_STORED ) );
277 
278         // Add the uid as a field, so that index can be incrementally maintained.
279         // This field is not stored with question/answer, it is indexed, but it is not
280         // tokenized prior to indexing.
281         String strIdAnnounce = String.valueOf( announce.getId( ) );
282         doc.add( new Field( SearchItem.FIELD_UID, strIdAnnounce, TextField.TYPE_STORED ) );
283 
284         // Add the last modified date of the file a field named "modified".
285         // Use a field that is indexed (i.e. searchable), but don't tokenize
286         // the field into words.
287         String strDate = DateTools.dateToString(
288                 ( announce.getTimePublication( ) > 0 ) ? new Timestamp( announce.getTimePublication( ) ) : announce.getDateCreation( ),
289                 DateTools.Resolution.DAY );
290         doc.add( new Field( SearchItem.FIELD_DATE, strDate, TextField.TYPE_STORED ) );
291 
292         if ( announce.getPrice( ) != 0.0 )
293         {
294             double dPrice = announce.getPrice( );
295             // Add the price of the announce
296             doc.add( new Field( AnnounceSearchItem.FIELD_PRICE, AnnounceSearchService.formatPriceForIndexer( dPrice ), TextField.TYPE_STORED ) );
297         }
298 
299         String strContentToIndex = getContentToIndex( announce );
300 
301         // NOUVEAU
302         ContentHandler handler = new BodyContentHandler( );
303         Metadata metadata = new Metadata( );
304 
305         try
306         {
307             new HtmlParser( ).parse( new ByteArrayInputStream( strContentToIndex.getBytes( ) ), handler, metadata, new ParseContext( ) );
308         }
309         catch( SAXException | TikaException e )
310         {
311             throw new AppException( "Error during announce parsing." );
312         }
313 
314         String strContent = handler.toString( );
315 
316         // Add the tag-stripped contents as a Reader-valued Text field so it will
317         // get tokenized and indexed.
318         doc.add( new Field( SearchItem.FIELD_CONTENTS, strContent, TextField.TYPE_NOT_STORED ) );
319 
320         // Add the subject name as a separate Text field, so that it can be searched
321         // separately.
322         doc.add( new StoredField( SearchItem.FIELD_TITLE, announce.getTitle( ) ) );
323 
324         doc.add( new Field( SearchItem.FIELD_TYPE, AnnouncePlugin.PLUGIN_NAME, StringField.TYPE_STORED ) );
325 
326         // return the document
327         return doc;
328     }
329 
330     /**
331      * Set the Content to index
332      * 
333      * @param announce
334      *            The {@link Announce} to index
335      * @param plugin
336      *            The {@link Plugin}
337      * @return The content to index
338      */
339     private static String getContentToIndex( Announce announce )
340     {
341         StringBuffer sbContentToIndex = new StringBuffer( );
342         // Do not index question here
343         sbContentToIndex.append( announce.getTitle( ) );
344         sbContentToIndex.append( BLANK_SPACE );
345         sbContentToIndex.append( announce.getDescription( ) );
346         sbContentToIndex.append( BLANK_SPACE );
347         sbContentToIndex.append( announce.getTags( ) );
348         
349         if ( !CollectionUtils.isEmpty(announce.getListResponse()) )
350         {       
351         	String strAttributs = announce.getListResponse().stream()
352                 .filter( response -> StringUtils.isNotBlank(response.getResponseValue()) )
353                 .map( response -> BLANK_SPACE + response.getResponseValue() )
354                 .collect( Collectors.joining() );
355         	sbContentToIndex.append( strAttributs );
356         }
357 
358         return sbContentToIndex.toString( );
359     }
360 
361     /**
362      * {@inheritDoc}
363      */
364     @Override
365     public String getName( )
366     {
367         return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
368     }
369 
370     /**
371      * {@inheritDoc}
372      */
373     @Override
374     public String getVersion( )
375     {
376         return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
377     }
378 
379     /**
380      * {@inheritDoc}
381      */
382     @Override
383     public boolean isEnable( )
384     {
385         boolean bReturn = false;
386         String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
387 
388         if ( ( strEnable != null ) && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
389                 && PluginService.isPluginEnable( AnnouncePlugin.PLUGIN_NAME ) )
390         {
391             bReturn = true;
392         }
393 
394         return bReturn;
395     }
396 
397     /**
398      * Indexing action performed on the recording
399      * 
400      * @param sbLogs
401      *            the buffer log
402      * @param nIdAnnounce
403      *            the id of the announce
404      * @param nAction
405      *            the indexer action key performed
406      */
407     private void sbLogAnnounce( StringBuffer sbLogs, int nIdAnnounce, int nAction )
408     {
409         sbLogs.append( "Indexing Announce:" );
410 
411         switch( nAction )
412         {
413             case IndexerAction.TASK_CREATE:
414                 sbLogs.append( "Insert " );
415 
416                 break;
417 
418             case IndexerAction.TASK_MODIFY:
419                 sbLogs.append( "Modify " );
420 
421                 break;
422 
423             case IndexerAction.TASK_DELETE:
424                 sbLogs.append( "Delete " );
425 
426                 break;
427 
428             default:
429                 break;
430         }
431 
432         if ( nIdAnnounce != AnnounceUtils.CONSTANT_ID_NULL )
433         {
434             sbLogs.append( "id_announce=" );
435             sbLogs.append( nIdAnnounce );
436         }
437 
438         sbLogs.append( "\r\n" );
439     }
440 }