View Javadoc
1   /*
2    * Copyright (c) 2002-2021, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.announce.service.announcesearch;
35  
36  import java.io.ByteArrayInputStream;
37  import java.io.IOException;
38  import java.sql.Timestamp;
39  import java.util.ArrayList;
40  import java.util.Iterator;
41  import java.util.List;
42  import java.util.stream.Collectors;
43  
44  import org.apache.commons.collections.CollectionUtils;
45  import org.apache.commons.lang.StringUtils;
46  import org.apache.lucene.document.DateTools;
47  import org.apache.lucene.document.Document;
48  import org.apache.lucene.document.Field;
49  import org.apache.lucene.document.FieldType;
50  import org.apache.lucene.document.StringField;
51  import org.apache.lucene.document.TextField;
52  import org.apache.lucene.index.IndexOptions;
53  import org.apache.lucene.index.IndexWriter;
54  import org.apache.lucene.index.Term;
55  import org.apache.tika.exception.TikaException;
56  import org.apache.tika.metadata.Metadata;
57  import org.apache.tika.parser.ParseContext;
58  import org.apache.tika.parser.html.HtmlParser;
59  import org.apache.tika.sax.BodyContentHandler;
60  import org.xml.sax.ContentHandler;
61  import org.xml.sax.SAXException;
62  
63  import fr.paris.lutece.plugins.announce.business.Announce;
64  import fr.paris.lutece.plugins.announce.business.AnnounceHome;
65  import fr.paris.lutece.plugins.announce.business.AnnounceSort;
66  import fr.paris.lutece.plugins.announce.business.IndexerAction;
67  import fr.paris.lutece.plugins.announce.service.AnnouncePlugin;
68  import fr.paris.lutece.plugins.announce.utils.AnnounceUtils;
69  import fr.paris.lutece.plugins.genericattributes.business.Response;
70  import fr.paris.lutece.portal.service.content.XPageAppService;
71  import fr.paris.lutece.portal.service.message.SiteMessageException;
72  import fr.paris.lutece.portal.service.plugin.Plugin;
73  import fr.paris.lutece.portal.service.plugin.PluginService;
74  import fr.paris.lutece.portal.service.search.IndexationService;
75  import fr.paris.lutece.portal.service.search.SearchItem;
76  import fr.paris.lutece.portal.service.util.AppException;
77  import fr.paris.lutece.portal.service.util.AppPathService;
78  import fr.paris.lutece.portal.service.util.AppPropertiesService;
79  import fr.paris.lutece.util.url.UrlItem;
80  
81  /**
82   * DefaultAnnounceIndexer
83   */
84  public class DefaultAnnounceIndexer implements IAnnounceSearchIndexer
85  {
86      private static final String PROPERTY_INDEXER_NAME = "announce.indexer.name";
87      private static final String PARAMETER_ANNOUNCE_ID = "announce_id";
88      private static final String ENABLE_VALUE_TRUE = "1";
89      private static final String PROPERTY_INDEXER_DESCRIPTION = "announce.indexer.description";
90      private static final String PROPERTY_INDEXER_VERSION = "announce.indexer.version";
91      private static final String PROPERTY_INDEXER_ENABLE = "announce.indexer.enable";
92      private static final String BLANK_SPACE = " ";
93  
94      /**
95       * {@inheritDoc}
96       */
97      @Override
98      public String getDescription( )
99      {
100         return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
101     }
102 
103     /**
104      * Index given list of record
105      * 
106      * @param indexWriter
107      *            the indexWriter
108      * @param listIdAnounce
109      *            The list of id announce
110      * @param plugin
111      *            the plugin
112      * @throws IOException
113      *             If an IO Exception occurred
114      */
115     private void indexListAnnounce( IndexWriter indexWriter, List<Integer> listIdAnounce, Plugin plugin ) throws IOException
116     {
117         String strPortalUrl = AppPathService.getPortalUrl( );
118         Iterator<Integer> it = listIdAnounce.iterator( );
119 
120         while ( it.hasNext( ) )
121         {
122             Integer nAnnounceId = it.next( );
123             Announce announce = AnnounceHome.findByPrimaryKey( nAnnounceId );
124 
125             List<Response> listResponses = AnnounceHome.findListResponse( nAnnounceId, false );
126             announce.setListResponse(listResponses);
127 
128             UrlItem urlAnnounce = new UrlItem( strPortalUrl );
129             urlAnnounce.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( AnnounceUtils.PARAMETER_PAGE_ANNOUNCE ) ); // FIXME
130             urlAnnounce.addParameter( PARAMETER_ANNOUNCE_ID, announce.getId( ) );
131 
132             indexWriter.addDocument( getDocument( announce, urlAnnounce.getUrl( ), plugin ) );
133         }
134     }
135 
136     /**
137      * {@inheritDoc}
138      */
139     @Override
140     public synchronized void processIndexing( IndexWriter indexWriter, boolean bCreate, StringBuffer sbLogs )
141             throws IOException, InterruptedException, SiteMessageException
142     {
143         Plugin plugin = PluginService.getPlugin( AnnouncePlugin.PLUGIN_NAME );
144         List<Integer> listIdAnnounce = new ArrayList<>( );
145 
146         if ( !bCreate )
147         {
148             // incremental indexing
149             // delete all record which must be deleted
150             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_DELETE, plugin ) )
151             {
152                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_DELETE );
153 
154                 Term term = new Term( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( action.getIdAnnounce( ) ) );
155                 Term [ ] terms = {
156                         term
157                 };
158 
159                 indexWriter.deleteDocuments( terms );
160                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
161             }
162 
163             // Update all record which must be updated
164             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_MODIFY, plugin ) )
165             {
166                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_MODIFY );
167 
168                 Term term = new Term( AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString( action.getIdAnnounce( ) ) );
169                 Term [ ] terms = {
170                         term
171                 };
172 
173                 indexWriter.deleteDocuments( terms );
174 
175                 listIdAnnounce.add( action.getIdAnnounce( ) );
176 
177                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
178             }
179 
180             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
181 
182             listIdAnnounce = new ArrayList<>( );
183 
184             // add all record which must be added
185             for ( IndexerAction action : AnnounceSearchService.getInstance( ).getAllIndexerActionByTask( IndexerAction.TASK_CREATE, plugin ) )
186             {
187                 sbLogAnnounce( sbLogs, action.getIdAnnounce( ), IndexerAction.TASK_CREATE );
188                 listIdAnnounce.add( action.getIdAnnounce( ) );
189 
190                 AnnounceSearchService.getInstance( ).removeIndexerAction( action.getIdAction( ), plugin );
191             }
192 
193             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
194         }
195         else
196         {
197             for ( Announce announce : AnnounceHome.findAllPublished( AnnounceSort.DEFAULT_SORT ) )
198             {
199                 if ( !announce.getSuspended( ) && !announce.getSuspendedByUser( ) )
200                 {
201                     sbLogs.append( "Indexing Announce" );
202                     sbLogs.append( "\r\n" );
203 
204                     sbLogAnnounce( sbLogs, announce.getId( ), IndexerAction.TASK_CREATE );
205 
206                     listIdAnnounce.add( announce.getId( ) );
207                 }
208             }
209 
210             this.indexListAnnounce( indexWriter, listIdAnnounce, plugin );
211         }
212 
213         indexWriter.commit( );
214     }
215 
216     /**
217      * Get the subject document
218      * 
219      * @param strDocument
220      *            id of the subject to index
221      * @return The list of lucene documents
222      * @throws IOException
223      *             If an IO Exception occurred
224      */
225     public static List<Document> getDocuments( String strDocument ) throws IOException
226     {
227         List<org.apache.lucene.document.Document> listDocs = new ArrayList<>( );
228         String strPortalUrl = AppPathService.getPortalUrl( );
229         Plugin plugin = PluginService.getPlugin( AnnouncePlugin.PLUGIN_NAME );
230 
231         for ( Announce announce : AnnounceHome.findAllPublished( AnnounceSort.DEFAULT_SORT ) )
232         {
233             if ( !announce.getSuspended( ) && !announce.getSuspendedByUser( ) )
234             {
235                 UrlItem urlAnnounce = new UrlItem( strPortalUrl );
236                 urlAnnounce.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( AnnounceUtils.PARAMETER_PAGE_ANNOUNCE ) ); // FIXME
237                 urlAnnounce.addParameter( PARAMETER_ANNOUNCE_ID, announce.getId( ) );
238 
239                 org.apache.lucene.document.Document docAnnounce = getDocument( announce, urlAnnounce.getUrl( ), plugin );
240                 listDocs.add( docAnnounce );
241                 if ( docAnnounce != null )
242                 {
243                     IndexationService.write( docAnnounce );
244                 }
245             }
246         }
247 
248         return listDocs;
249     }
250 
251     /**
252      * Builds a document which will be used by Lucene during the indexing of the announces list
253      * 
254      * @param announce
255      *            the announce
256      * @param strUrl
257      *            the url
258      * @param plugin
259      *            the plugin
260      * @throws IOException
261      *             If an IO Exception occurred
262      * @return the document
263      */
264     public static org.apache.lucene.document.Document getDocument( Announce announce, String strUrl, Plugin plugin ) throws IOException
265     {
266         // make a new, empty document
267         org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
268 
269 
270         FieldType storedFieldType = new FieldType();
271         storedFieldType.setStored(true);
272         storedFieldType.setTokenized(true);
273         storedFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
274          storedFieldType.freeze();
275 
276         FieldType ftStoredDocs = new FieldType();
277         ftStoredDocs.setStored(true);
278         ftStoredDocs.setTokenized(false);
279         ftStoredDocs.setIndexOptions(IndexOptions.DOCS);
280         ftStoredDocs.freeze();
281 
282         // Add fields to the document
283         doc.add(new StringField(AnnounceSearchItem.FIELD_SECTOR_ID, String.valueOf(announce.getCategory().getIdSector()), Field.Store.YES));
284         doc.add(new StringField(AnnounceSearchItem.FIELD_CATEGORY_ID, String.valueOf(announce.getCategory().getId()), Field.Store.YES));
285         doc.add(new Field(AnnounceSearchItem.FIELD_ID_ANNOUNCE, Integer.toString(announce.getId()), storedFieldType));
286         doc.add(new StringField(AnnounceSearchItem.FIELD_TAGS, announce.getTags(),  Field.Store.YES));
287         // Add the url as a field named "url". Use an UnIndexed field, so
288         // that the url is just stored with the question/answer, but is not searchable.
289         doc.add(new Field(SearchItem.FIELD_URL, strUrl, ftStoredDocs));
290         // Add the uid as a field, so that index can be incrementally maintained.
291         // This field is stored so that the announce and query it in the database
292         // tokenized prior to indexing.
293         doc.add(new Field(SearchItem.FIELD_UID, String.valueOf(announce.getId()), ftStoredDocs));
294         // Add the last modified date of the file a field named "modified".
295         // Use a field that is indexed (i.e. searchable), but don't tokenize
296         // the field into words.
297         doc.add(new Field(SearchItem.FIELD_DATE, DateTools.dateToString(
298                 (announce.getTimePublication() > 0) ? new Timestamp(announce.getTimePublication()) : announce.getDateCreation(),
299                 DateTools.Resolution.DAY), ftStoredDocs));
300         if ( announce.getPrice( ) != 0.0 )
301         {
302             double dPrice = announce.getPrice( );
303             // Add the price of the announce
304             doc.add(new StringField( AnnounceSearchItem.FIELD_PRICE, AnnounceSearchService.formatPriceForIndexer( dPrice ),  Field.Store.YES) );
305         }
306         doc.add(new StringField(SearchItem.FIELD_TYPE, AnnouncePlugin.PLUGIN_NAME, Field.Store.YES));
307 
308         String strContentToIndex = getContentToIndex( announce );
309 
310         // NOUVEAU
311         ContentHandler handler = new BodyContentHandler( );
312         Metadata metadata = new Metadata( );
313 
314         try
315         {
316             new HtmlParser( ).parse( new ByteArrayInputStream( strContentToIndex.getBytes( ) ), handler, metadata, new ParseContext( ) );
317         }
318         catch( SAXException | TikaException e )
319         {
320             throw new AppException( "Error during announce parsing." );
321         }
322 
323         String strContent = handler.toString( );
324 
325         // Define field types with ngram tokenization for "contents" and "title"
326         FieldType ngramType = new FieldType();
327         ngramType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
328         ngramType.setTokenized(true);
329         ngramType.setStored(false);
330         // Add the ngram-tokenized "contents" field
331         doc.add(new Field(SearchItem.FIELD_CONTENTS, strContent, ngramType));
332 
333         // Add the ngram-tokenized "title" field
334         FieldType storedNgramType = new FieldType();
335         storedNgramType.setStored(true);
336         storedNgramType.setIndexOptions(IndexOptions.DOCS);
337         storedNgramType.setTokenized(true);
338         storedNgramType.setStoreTermVectors(true);
339         storedNgramType.freeze();
340         doc.add(new Field(SearchItem.FIELD_TITLE, announce.getTitle(), storedNgramType));
341 
342 
343         // return the document
344         return doc;
345     }
346 
347     /**
348      * Set the Content to index
349      * 
350      * @param announce
351      *            The {@link Announce} to index
352      * @param plugin
353      *            The {@link Plugin}
354      * @return The content to index
355      */
356     private static String getContentToIndex( Announce announce )
357     {
358         StringBuffer sbContentToIndex = new StringBuffer( );
359         // Do not index question here
360         sbContentToIndex.append( announce.getTitle( ) );
361         sbContentToIndex.append( BLANK_SPACE );
362         sbContentToIndex.append( announce.getDescription( ) );
363         sbContentToIndex.append( BLANK_SPACE );
364         sbContentToIndex.append( announce.getTags( ) );
365 
366         if ( !CollectionUtils.isEmpty(announce.getListResponse()) )
367         {
368         	String strAttributs = announce.getListResponse().stream()
369                 .filter( response -> StringUtils.isNotBlank(response.getResponseValue()) )
370                 .map( response -> BLANK_SPACE + response.getResponseValue() )
371                 .collect( Collectors.joining() );
372         	sbContentToIndex.append( strAttributs );
373         }
374 
375         return sbContentToIndex.toString( );
376     }
377 
378     /**
379      * {@inheritDoc}
380      */
381     @Override
382     public String getName( )
383     {
384         return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
385     }
386 
387     /**
388      * {@inheritDoc}
389      */
390     @Override
391     public String getVersion( )
392     {
393         return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
394     }
395 
396     /**
397      * {@inheritDoc}
398      */
399     @Override
400     public boolean isEnable( )
401     {
402         boolean bReturn = false;
403         String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
404 
405         if ( ( strEnable != null ) && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
406                 && PluginService.isPluginEnable( AnnouncePlugin.PLUGIN_NAME ) )
407         {
408             bReturn = true;
409         }
410 
411         return bReturn;
412     }
413 
414     /**
415      * Indexing action performed on the recording
416      * 
417      * @param sbLogs
418      *            the buffer log
419      * @param nIdAnnounce
420      *            the id of the announce
421      * @param nAction
422      *            the indexer action key performed
423      */
424     private void sbLogAnnounce( StringBuffer sbLogs, int nIdAnnounce, int nAction )
425     {
426         sbLogs.append( "Indexing Announce:" );
427 
428         switch( nAction )
429         {
430             case IndexerAction.TASK_CREATE:
431                 sbLogs.append( "Insert " );
432 
433                 break;
434 
435             case IndexerAction.TASK_MODIFY:
436                 sbLogs.append( "Modify " );
437 
438                 break;
439 
440             case IndexerAction.TASK_DELETE:
441                 sbLogs.append( "Delete " );
442 
443                 break;
444 
445             default:
446                 break;
447         }
448 
449         if ( nIdAnnounce != AnnounceUtils.CONSTANT_ID_NULL )
450         {
451             sbLogs.append( "id_announce=" );
452             sbLogs.append( nIdAnnounce );
453         }
454 
455         sbLogs.append( "\r\n" );
456     }
457 }