View Javadoc
1   /*
2    * Copyright (c) 2002-2014, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.calendar.service.search;
35  
36  import fr.paris.lutece.plugins.calendar.business.Agenda;
37  import fr.paris.lutece.plugins.calendar.business.CalendarHome;
38  import fr.paris.lutece.plugins.calendar.business.Event;
39  import fr.paris.lutece.plugins.calendar.business.OccurrenceEvent;
40  import fr.paris.lutece.plugins.calendar.business.SimpleEvent;
41  import fr.paris.lutece.plugins.calendar.business.category.Category;
42  import fr.paris.lutece.plugins.calendar.service.AgendaResource;
43  import fr.paris.lutece.plugins.calendar.service.CalendarPlugin;
44  import fr.paris.lutece.plugins.calendar.service.Utils;
45  import fr.paris.lutece.plugins.calendar.web.Constants;
46  import fr.paris.lutece.portal.service.content.XPageAppService;
47  import fr.paris.lutece.portal.service.message.SiteMessageException;
48  import fr.paris.lutece.portal.service.plugin.Plugin;
49  import fr.paris.lutece.portal.service.plugin.PluginService;
50  import fr.paris.lutece.portal.service.search.IndexationService;
51  import fr.paris.lutece.portal.service.search.SearchIndexer;
52  import fr.paris.lutece.portal.service.search.SearchItem;
53  import fr.paris.lutece.portal.service.util.AppException;
54  import fr.paris.lutece.portal.service.util.AppPathService;
55  import fr.paris.lutece.portal.service.util.AppPropertiesService;
56  import fr.paris.lutece.util.url.UrlItem;
57  
58  import java.io.ByteArrayInputStream;
59  import java.io.IOException;
60  import java.util.ArrayList;
61  import java.util.Collection;
62  import java.util.Iterator;
63  import java.util.List;
64  
65  import org.apache.lucene.document.Document;
66  import org.apache.lucene.document.Field;
67  import org.apache.lucene.document.FieldType;
68  import org.apache.lucene.document.StringField;
69  import org.apache.lucene.document.TextField;
70  import org.apache.tika.exception.TikaException;
71  import org.apache.tika.metadata.Metadata;
72  import org.apache.tika.parser.ParseContext;
73  import org.apache.tika.parser.html.HtmlParser;
74  import org.apache.tika.sax.BodyContentHandler;
75  import org.xml.sax.ContentHandler;
76  import org.xml.sax.SAXException;
77  
78  
79  /**
80   * CalendarIndexer
81   */
82  public class CalendarIndexer implements SearchIndexer
83  {
84      //properties
85      public static final String PROPERTY_INDEXER_NAME = "calendar.indexer.name";
86      public static final String SHORT_NAME = "cld";
87      private static final String ENABLE_VALUE_TRUE = "1";
88      private static final String PROPERTY_INDEXER_DESCRIPTION = "calendar.indexer.description";
89      private static final String PROPERTY_INDEXER_VERSION = "calendar.indexer.version";
90      private static final String PROPERTY_INDEXER_ENABLE = "calendar.indexer.enable";
91      private static final String PROPERTY_DESCRIPTION_MAX_CHARACTERS = "calendar.description.max.characters";
92      private static final String BLANK = " ";
93      private static final String PROPERTY_DESCRIPTION_ETC = "...";
94      private static final String JSP_SEARCH_CALENDAR = "jsp/site/Portal.jsp?page=calendar&action=search";
95  
96      /**
97       * Index all documents
98       * 
99       * @throws IOException the exception
100      * @throws InterruptedException the exception
101      * @throws SiteMessageException the exception
102      */
103     public void indexDocuments( ) throws IOException, InterruptedException, SiteMessageException
104     {
105         String sRoleKey = "";
106 
107         for ( AgendaResource agenda : Utils.getAgendaResourcesWithOccurrences( ) )
108         {
109             sRoleKey = agenda.getRole( );
110 
111             String strAgenda = agenda.getId( );
112 
113             for ( Event oEvent : agenda.getAgenda( ).getEvents( ) )
114             {
115                 indexSubject( oEvent, sRoleKey, strAgenda );
116             }
117         }
118     }
119 
120     /**
121      * Recursive method for indexing a calendar event
122      * @param oEvent the event
123      * @param sRoleKey the role key
124      * @param strAgenda the agenda
125      * @throws IOException I/O Exception
126      * @throws InterruptedException interruptedException
127      */
128     public void indexSubject( Event oEvent, String sRoleKey, String strAgenda ) throws IOException,
129             InterruptedException
130     {
131         OccurrenceEvent occurrence = (OccurrenceEvent) oEvent;
132 
133         if ( occurrence.getStatus( ).equals(
134                 AppPropertiesService.getProperty( Constants.PROPERTY_EVENT_STATUS_CONFIRMED ) ) )
135         {
136             String strPortalUrl = AppPathService.getPortalUrl( );
137 
138             UrlItem urlEvent = new UrlItem( strPortalUrl );
139             urlEvent.addParameter( XPageAppService.PARAM_XPAGE_APP, CalendarPlugin.PLUGIN_NAME );
140             urlEvent.addParameter( Constants.PARAMETER_ACTION, Constants.ACTION_SHOW_RESULT );
141             urlEvent.addParameter( Constants.PARAMETER_EVENT_ID, occurrence.getEventId( ) );
142             urlEvent.addParameter( Constants.PARAM_AGENDA, strAgenda );
143 
144             org.apache.lucene.document.Document docSubject = null;
145             try
146             {
147                 docSubject = getDocument( occurrence, sRoleKey, urlEvent.getUrl( ), strAgenda );
148             }
149             catch ( Exception e )
150             {
151                 String strMessage = "Agenda ID : " + strAgenda + " - Occurrence ID : " + occurrence.getId( );
152                 IndexationService.error( this, e, strMessage );
153             }
154             if ( docSubject != null )
155             {
156                 IndexationService.write( docSubject );
157             }
158         }
159     }
160 
161     /**
162      * Get the calendar document
163      * @param strDocument id of the subject to index
164      * @return The list of lucene documents
165      * @throws IOException the exception
166      * @throws InterruptedException the exception
167      * @throws SiteMessageException the exception
168      */
169     public List<Document> getDocuments( String strDocument ) throws IOException, InterruptedException,
170             SiteMessageException
171     {
172         List<org.apache.lucene.document.Document> listDocs = new ArrayList<org.apache.lucene.document.Document>( );
173         String strPortalUrl = AppPathService.getPortalUrl( );
174         Plugin plugin = PluginService.getPlugin( CalendarPlugin.PLUGIN_NAME );
175 
176         OccurrenceEvent occurrence = CalendarHome.findOccurrence( Integer.parseInt( strDocument ), plugin );
177         if ( !occurrence.getStatus( ).equals(
178                 AppPropertiesService.getProperty( Constants.PROPERTY_EVENT_STATUS_CONFIRMED ) ) )
179         {
180             return null;
181         }
182 
183         SimpleEvent event = CalendarHome.findEvent( occurrence.getEventId( ), plugin );
184 
185         AgendaResource agendaResource = CalendarHome.findAgendaResource( event.getIdCalendar( ), plugin );
186         Utils.loadAgendaOccurrences( agendaResource, plugin );
187 
188         String sRoleKey = agendaResource.getRole( );
189         Agenda agenda = agendaResource.getAgenda( );
190 
191         UrlItem urlEvent = new UrlItem( strPortalUrl );
192         urlEvent.addParameter( XPageAppService.PARAM_XPAGE_APP, CalendarPlugin.PLUGIN_NAME );
193         urlEvent.addParameter( Constants.PARAMETER_ACTION, Constants.ACTION_SHOW_RESULT );
194         urlEvent.addParameter( Constants.PARAMETER_EVENT_ID, occurrence.getEventId( ) );
195         urlEvent.addParameter( Constants.PARAM_AGENDA, agenda.getKeyName( ) );
196 
197         org.apache.lucene.document.Document docEvent = getDocument( occurrence, sRoleKey, urlEvent.getUrl( ),
198                 agenda.getKeyName( ) );
199 
200         listDocs.add( docEvent );
201 
202         return listDocs;
203     }
204 
205     /**
206      * Builds a document which will be used by Lucene during the indexing of the
207      * calendar list
208      * @param occurrence The occurence event
209      * @param strUrl the url of the subject
210      * @param strRoleKey The role key
211      * @param strAgenda the calendar id
212      * @return A Lucene {@link Document} containing QuestionAnswer Data
213      * @throws IOException The IO Exception
214      * @throws InterruptedException The InterruptedException
215      */
216     public static org.apache.lucene.document.Document getDocument( OccurrenceEvent occurrence, String strRoleKey,
217             String strUrl, String strAgenda ) throws IOException, InterruptedException
218     {
219         // make a new, empty document
220         org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
221 
222         FieldType ft = new FieldType( StringField.TYPE_STORED );
223         ft.setOmitNorms( false );
224 
225         FieldType ftNotStored = new FieldType( StringField.TYPE_NOT_STORED );
226         ft.setOmitNorms( false );
227 
228         //add the id of the calendar
229         doc.add( new Field( Constants.FIELD_CALENDAR_ID, strAgenda + "_" + Constants.CALENDAR_SHORT_NAME, ftNotStored ) );
230 
231         //add the category of the event
232         Collection<Category> arrayCategories = occurrence.getListCategories( );
233         String strCategories = Constants.EMPTY_STRING;
234 
235         if ( arrayCategories != null )
236         {
237             Iterator<Category> i = arrayCategories.iterator( );
238 
239             while ( i.hasNext( ) )
240             {
241                 strCategories += ( i.next( ).getId( ) + BLANK );
242             }
243         }
244 
245         doc.add( new Field( Constants.FIELD_CATEGORY, strCategories, TextField.TYPE_NOT_STORED ) );
246 
247         doc.add( new Field( SearchItem.FIELD_ROLE, strRoleKey, ft ) );
248 
249         // Add the url as a field named "url".  Use an UnIndexed field, so
250         // that the url is just stored with the question/answer, but is not searchable.
251         doc.add( new Field( SearchItem.FIELD_URL, strUrl, ft ) );
252 
253         // Add the uid as a field, so that index can be incrementally maintained.
254         // This field is not stored with question/answer, it is indexed, but it is not
255         // tokenized prior to indexing.
256         String strIdEvent = String.valueOf( occurrence.getId( ) );
257         doc.add( new Field( SearchItem.FIELD_UID, strIdEvent + "_" + Constants.CALENDAR_SHORT_NAME, ft ) );
258 
259         // Add the last modified date of the file a field named "modified".
260         // Use a field that is indexed (i.e. searchable), but don't tokenize
261         // the field into words.
262         String strDate = Utils.getDate( occurrence.getDate( ) );
263         doc.add( new Field( SearchItem.FIELD_DATE, strDate, ft ) );
264 
265         String strContentToIndex = getContentToIndex( occurrence );
266         ContentHandler handler = new BodyContentHandler( );
267         Metadata metadata = new Metadata( );
268 
269         try
270         {
271             new HtmlParser( ).parse( new ByteArrayInputStream( strContentToIndex.getBytes( ) ), handler, metadata,
272                     new ParseContext( ) );
273         }
274         catch ( SAXException e )
275         {
276             throw new AppException( "Error during page parsing." );
277         }
278         catch ( TikaException e )
279         {
280             throw new AppException( "Error during page parsing." );
281         }
282 
283         //the content of the article is recovered in the parser because this one
284         //had replaced the encoded caracters (as &eacute;) by the corresponding special caracter (as ?)
285         StringBuilder sb = new StringBuilder( occurrence.getTitle( ) + " - " + handler.toString( ) );
286 
287         // Add the description as a summary field, so that index can be incrementally maintained.
288         // This field is stored, but it is not indexed
289         int length = AppPropertiesService.getPropertyInt( PROPERTY_DESCRIPTION_MAX_CHARACTERS, 200 );
290         String strDescription = Utils.parseHtmlToPlainTextString( occurrence.getDescription( ) );
291 
292         if ( strDescription.length( ) > length )
293         {
294             strDescription = strDescription.substring( 0, length ) + PROPERTY_DESCRIPTION_ETC;
295         }
296 
297         doc.add( new Field( SearchItem.FIELD_SUMMARY, strDescription, TextField.TYPE_STORED ) );
298         doc.add( new Field( CalendarSearchItem.FIELD_HTML_SUMMARY, occurrence.getDescription( ), TextField.TYPE_STORED ) );
299 
300         // Add the tag-stripped contents as a Reader-valued Text field so it will
301         // get tokenized and indexed.
302         doc.add( new Field( SearchItem.FIELD_CONTENTS, sb.toString( ), TextField.TYPE_NOT_STORED ) );
303 
304         // Add the subject name as a separate Text field, so that it can be searched
305         // separately.
306         doc.add( new Field( SearchItem.FIELD_TITLE, occurrence.getTitle( ), TextField.TYPE_STORED ) );
307 
308         doc.add( new Field( SearchItem.FIELD_TYPE, CalendarPlugin.PLUGIN_NAME, ft ) );
309 
310         // return the document
311         return doc;
312     }
313 
314     /**
315      * Set the Content to index (Description, location)
316      * @param event The Event
317      * @return The content to index
318      */
319     private static String getContentToIndex( Event event )
320     {
321         StringBuffer sbContentToIndex = new StringBuffer( );
322         //Do not index question here
323         sbContentToIndex.append( event.getDescription( ) );
324         sbContentToIndex.append( BLANK );
325         sbContentToIndex.append( event.getLocationAddress( ) );
326         sbContentToIndex.append( BLANK );
327         sbContentToIndex.append( event.getLocationTown( ) );
328         sbContentToIndex.append( BLANK );
329         sbContentToIndex.append( event.getLocationZip( ) );
330 
331         return sbContentToIndex.toString( );
332     }
333 
334     /**
335      * Returns the indexer service name
336      * @return the indexer service name
337      */
338     public String getName( )
339     {
340         return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
341     }
342 
343     /**
344      * Returns the indexer service version
345      * @return the indexer service version
346      */
347     public String getVersion( )
348     {
349         return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
350     }
351 
352     /**
353      * Returns the indexer service description
354      * @return the indexer service description
355      */
356     public String getDescription( )
357     {
358         return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
359     }
360 
361     /**
362      * Tells whether the service is enable or not
363      * @return true if enable, otherwise false
364      */
365     public boolean isEnable( )
366     {
367         boolean bReturn = false;
368         String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
369 
370         if ( ( strEnable != null )
371                 && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
372                 && PluginService.isPluginEnable( CalendarPlugin.PLUGIN_NAME ) )
373         {
374             bReturn = true;
375         }
376 
377         return bReturn;
378     }
379 
380     /**
381      * {@inheritDoc}
382      */
383     public List<String> getListType( )
384     {
385         List<String> listType = new ArrayList<String>( );
386         listType.add( CalendarPlugin.PLUGIN_NAME );
387 
388         return listType;
389     }
390 
391     /**
392      * {@inheritDoc}
393      */
394     public String getSpecificSearchAppUrl( )
395     {
396         return JSP_SEARCH_CALENDAR;
397     }
398 }