View Javadoc

1   /*
2    * Copyright (c) 2002-2014, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.digglike.service.search;
35  
36  import fr.paris.lutece.plugins.digglike.business.CommentSubmit;
37  import fr.paris.lutece.plugins.digglike.business.Digg;
38  import fr.paris.lutece.plugins.digglike.business.DiggFilter;
39  import fr.paris.lutece.plugins.digglike.business.DiggHome;
40  import fr.paris.lutece.plugins.digglike.business.DiggSubmit;
41  import fr.paris.lutece.plugins.digglike.business.SubmitFilter;
42  import fr.paris.lutece.plugins.digglike.service.DiggSubmitService;
43  import fr.paris.lutece.plugins.digglike.service.DigglikePlugin;
44  import fr.paris.lutece.plugins.digglike.service.digglikesearch.DigglikeSearchItem;
45  import fr.paris.lutece.plugins.digglike.web.DiggApp;
46  import fr.paris.lutece.portal.service.content.XPageAppService;
47  import fr.paris.lutece.portal.service.message.SiteMessageException;
48  import fr.paris.lutece.portal.service.plugin.Plugin;
49  import fr.paris.lutece.portal.service.plugin.PluginService;
50  import fr.paris.lutece.portal.service.search.IndexationService;
51  import fr.paris.lutece.portal.service.search.SearchIndexer;
52  import fr.paris.lutece.portal.service.search.SearchItem;
53  import fr.paris.lutece.portal.service.util.AppException;
54  import fr.paris.lutece.portal.service.util.AppPathService;
55  import fr.paris.lutece.portal.service.util.AppPropertiesService;
56  import fr.paris.lutece.util.url.UrlItem;
57  
58  import java.io.ByteArrayInputStream;
59  import java.io.IOException;
60  import java.io.StringWriter;
61  import java.util.ArrayList;
62  import java.util.List;
63  
64  import org.apache.lucene.document.Document;
65  import org.apache.lucene.document.Field;
66  import org.apache.lucene.document.FieldType;
67  import org.apache.lucene.document.StringField;
68  import org.apache.lucene.document.TextField;
69  import org.apache.tika.exception.TikaException;
70  import org.apache.tika.metadata.Metadata;
71  import org.apache.tika.parser.ParseContext;
72  import org.apache.tika.parser.html.HtmlParser;
73  import org.apache.tika.sax.BodyContentHandler;
74  import org.xml.sax.ContentHandler;
75  import org.xml.sax.SAXException;
76  
77  
78  /**
79   * DiggLikeIndexer
80   * 
81   */
82  public class DigglikeIndexer implements SearchIndexer
83  {
84      public static final String INDEX_TYPE_DIGG = "digg";
85      public static final String PROPERTY_INDEXER_NAME = "digglike.indexer.name";
86      public static final String SHORT_NAME = "dgl";
87      private static final String ENABLE_VALUE_TRUE = "1";
88      private static final String PROPERTY_INDEXER_DESCRIPTION = "digglike.indexer.description";
89      private static final String PROPERTY_INDEXER_VERSION = "digglike.indexer.version";
90      private static final String PROPERTY_INDEXER_ENABLE = "digglike.indexer.enable";
91      private static final String PROPERTY_XPAGE_APPLICATION_ID = "digglike.xpage.applicationId";
92      private static final String JSP_SEARCH_DIGG = "jsp/site/Portal.jsp?page=digg";
93  
94      // request parameters
95      private static final String PARAMETER_ID_DIGG = "id_digg";
96      private static final String PARAMETER_ID_DIGG_SUBMIT = "id_digg_submit";
97  
98      /**
99       * Returns the indexer service description
100      * @return The indexer service description
101      */
102     public String getDescription( )
103     {
104         return AppPropertiesService.getProperty( PROPERTY_INDEXER_DESCRIPTION );
105     }
106 
107     /**
108      * Index digglike documents
109      * @throws IOException Exception
110      * @throws InterruptedException Exception
111      * @throws SiteMessageException Exception
112      */
113     public void indexDocuments( ) throws IOException, InterruptedException, SiteMessageException
114     {
115         Plugin plugin = PluginService.getPlugin( DigglikePlugin.PLUGIN_NAME );
116 
117         //filter on digg state(the digg submit are add if the digg is activated)
118         DiggFilter diggFilter = new DiggFilter( );
119         diggFilter.setIdState( Digg.STATE_ENABLE );
120 
121         List<Digg> diggActivatedList = DiggHome.getDiggList( diggFilter, plugin );
122         List<Integer> diggSubmitActivatedList;
123 
124         SubmitFilter submitFilter = new SubmitFilter( );
125         //        submitFilter.setIdDiggSubmitState( DiggSubmit.STATE_PUBLISH );
126         submitFilter.getSortBy( ).add( SubmitFilter.SORT_BY_SCORE_DESC );
127         submitFilter.getSortBy( ).add( SubmitFilter.SORT_BY_DATE_RESPONSE_DESC );
128 
129         for ( Digg digg : diggActivatedList )
130         {
131             submitFilter.setIdDigg( digg.getIdDigg( ) );
132             diggSubmitActivatedList = DiggSubmitService.getService( ).getDiggSubmitListId( submitFilter, plugin );
133 
134             for ( Integer idDiggSubmit : diggSubmitActivatedList )
135             {
136                 //url.addParameter( DiggApp.PARAMETER_CLEAR_FILTER,DiggApp.PARAMETER_CLEAR_FILTER);
137                 //url.setAnchor(DiggApp.ANCHOR_DIGG_SUBMIT+diggSubmit.getIdDiggSubmit());
138                 List<Document> listDocDiggSubmit = null;
139 
140                 try
141                 {
142                     listDocDiggSubmit = getDocuments( idDiggSubmit.toString( ) );
143                 }
144                 catch ( Exception e )
145                 {
146                     String strMessage = "Digg ID : " + digg.getIdDigg( );
147                     IndexationService.error( this, e, strMessage );
148                 }
149 
150                 if ( ( listDocDiggSubmit != null ) && ( listDocDiggSubmit.size( ) != 0 ) )
151                 {
152                     for ( Document docDiggSubmit : listDocDiggSubmit )
153                     {
154                         IndexationService.write( docDiggSubmit );
155                     }
156                 }
157             }
158         }
159     }
160 
161     /**
162      * Return Lucene documents
163      * @param strIdDigg the if of the digg
164      * @return a list of Documents
165      * @throws IOException Exception
166      * @throws InterruptedException Exception
167      * @throws SiteMessageException Exception
168      */
169     public List<Document> getDocuments( String strIdDiggSubmit ) throws IOException, InterruptedException,
170             SiteMessageException
171     {
172         List<org.apache.lucene.document.Document> listDocs = new ArrayList<org.apache.lucene.document.Document>( );
173         String strPortalUrl = AppPathService.getPortalUrl( );
174         Integer nIdDiggSubmit = Integer.parseInt( strIdDiggSubmit );
175         Plugin plugin = PluginService.getPlugin( DigglikePlugin.PLUGIN_NAME );
176         DiggSubmit diggSubmit = DiggSubmitService.getService( ).findByPrimaryKey( nIdDiggSubmit, true, plugin );
177 
178         //
179         //SubmitFilter commentFilter=new SubmitFilter();
180         // commentFilter.setIdDiggSubmit(nIdDiggSubmit);
181         if ( diggSubmit != null )
182         {
183             //Add comment
184             //diggSubmit.setComments(CommentSubmitHome.getCommentSubmitList(commentFilter, plugin)); 
185             UrlItem url = new UrlItem( strPortalUrl );
186             url.addParameter( XPageAppService.PARAM_XPAGE_APP,
187                     AppPropertiesService.getProperty( PROPERTY_XPAGE_APPLICATION_ID, "digg" ) );
188             url.addParameter( PARAMETER_ID_DIGG, diggSubmit.getDigg( ).getIdDigg( ) );
189             url.addParameter( PARAMETER_ID_DIGG_SUBMIT, diggSubmit.getIdDiggSubmit( ) );
190             url.addParameter( DiggApp.PARAMETER_DIGG_DETAIL, 1 );
191 
192             //url.addParameter( DiggApp.PARAMETER_CLEAR_FILTER,DiggApp.PARAMETER_CLEAR_FILTER);
193             //url.setAnchor(DiggApp.ANCHOR_DIGG_SUBMIT+diggSubmit.getIdDiggSubmit());
194             org.apache.lucene.document.Document docDiggSubmit = getDocument( diggSubmit, url.getUrl( ) );
195             listDocs.add( docDiggSubmit );
196         }
197 
198         //}
199         return listDocs;
200     }
201 
202     /**
203      * Builds a document which will be used by Lucene during the indexing of the
204      * digg submit list
205      * 
206      * @param diggSubmit the digg submit to index
207      * @param strUrl the url of digg submit
208      * @return a lucene document
209      * 
210      * @throws IOException The IO Exception
211      * @throws InterruptedException The InterruptedException
212      */
213     public static org.apache.lucene.document.Document getDocument( DiggSubmit diggSubmit, String strUrl )
214             throws IOException, InterruptedException
215     {
216         // make a new, empty document
217         org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document( );
218 
219         FieldType ft = new FieldType( StringField.TYPE_STORED );
220         ft.setOmitNorms( false );
221 
222         FieldType ftNo = new FieldType( StringField.TYPE_STORED );
223         ftNo.setIndexed( false );
224         ftNo.setTokenized( false );
225         ftNo.setOmitNorms( false );
226 
227         // Add the url as a field named "url".  Use an UnIndexed field, so
228         // that the url is just stored with the question/answer, but is not searchable.
229         doc.add( new Field( SearchItem.FIELD_URL, strUrl, ft ) );
230 
231         doc.add( new Field( DigglikeSearchItem.FIELD_ID_DIGG, String.valueOf( diggSubmit.getDigg( ).getIdDigg( ) ), ft ) );
232         doc.add( new Field( DigglikeSearchItem.FIELD_ID_DIGG_SUBMIT, Integer.toString( diggSubmit.getIdDiggSubmit( ) ),
233                 ft ) );
234 
235         // Add the uid as a field, so that index can be incrementally maintained.
236         // This field is not stored with question/answer, it is indexed, but it is not
237         // tokenized prior to indexing.
238         doc.add( new Field( DigglikeSearchItem.FIELD_UID, String.valueOf( diggSubmit.getIdDiggSubmit( ) ) + "_"
239                 + SHORT_NAME, ft ) );
240         //Add state
241         doc.add( new Field( DigglikeSearchItem.FIELD_STATE, Integer.toString( diggSubmit.getDiggSubmitState( )
242                 .getIdDiggSubmitState( ) ), ft ) );
243 
244         StringWriter writerFieldContent = new StringWriter( );
245         writerFieldContent.write( diggSubmit.getDiggSubmitValue( ) );
246 
247         //Add the list of comments
248         if ( diggSubmit.getComments( ) != null )
249         {
250             for ( CommentSubmit comment : diggSubmit.getComments( ) )
251             {
252                 writerFieldContent.write( comment.getValue( ) );
253             }
254         }
255 
256         ContentHandler handler = new BodyContentHandler( );
257         Metadata metadata = new Metadata( );
258         try
259         {
260             new HtmlParser( ).parse( new ByteArrayInputStream( writerFieldContent.toString( ).getBytes( ) ), handler,
261                     metadata, new ParseContext( ) );
262         }
263         catch ( SAXException e )
264         {
265             throw new AppException( "Error during page parsing." );
266         }
267         catch ( TikaException e )
268         {
269             throw new AppException( "Error during page parsing." );
270         }
271 
272         //the content of the article is recovered in the parser because this one
273         //had replaced the encoded caracters (as &eacute;) by the corresponding special caracter (as ?)
274         StringBuilder sb = new StringBuilder( handler.toString( ) );
275         // Add the tag-stripped contents as a Reader-valued Text field so it will
276         // get tokenized and indexed.
277         doc.add( new Field( DigglikeSearchItem.FIELD_CONTENTS, sb.toString( ), TextField.TYPE_NOT_STORED ) );
278 
279         // Add the title as a separate Text field, so that it can be searched
280         // separately.
281         doc.add( new Field( DigglikeSearchItem.FIELD_TITLE, diggSubmit.getDiggSubmitTitle( ), ftNo ) );
282 
283         // Add the summary as an UnIndexed field, so that it is stored and returned
284         // with hit documents for display.
285         // doc.add( new Field( SearchItem.FIELD_SUMMARY, diggSubmit.getDiggSubmitValueShowInTheList(), Field.Store.YES, Field.Index.NO ) );
286         doc.add( new Field( DigglikeSearchItem.FIELD_TYPE, INDEX_TYPE_DIGG, ft ) );
287 
288         Plugin plugin = PluginService.getPlugin( DigglikePlugin.PLUGIN_NAME );
289         Digg digg = DiggHome.findByPrimaryKey( diggSubmit.getDigg( ).getIdDigg( ), plugin );
290 
291         if( digg.getRole() != null )
292         {
293             doc.add( new Field( SearchItem.FIELD_ROLE, digg.getRole( ), ft ) );
294         }
295 
296         // return the document
297         return doc;
298     }
299 
300     /**
301      * Get the name of the indexer.
302      * @return The name
303      */
304     public String getName( )
305     {
306         return AppPropertiesService.getProperty( PROPERTY_INDEXER_NAME );
307     }
308 
309     /**
310      * Get the version of the indexer
311      * @return The version number
312      */
313     public String getVersion( )
314     {
315         return AppPropertiesService.getProperty( PROPERTY_INDEXER_VERSION );
316     }
317 
318     /**
319      * Get the state of indexer
320      * @return Return true if the indexer is enabled, false else.
321      */
322     public boolean isEnable( )
323     {
324         boolean bReturn = false;
325         String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE );
326 
327         if ( ( strEnable != null )
328                 && ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) || strEnable.equals( ENABLE_VALUE_TRUE ) )
329                 && PluginService.isPluginEnable( DigglikePlugin.PLUGIN_NAME ) )
330         {
331             bReturn = true;
332         }
333 
334         return bReturn;
335     }
336 
337     /**
338      * {@inheritDoc}
339      */
340     public List<String> getListType( )
341     {
342         List<String> listType = new ArrayList<String>( );
343         listType.add( INDEX_TYPE_DIGG );
344 
345         return listType;
346     }
347 
348     /**
349      * {@inheritDoc}
350      */
351     public String getSpecificSearchAppUrl( )
352     {
353         return JSP_SEARCH_DIGG;
354     }
355 }