/*
 * Copyright (c) 2002-2020, City of Paris
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice
 *     and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright notice
 *     and the following disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * License 1.0
 */
34  package fr.paris.lutece.plugins.blog.modules.solr.indexer;
35  
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.collections.CollectionUtils;
import org.xml.sax.ContentHandler;

import fr.paris.lutece.plugins.blog.business.Blog;
import fr.paris.lutece.plugins.blog.business.DocContent;
import fr.paris.lutece.plugins.blog.business.DocContentHome;
import fr.paris.lutece.plugins.blog.business.Tag;
import fr.paris.lutece.plugins.blog.business.portlet.BlogPublication;
import fr.paris.lutece.plugins.blog.service.BlogService;
import fr.paris.lutece.plugins.blog.utils.BlogUtils;
import fr.paris.lutece.plugins.search.solr.business.field.Field;
import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexer;
import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexerService;
import fr.paris.lutece.plugins.search.solr.indexer.SolrItem;
import fr.paris.lutece.plugins.search.solr.util.LuteceSolrException;
import fr.paris.lutece.plugins.search.solr.util.SolrConstants;
import fr.paris.lutece.plugins.search.solr.util.TikaIndexerUtil;
import fr.paris.lutece.portal.service.util.AppException;
import fr.paris.lutece.portal.service.util.AppLogService;
import fr.paris.lutece.portal.service.util.AppPropertiesService;
import fr.paris.lutece.util.url.UrlItem;

65  
66  /**
67   * The indexer service for Solr.
68   *
69   */
70  public class SolrBlogIndexer implements SolrIndexer
71  {
72      public static final String BEAN_NAME = "blog-solr.solrBlogIndexer";
73      private static final String TYPE = "blogs";
74      private static final String COMMENT = "comment";
75      private static final String LABEL = "label";
76      private static final String HTML_CONTENT = "htmlContent";
77  
78      private static final String PARAMETER_PORTLET_ID = "portlet_id";
79      private static final String PROPERTY_INDEXER_ENABLE = "solr.indexer.document.enable";
80      private static final String PROPERTY_NAME = "blog-solr.indexer.name";
81      private static final String PROPERTY_DESCRIPTION = "blog-solr.indexer.description";
82      private static final String PROPERTY_VERSION = "blog-solr.indexer.version";
83      private static final String PARAMETER_BLOG_ID = "id";
84      private static final String PARAMETER_XPAGE = "page";
85      private static final String XPAGE_BLOG = "blog";
86      private static final List<String> LIST_RESSOURCES_NAME = new ArrayList<>( );
87      private static final String SHORT_NAME = "blog";
88      private static final String DOC_INDEXATION_ERROR = "[SolrBlogIndexer] An error occured during the indexation of the document number ";
89      private static final String DOC_PARSING_ERROR = "[SolrBlogIndexer] Error during document parsing. ";
90  
91      /**
92       * Creates a new SolrPageIndexer
93       */
94      public SolrBlogIndexer( )
95      {
96          LIST_RESSOURCES_NAME.add( BlogUtils.CONSTANT_TYPE_RESOURCE );
97      }
98  
99      @Override
100     public boolean isEnable( )
101     {
102         return "true".equalsIgnoreCase( AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE ) );
103     }
104 
105     /**
106      * {@inheritDoc}
107      */
108     @Override
109     public List<String> indexDocuments( )
110     {
111         List<String> lstErrors = new ArrayList<>( );
112         List<Integer> listDocument = new ArrayList<>( );
113 
114         Collection<SolrItem> solrItems = new ArrayList<>( );
115 
116         for ( Blog document : BlogService.getInstance( ).getListBlogWithoutBinaries( ) )
117         {
118             try
119             {
120 
121                 if ( !listDocument.contains( document.getId( ) ) )
122                 {
123                     // Generates the item to index
124                     SolrItem item = getItem( document );
125 
126                     if ( item != null )
127                     {
128                         solrItems.add( item );
129                     }
130                     listDocument.add( document.getId( ) );
131                 }
132             }
133             catch ( Exception e )
134             {
135                 lstErrors.add( SolrIndexerService.buildErrorMessage( e ) );
136                 AppLogService.error( DOC_INDEXATION_ERROR + document.getId( ), e );
137             }
138         }
139 
140         if ( CollectionUtils.isNotEmpty( solrItems ) )
141         {
142             try
143             {
144                 SolrIndexerService.write( solrItems );
145             }
146             catch ( Exception e )
147             {
148                 lstErrors.add( SolrIndexerService.buildErrorMessage( e ) );
149                 AppLogService.error( DOC_INDEXATION_ERROR, e );
150             }
151         }
152         return lstErrors;
153     }
154 
155     /**
156      * Index list of documents
157      * 
158      * @param listIdDocument
159      * @return error LIST
160      * @throws LuteceSolrException
161      */
162     public List<String> indexListDocuments( List<Integer> listIdDocument ) throws LuteceSolrException
163     {
164         List<String> lstErrors = new ArrayList<>( );
165 
166         Collection<SolrItem> solrItems = new ArrayList<>( );
167 
168         for ( Integer d : listIdDocument )
169         {
170 
171             Blog document = BlogService.getInstance( ).findByPrimaryKeyWithoutBinaries( d );
172             // Generates the item to index
173             if ( document != null )
174             {
175                 SolrItem item = getItem( document );
176 
177                 if ( item != null )
178                 {
179                     solrItems.add( item );
180                 }
181 
182             }
183         }
184 
185         if ( CollectionUtils.isNotEmpty( solrItems ) )
186         {
187             try
188             {
189                 SolrIndexerService.write( solrItems );
190             }
191             catch ( Exception e )
192             {
193                 lstErrors.add( SolrIndexerService.buildErrorMessage( e ) );
194                 AppLogService.error( DOC_INDEXATION_ERROR, e );
195                 throw new LuteceSolrException( DOC_INDEXATION_ERROR, e );
196             }
197         }
198         return lstErrors;
199     }
200 
201     /**
202      * Builds a document which will be used by solr during the indexing of the pages
203      * of the site with the following fields : summary, uid, url, contents, title
204      * and description.
205      * 
206      * @param document The document
207      * @return The item
208      */
209     private SolrItem getItem( Blog document )
210     {
211         // Search for published blogs.
212         Date today = new Date( );
213         List<BlogPublication> listBlogPublications = document.getBlogPublication( ).stream( ).filter(
214                 bp -> bp.getDateBeginPublishing( ).before( today ) && bp.getDateEndPublishing( ).after( today ) )
215                 .collect( Collectors.toList( ) );
216 
217         if ( CollectionUtils.isEmpty( listBlogPublications ) )
218         {
219             return null;
220         }
221 
222         // the item
223         SolrItem item = new SolrItem( );
224         item.setUid( getResourceUid( Integer.toString( document.getId( ) ), BlogUtils.CONSTANT_TYPE_RESOURCE ) );
225         item.setDate( document.getUpdateDate( ) );
226         item.setSummary( document.getDescription( ) );
227         item.setTitle( document.getName( ) );
228         item.setType( TYPE );
229         item.setSite( SolrIndexerService.getWebAppName( ) );
230         item.setRole( "none" );
231         String portlet = listBlogPublications.stream( ).map( BlogPublication::getIdPortlet ).map( String::valueOf )
232                 .collect( Collectors.joining( SolrConstants.CONSTANT_AND ) );
233         item.setDocPortletId( portlet );
234 
235         // Reload the full object to get all its searchable attributes
236         UrlItem url = new UrlItem( SolrIndexerService.getBaseUrl( ) );
237         url.addParameter( PARAMETER_XPAGE, XPAGE_BLOG );
238         url.addParameter( PARAMETER_BLOG_ID, document.getId( ) );
239         url.addParameter( PARAMETER_PORTLET_ID, listBlogPublications.get( 0 ).getIdPortlet( ) );
240         item.setUrl( url.getUrl( ) );
241 
242         // Date Hierarchy
243         GregorianCalendar calendar = new GregorianCalendar( );
244         calendar.setTime( document.getUpdateDate( ) );
245         item.setHieDate( calendar.get( Calendar.YEAR ) + "/" + ( calendar.get( Calendar.MONTH ) + 1 ) + "/"
246                 + calendar.get( Calendar.DAY_OF_MONTH ) + "/" );
247 
248         List<String> categorie = new ArrayList<>( );
249 
250         for ( Tag cat : document.getTag( ) )
251         {
252             categorie.add( cat.getName( ) );
253         }
254 
255         item.setCategorie( categorie );
256 
257         // The content
258         String strContentToIndex = getContentToIndex( document, item );
259         try
260         {
261             ContentHandler handler =  TikaIndexerUtil.parseHtml( strContentToIndex );
262             item.setContent( handler.toString( ) );
263             
264             List<DocContent> list = DocContentHome.getDocsContentByHtmlDoc( document.getId( ) );
265             if ( CollectionUtils.isNotEmpty( list ) )
266             {
267                 // Parse All Doc Contents
268                 TikaIndexerUtil.addFileContentToSolrItem( item, list.stream( ).map( DocContent::getBinaryValue ).collect( Collectors.toList( ) ) );
269             }
270         }
271         catch ( LuteceSolrException e )
272         {
273             throw new AppException( DOC_PARSING_ERROR, e );
274         }
275 
276         return item;
277     }
278 
279     /**
280      * GEt the content to index
281      * 
282      * @param document The document
283      * @param item     The SolR item
284      * @return The content
285      */
286     private static String getContentToIndex( Blog document, SolrItem item )
287     {
288         StringBuilder sbContentToIndex = new StringBuilder( );
289         sbContentToIndex.append( document.getName( ) );
290         sbContentToIndex.append( " " );
291         sbContentToIndex.append( document.getHtmlContent( ) );
292         sbContentToIndex.append( " " );
293         sbContentToIndex.append( document.getDescription( ) );
294 
295         item.addDynamicField( COMMENT, document.getEditComment( ) );
296         item.addDynamicField( LABEL, document.getContentLabel( ) );
297         item.addDynamicField( HTML_CONTENT, document.getHtmlContent( ) );
298         return sbContentToIndex.toString( );
299     }
300 
301     // GETTERS & SETTERS
302     /**
303      * Returns the name of the indexer.
304      *
305      * @return the name of the indexer
306      */
307     @Override
308     public String getName( )
309     {
310         return AppPropertiesService.getProperty( PROPERTY_NAME );
311     }
312 
313     /**
314      * Returns the version.
315      *
316      * @return the version.
317      */
318     @Override
319     public String getVersion( )
320     {
321         return AppPropertiesService.getProperty( PROPERTY_VERSION );
322     }
323     
324     /**
325      * {@inheritDoc}
326      */
327     @Override
328     public String getDescription( )
329     {
330         return AppPropertiesService.getProperty( PROPERTY_DESCRIPTION );
331     }
332 
333     /**
334      * {@inheritDoc}
335      */
336     @Override
337     public List<Field> getAdditionalFields( )
338     {
339         return new ArrayList<>( );
340     }
341 
342     /**
343      * {@inheritDoc}
344      */
345     @Override
346     public List<SolrItem> getDocuments( String strIdDocument )
347     {
348         List<SolrItem> lstItems = new ArrayList<>( );
349 
350         int nIdDocument = Integer.parseInt( strIdDocument );
351         Blog document = BlogService.getInstance( ).findByPrimaryKeyWithoutBinaries( nIdDocument );
352 
353         try
354         {
355             SolrItem sorlItem = getItem( document );
356             if ( sorlItem != null )
357             {
358                 lstItems.add( sorlItem );
359             }
360         }
361         catch ( Exception e )
362         {
363             throw new AppException( e.getMessage( ), e );
364         }
365 
366         return lstItems;
367     }
368 
369     /**
370      * {@inheritDoc}
371      */
372     @Override
373     public List<String> getResourcesName( )
374     {
375         return LIST_RESSOURCES_NAME;
376     }
377 
378     /**
379      * {@inheritDoc}
380      */
381     @Override
382     public String getResourceUid( String strResourceId, String strResourceType )
383     {
384         StringBuilder sb = new StringBuilder( strResourceId );
385         sb.append( SolrConstants.CONSTANT_UNDERSCORE ).append( SHORT_NAME );
386 
387         return sb.toString( );
388     }
389 }