SolrBlogIndexer.java
/*
* Copyright (c) 2002-2020, City of Paris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright notice
* and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice
* and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* License 1.0
*/
package fr.paris.lutece.plugins.blog.modules.solr.indexer;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils;
import org.xml.sax.ContentHandler;
import fr.paris.lutece.plugins.blog.business.Blog;
import fr.paris.lutece.plugins.blog.business.DocContent;
import fr.paris.lutece.plugins.blog.business.DocContentHome;
import fr.paris.lutece.plugins.blog.business.Tag;
import fr.paris.lutece.plugins.blog.business.portlet.BlogPublication;
import fr.paris.lutece.plugins.blog.service.BlogService;
import fr.paris.lutece.plugins.blog.utils.BlogUtils;
import fr.paris.lutece.plugins.search.solr.business.field.Field;
import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexer;
import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexerService;
import fr.paris.lutece.plugins.search.solr.indexer.SolrItem;
import fr.paris.lutece.plugins.search.solr.util.LuteceSolrException;
import fr.paris.lutece.plugins.search.solr.util.SolrConstants;
import fr.paris.lutece.plugins.search.solr.util.TikaIndexerUtil;
import fr.paris.lutece.portal.service.util.AppException;
import fr.paris.lutece.portal.service.util.AppLogService;
import fr.paris.lutece.portal.service.util.AppPropertiesService;
import fr.paris.lutece.util.url.UrlItem;
/**
 * Solr indexer for blog posts.
 *
 * Converts the currently published {@link Blog} entries into {@link SolrItem}s and hands them over to
 * {@link SolrIndexerService} for writing.
 */
public class SolrBlogIndexer implements SolrIndexer
{
    public static final String BEAN_NAME = "blog-solr.solrBlogIndexer";
    private static final String TYPE = "blogs";
    private static final String COMMENT = "comment";
    private static final String LABEL = "label";
    private static final String HTML_CONTENT = "htmlContent";
    private static final String PARAMETER_PORTLET_ID = "portlet_id";
    private static final String PROPERTY_INDEXER_ENABLE = "solr.indexer.document.enable";
    private static final String PROPERTY_NAME = "blog-solr.indexer.name";
    private static final String PROPERTY_DESCRIPTION = "blog-solr.indexer.description";
    private static final String PROPERTY_VERSION = "blog-solr.indexer.version";
    private static final String PARAMETER_BLOG_ID = "id";
    private static final String PARAMETER_XPAGE = "page";
    private static final String XPAGE_BLOG = "blog";
    private static final List<String> LIST_RESSOURCES_NAME = new ArrayList<>( );
    private static final String SHORT_NAME = "blog";
    private static final String DOC_INDEXATION_ERROR = "[SolrBlogIndexer] An error occured during the indexation of the document number ";
    private static final String DOC_PARSING_ERROR = "[SolrBlogIndexer] Error during document parsing. ";

    static
    {
        // Filled exactly once per class load. Doing this in the constructor would append a duplicate
        // entry to this shared static list every time an indexer instance is created.
        LIST_RESSOURCES_NAME.add( BlogUtils.CONSTANT_TYPE_RESOURCE );
    }

    /**
     * Creates a new SolrBlogIndexer
     */
    public SolrBlogIndexer( )
    {
        // Nothing to initialise: the resource name list is populated by the static initializer.
    }

    /**
     * Tells whether this indexer is enabled.
     *
     * @return true when the {@value #PROPERTY_INDEXER_ENABLE} property equals "true" (case-insensitive)
     */
    @Override
    public boolean isEnable( )
    {
        return "true".equalsIgnoreCase( AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE ) );
    }

    /**
     * Indexes every blog returned by the blog service.
     *
     * A failure while building the item of one blog is logged and recorded in the returned list, then the
     * remaining blogs are still processed. A failure of the final bulk write is logged and recorded as well.
     *
     * @return the list of error messages collected during indexing (empty when everything succeeded)
     */
    @Override
    public List<String> indexDocuments( )
    {
        List<String> lstErrors = new ArrayList<>( );
        // Ids already handled; a hash set keeps the duplicate check constant-time.
        Set<Integer> setProcessedIds = new HashSet<>( );
        Collection<SolrItem> solrItems = new ArrayList<>( );

        for ( Blog document : BlogService.getInstance( ).getListBlogWithoutBinaries( ) )
        {
            try
            {
                if ( !setProcessedIds.contains( document.getId( ) ) )
                {
                    // Generates the item to index
                    SolrItem item = getItem( document );

                    if ( item != null )
                    {
                        solrItems.add( item );
                    }

                    // Marked as processed only once getItem has returned without throwing.
                    setProcessedIds.add( document.getId( ) );
                }
            }
            catch ( Exception e )
            {
                lstErrors.add( SolrIndexerService.buildErrorMessage( e ) );
                AppLogService.error( DOC_INDEXATION_ERROR + document.getId( ), e );
            }
        }

        if ( CollectionUtils.isNotEmpty( solrItems ) )
        {
            try
            {
                SolrIndexerService.write( solrItems );
            }
            catch ( Exception e )
            {
                lstErrors.add( SolrIndexerService.buildErrorMessage( e ) );
                AppLogService.error( DOC_INDEXATION_ERROR, e );
            }
        }

        return lstErrors;
    }

    /**
     * Indexes the blogs identified by the given ids. Ids for which no blog is found are skipped.
     *
     * Unlike {@link #indexDocuments()}, an exception raised while building an item is not caught here and
     * propagates to the caller.
     *
     * @param listIdDocument
     *            the ids of the blogs to index
     * @return the list of error messages; since a failed write is rethrown, the list returned is empty
     * @throws LuteceSolrException
     *             if writing the items to Solr fails
     */
    public List<String> indexListDocuments( List<Integer> listIdDocument ) throws LuteceSolrException
    {
        List<String> lstErrors = new ArrayList<>( );
        Collection<SolrItem> solrItems = new ArrayList<>( );

        for ( Integer d : listIdDocument )
        {
            Blog document = BlogService.getInstance( ).findByPrimaryKeyWithoutBinaries( d );

            // Generates the item to index
            if ( document != null )
            {
                SolrItem item = getItem( document );

                if ( item != null )
                {
                    solrItems.add( item );
                }
            }
        }

        if ( CollectionUtils.isNotEmpty( solrItems ) )
        {
            try
            {
                SolrIndexerService.write( solrItems );
            }
            catch ( Exception e )
            {
                lstErrors.add( SolrIndexerService.buildErrorMessage( e ) );
                AppLogService.error( DOC_INDEXATION_ERROR, e );
                throw new LuteceSolrException( DOC_INDEXATION_ERROR, e );
            }
        }

        return lstErrors;
    }

    /**
     * Builds the Solr item of a blog: uid, date, summary, title, type, site, role, portlet ids, url, date
     * hierarchy, categories, dynamic fields and parsed content (including attached document contents).
     *
     * Only publications whose begin date is before now and whose end date is after now are considered.
     *
     * @param document
     *            The document
     * @return The item, or null when the blog has no publication active at the current date
     * @throws AppException
     *             if the content parsing raises a {@link LuteceSolrException}
     */
    private SolrItem getItem( Blog document )
    {
        // Search for published blogs.
        Date today = new Date( );
        List<BlogPublication> listBlogPublications = document.getBlogPublication( ).stream( ).filter(
                bp -> bp.getDateBeginPublishing( ).before( today ) && bp.getDateEndPublishing( ).after( today ) )
                .collect( Collectors.toList( ) );

        if ( CollectionUtils.isEmpty( listBlogPublications ) )
        {
            return null;
        }

        // the item
        SolrItem item = new SolrItem( );
        item.setUid( getResourceUid( Integer.toString( document.getId( ) ), BlogUtils.CONSTANT_TYPE_RESOURCE ) );
        item.setDate( document.getUpdateDate( ) );
        item.setSummary( document.getDescription( ) );
        item.setTitle( document.getName( ) );
        item.setType( TYPE );
        item.setSite( SolrIndexerService.getWebAppName( ) );
        item.setRole( "none" );

        // Ids of all the portlets currently publishing the blog, joined with the Solr AND constant
        String portlet = listBlogPublications.stream( ).map( BlogPublication::getIdPortlet ).map( String::valueOf )
                .collect( Collectors.joining( SolrConstants.CONSTANT_AND ) );
        item.setDocPortletId( portlet );

        // Url of the blog page; it points to the first active publication's portlet
        UrlItem url = new UrlItem( SolrIndexerService.getBaseUrl( ) );
        url.addParameter( PARAMETER_XPAGE, XPAGE_BLOG );
        url.addParameter( PARAMETER_BLOG_ID, document.getId( ) );
        url.addParameter( PARAMETER_PORTLET_ID, listBlogPublications.get( 0 ).getIdPortlet( ) );
        item.setUrl( url.getUrl( ) );

        // Date Hierarchy, formatted as "year/month/day/" (Calendar.MONTH is zero-based, hence the + 1)
        GregorianCalendar calendar = new GregorianCalendar( );
        calendar.setTime( document.getUpdateDate( ) );
        item.setHieDate( calendar.get( Calendar.YEAR ) + "/" + ( calendar.get( Calendar.MONTH ) + 1 ) + "/"
                + calendar.get( Calendar.DAY_OF_MONTH ) + "/" );

        // The tag names are indexed as categories
        List<String> categorie = new ArrayList<>( );

        for ( Tag cat : document.getTag( ) )
        {
            categorie.add( cat.getName( ) );
        }

        item.setCategorie( categorie );

        // The content
        String strContentToIndex = getContentToIndex( document, item );

        try
        {
            ContentHandler handler = TikaIndexerUtil.parseHtml( strContentToIndex );
            item.setContent( handler.toString( ) );

            List<DocContent> list = DocContentHome.getDocsContentByHtmlDoc( document.getId( ) );

            if ( CollectionUtils.isNotEmpty( list ) )
            {
                // Parse All Doc Contents
                TikaIndexerUtil.addFileContentToSolrItem( item, list.stream( ).map( DocContent::getBinaryValue ).collect( Collectors.toList( ) ) );
            }
        }
        catch ( LuteceSolrException e )
        {
            throw new AppException( DOC_PARSING_ERROR, e );
        }

        return item;
    }

    /**
     * Get the content to index: name, html content and description separated by single spaces. As a side
     * effect, registers the edit comment, content label and html content as dynamic fields of the item.
     *
     * @param document
     *            The document
     * @param item
     *            The SolR item
     * @return The content
     */
    private static String getContentToIndex( Blog document, SolrItem item )
    {
        StringBuilder sbContentToIndex = new StringBuilder( );
        appendIfPresent( sbContentToIndex, document.getName( ) );
        sbContentToIndex.append( " " );
        appendIfPresent( sbContentToIndex, document.getHtmlContent( ) );
        sbContentToIndex.append( " " );
        appendIfPresent( sbContentToIndex, document.getDescription( ) );

        item.addDynamicField( COMMENT, document.getEditComment( ) );
        item.addDynamicField( LABEL, document.getContentLabel( ) );
        item.addDynamicField( HTML_CONTENT, document.getHtmlContent( ) );

        return sbContentToIndex.toString( );
    }

    /**
     * Appends a value to the builder unless it is null, so that the literal text "null" never ends up in
     * the indexed content.
     *
     * @param sb
     *            The builder to append to
     * @param value
     *            The value to append, may be null
     */
    private static void appendIfPresent( StringBuilder sb, Object value )
    {
        if ( value != null )
        {
            sb.append( value );
        }
    }

    // GETTERS & SETTERS
    /**
     * Returns the name of the indexer.
     *
     * @return the name of the indexer
     */
    @Override
    public String getName( )
    {
        return AppPropertiesService.getProperty( PROPERTY_NAME );
    }

    /**
     * Returns the version.
     *
     * @return the version.
     */
    @Override
    public String getVersion( )
    {
        return AppPropertiesService.getProperty( PROPERTY_VERSION );
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String getDescription( )
    {
        return AppPropertiesService.getProperty( PROPERTY_DESCRIPTION );
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public List<Field> getAdditionalFields( )
    {
        return new ArrayList<>( );
    }

    /**
     * Builds the Solr items of a single blog.
     *
     * @param strIdDocument
     *            the blog id, as a decimal integer string
     * @return a list holding the blog's item; empty when the blog is not found or has no active publication
     * @throws AppException
     *             if building the item fails
     */
    @Override
    public List<SolrItem> getDocuments( String strIdDocument )
    {
        List<SolrItem> lstItems = new ArrayList<>( );
        int nIdDocument = Integer.parseInt( strIdDocument );
        Blog document = BlogService.getInstance( ).findByPrimaryKeyWithoutBinaries( nIdDocument );

        if ( document == null )
        {
            // Unknown id: nothing to index. Same treatment as in indexListDocuments, and avoids a
            // NullPointerException being rewrapped as an AppException without a message.
            return lstItems;
        }

        try
        {
            SolrItem sorlItem = getItem( document );

            if ( sorlItem != null )
            {
                lstItems.add( sorlItem );
            }
        }
        catch ( Exception e )
        {
            throw new AppException( e.getMessage( ), e );
        }

        return lstItems;
    }

    /**
     * Returns the resource type names handled by this indexer.
     *
     * @return a new list holding the resource names; modifying it does not affect the indexer
     */
    @Override
    public List<String> getResourcesName( )
    {
        // Defensive copy: the backing list is static and shared by all instances.
        return new ArrayList<>( LIST_RESSOURCES_NAME );
    }

    /**
     * Builds the Solr uid of a resource: the resource id, the Solr underscore constant, then the indexer
     * short name. The resource type is not used.
     *
     * @param strResourceId
     *            the resource id
     * @param strResourceType
     *            the resource type (ignored)
     * @return the uid
     */
    @Override
    public String getResourceUid( String strResourceId, String strResourceType )
    {
        StringBuilder sb = new StringBuilder( strResourceId );
        sb.append( SolrConstants.CONSTANT_UNDERSCORE ).append( SHORT_NAME );

        return sb.toString( );
    }
}