IndexationService.java
/*
* Copyright (c) 2002-2022, City of Paris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright notice
* and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice
* and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* License 1.0
*/
package fr.paris.lutece.portal.service.search;
import java.io.IOException;
import java.io.Serializable;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import fr.paris.lutece.portal.business.indexeraction.IndexerAction;
import fr.paris.lutece.portal.business.indexeraction.IndexerActionFilter;
import fr.paris.lutece.portal.business.indexeraction.IndexerActionHome;
import fr.paris.lutece.portal.service.init.LuteceInitException;
import fr.paris.lutece.portal.service.message.SiteMessageException;
import fr.paris.lutece.portal.service.util.AppLogService;
import fr.paris.lutece.portal.service.util.AppPathService;
import fr.paris.lutece.portal.service.util.AppPropertiesService;
/**
* This class provides management methods for indexing
*/
public final class IndexationService
{
// Constants corresponding to the variables defined in the lutece.properties file
// Property key of the index location, read by init( )
public static final String PATH_INDEX = "search.lucene.indexPath";
// Property key of the boolean telling init( ) to resolve the index location through AppPathService (defaults to true)
public static final String PATH_INDEX_IN_WEBAPP = "search.lucene.indexInWebapp";
// NOTE(review): not referenced in this class - presumably a parameter name used by callers, confirm
public static final String PARAM_FORCING = "forcing";
// Portlet id meaning that an indexer action is not restricted to one portlet
public static final int ALL_DOCUMENT = -1;
// NOTE(review): not referenced in this class - presumably exposed for other search classes, confirm
public static final Version LUCENE_INDEX_VERSION = Version.LATEST;
// Value of SearchItem.FIELD_TYPE matched to delete page documents during incremental indexing
private static final String PARAM_TYPE_PAGE = "Page";
// Property key of the class name of the Lucene analyzer instantiated by init( )
private static final String PROPERTY_ANALYSER_CLASS_NAME = "search.lucene.analyser.className";
// Index location resolved by init( )
private static String _strIndex;
// Analyzer instantiated by init( )
private static Analyzer _analyzer;
// Registered indexers, keyed by indexer name
private static Map<String, SearchIndexer> _mapIndexers = new ConcurrentHashMap<>( );
// Writer of the current indexing run, reset and opened by processIndexing( )
private static IndexWriter _writer;
// Log of the current indexing run, recreated by each call to processIndexing( )
private static StringBuilder _sbLogs;
// Orders indexers by name, ignoring case
private static SearchIndexerComparator _comparator = new SearchIndexerComparator( );
/**
 * Private constructor: this class only exposes static members and is not meant to be instantiated
 */
private IndexationService( )
{
}
/**
 * Initializes the service: resolves the location of the index and instantiates the Lucene analyzer.
 * <p>
 * The index location is read from the {@link #PATH_INDEX} property. When the {@link #PATH_INDEX_IN_WEBAPP} property is true (its default value), the
 * location is resolved through {@code AppPathService.getPath}, otherwise the raw property value is used.
 * </p>
 *
 * @throws LuteceInitException
 *             If the index path or the analyzer class name is not configured, or if the analyzer cannot be instantiated
 */
public static void init( ) throws LuteceInitException
{
    // Read configuration properties
    boolean bIndexInWebapp = AppPropertiesService.getPropertyBoolean( PATH_INDEX_IN_WEBAPP, true );

    if ( bIndexInWebapp )
    {
        _strIndex = AppPathService.getPath( PATH_INDEX );
    }
    else
    {
        _strIndex = AppPropertiesService.getProperty( PATH_INDEX );
    }

    if ( StringUtils.isEmpty( _strIndex ) )
    {
        throw new LuteceInitException( "Lucene index path not found in lucene.properties", null );
    }

    String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );

    if ( StringUtils.isEmpty( strAnalyserClassName ) )
    {
        throw new LuteceInitException( "Analyser class name not found in lucene.properties", null );
    }

    try
    {
        // Class.newInstance( ) is deprecated because it rethrows checked constructor exceptions without declaring them.
        // Going through the no-arg constructor wraps such failures in an InvocationTargetException instead.
        _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).getDeclaredConstructor( ).newInstance( );
    }
    catch( Exception e )
    {
        // Covers a missing class, a missing or failing no-arg constructor and a class that is not an Analyzer
        throw new LuteceInitException( "Failed to load Lucene Analyzer class", e );
    }
}
/**
 * Adds an indexer to the registry, under its own name. An indexer already registered under the same name is replaced.
 *
 * @param indexer
 *            The indexer to register; a null value is ignored
 */
public static void registerIndexer( SearchIndexer indexer )
{
    if ( indexer == null )
    {
        // Nothing to register
        return;
    }

    _mapIndexers.put( indexer.getName( ), indexer );
    AppLogService.info( "New search indexer registered : " + indexer.getName( ) );
}
/**
 * Unregisters an indexer. The indexer is only removed when the registry still maps its current name to this indexer, so an indexer whose name has
 * changed since its registration is left in place and an error is logged.
 *
 * @param indexer
 *            The indexer to remove from the registry; a null value is ignored
 */
public static void unregisterIndexer( SearchIndexer indexer )
{
    if ( indexer == null )
    {
        return;
    }

    // Two-argument remove: the entry is dropped only if the name is currently bound to this very indexer
    if ( _mapIndexers.remove( indexer.getName( ), indexer ) )
    {
        AppLogService.info( "Search indexer unregistered : " + indexer.getName( ) );
    }
    else
    {
        // The original message contained a duplicated word ("could not be be unregistered")
        AppLogService.error( "Search indexer " + indexer.getName( ) + " could not be unregistered" );
    }
}
/**
 * Runs an indexing pass and returns its log.
 * <p>
 * A full indexing is performed when requested by the caller or when no index exists yet in the index directory; otherwise the pending indexer actions
 * are applied incrementally. Any exception raised during the pass is reported in the log instead of being propagated. The writer and the directory are
 * closed before returning.
 * </p>
 *
 * @param bCreate
 *            true to force the creation of a new index
 * @return The log of the indexing pass
 */
public static synchronized String processIndexing( boolean bCreate )
{
    _sbLogs = new StringBuilder( );
    _writer = null;

    Directory directory = null;

    try
    {
        directory = IndexationService.getDirectoryIndex( );

        // A missing index has to be initialized, whatever the caller asked for
        boolean bIndexMissing = !DirectoryReader.indexExists( directory );
        boolean bFullIndexing = bCreate || bIndexMissing;

        long lStart = new Date( ).getTime( );

        IndexWriterConfig config = new IndexWriterConfig( _analyzer );
        config.setOpenMode( bFullIndexing ? OpenMode.CREATE : OpenMode.APPEND );
        _writer = new IndexWriter( directory, config );

        if ( bFullIndexing )
        {
            processFullIndexing( );
        }
        else
        {
            processIncrementalIndexing( );
        }

        long lDuration = new Date( ).getTime( ) - lStart;
        _sbLogs.append( "Duration of the treatment : " ).append( lDuration ).append( " milliseconds\r\n" );
    }
    catch( Exception e )
    {
        error( "Indexing error ", e, "" );
    }
    finally
    {
        // The writer is closed first, then the directory; a failure on one does not prevent closing the other
        if ( _writer != null )
        {
            try
            {
                _writer.close( );
            }
            catch( IOException e )
            {
                AppLogService.error( e.getMessage( ), e );
            }
        }

        if ( directory != null )
        {
            try
            {
                directory.close( );
            }
            catch( IOException e )
            {
                AppLogService.error( e.getMessage( ), e );
            }
        }
    }

    return _sbLogs.toString( );
}
/**
 * Indexes the whole content: every enabled indexer, taken in name order, is asked to index its documents, then all pending indexer actions are
 * removed.
 */
private static void processFullIndexing( )
{
    _sbLogs.append( "\r\nIndexing all contents ...\r\n" );

    for ( SearchIndexer searchIndexer : getIndexerListSortedByName( ) )
    {
        // A failing indexer is reported in the log and must not abort the indexing of the others
        try
        {
            if ( !searchIndexer.isEnable( ) )
            {
                continue;
            }

            _sbLogs.append( "\r\n<strong>Indexer : " ).append( searchIndexer.getName( ) ).append( " - " ).append( searchIndexer.getDescription( ) )
                    .append( "</strong>\r\n" );

            // The indexer calls write( doc ) for each of its documents
            searchIndexer.indexDocuments( );
        }
        catch( Exception e )
        {
            error( searchIndexer, e, StringUtils.EMPTY );
        }
    }

    removeAllIndexerAction( );
}
/**
 * Processes the incremental indexing: applies every pending indexer action, then reindexes all the pages.
 * <p>
 * A failure on one action is reported in the log and does not prevent the processing of the following ones. Pages are not tracked by actions: their
 * documents are all deleted and indexed again by the page indexer. When no page indexer is registered, the pages already in the index are left
 * untouched and a message is added to the log.
 * </p>
 *
 * @throws IOException
 *             if an error occurs while writing to the index
 * @throws InterruptedException
 *             if the page indexer is interrupted
 * @throws SiteMessageException
 *             if the page indexer raises a site message
 */
private static void processIncrementalIndexing( ) throws IOException, InterruptedException, SiteMessageException
{
    _sbLogs.append( "\r\nIncremental Indexing ...\r\n" );

    // incremental indexing
    Collection<IndexerAction> listActions = IndexerActionHome.getList( );

    for ( IndexerAction action : listActions )
    {
        // catch any exception coming from an indexer to prevent global indexation to fail
        try
        {
            processIndexAction( action );
        }
        catch( Exception e )
        {
            error( action, e, StringUtils.EMPTY );
        }
    }

    // reindexing all pages.
    // The indexer is looked up BEFORE deleting anything: previously the page documents were deleted first and a missing
    // page indexer then raised a NullPointerException, so the deletion was left pending on the writer without any reindexing.
    SearchIndexer pageIndexer = _mapIndexers.get( PageIndexer.INDEXER_NAME );

    if ( pageIndexer == null )
    {
        _sbLogs.append( "Page indexer is not registered : pages have not been reindexed\r\n" );
        AppLogService.error( "Indexing error : page indexer '" + PageIndexer.INDEXER_NAME + "' is not registered, pages have not been reindexed" );

        return;
    }

    _writer.deleteDocuments( new Term( SearchItem.FIELD_TYPE, PARAM_TYPE_PAGE ) );
    pageIndexer.indexDocuments( );
}
/**
 * Processes one pending indexer action, then removes it from the pending actions.
 * <p>
 * A delete action is applied directly to the index. For any other task, the documents are requested from the indexer named by the action and each
 * document targeted by the action is processed: all of them when the action is not restricted to a portlet, otherwise only the one whose portlet
 * field equals {@code uid + "&" + portlet id}.
 * </p>
 * <p>
 * When this method throws, the action is not removed and therefore stays pending.
 * </p>
 *
 * @param action
 *            The action to process
 * @throws IOException
 *             if an error occurs while writing to the index
 * @throws InterruptedException
 *             if the indexer is interrupted
 * @throws SiteMessageException
 *             if the indexer raises a site message
 * @throws IllegalStateException
 *             if the action is not a deletion and no indexer is registered under the name it refers to
 */
private static void processIndexAction( IndexerAction action ) throws IOException, InterruptedException, SiteMessageException
{
    if ( action.getIdTask( ) == IndexerAction.TASK_DELETE )
    {
        // Deleting does not need the indexer, so an unregistered indexer does not prevent cleaning the index
        deleteDocument( action );
    }
    else
    {
        String strIndexerName = action.getIndexerName( );
        SearchIndexer indexer = ( strIndexerName != null ) ? _mapIndexers.get( strIndexerName ) : null;

        if ( indexer == null )
        {
            // Explicit failure instead of a NullPointerException without message, so that the indexing log is meaningful
            throw new IllegalStateException( "No search indexer registered under the name '" + strIndexerName + "'" );
        }

        List<Document> luceneDocuments = indexer.getDocuments( action.getIdDocument( ) );

        if ( CollectionUtils.isNotEmpty( luceneDocuments ) )
        {
            for ( Document doc : luceneDocuments )
            {
                boolean bTargeted = action.getIdPortlet( ) == ALL_DOCUMENT;

                if ( !bTargeted )
                {
                    String strPortletKey = doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID );
                    bTargeted = ( strPortletKey != null ) && strPortletKey.equals( doc.get( SearchItem.FIELD_UID ) + "&" + action.getIdPortlet( ) );
                }

                if ( bTargeted )
                {
                    processDocument( action, doc );
                }
            }
        }
    }

    removeIndexerAction( action.getIdAction( ) );
}
/**
 * Removes from the index the entries designated by a delete action and logs the deletion.
 * <p>
 * When the action is not restricted to a portlet, every entry sharing the document uid is deleted; otherwise only the entry whose portlet field equals
 * {@code document id + "&" + portlet id}.
 * </p>
 *
 * @param action
 *            The current action
 * @throws IOException
 *             if an error occurs while writing to the index
 */
private static void deleteDocument( IndexerAction action ) throws IOException
{
    Term term;

    if ( action.getIdPortlet( ) == ALL_DOCUMENT )
    {
        // delete all index linked to uid
        term = new Term( SearchItem.FIELD_UID, action.getIdDocument( ) );
    }
    else
    {
        // delete only the index linked to this portlet
        term = new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID, action.getIdDocument( ) + "&" + action.getIdPortlet( ) );
    }

    _writer.deleteDocuments( term );
    _sbLogs.append( "Deleting #" ).append( action.getIdDocument( ) ).append( "\r\n" );
}
/**
 * Applies a create or modify action to the index for one document. Any other task is ignored.
 * <p>
 * A creation adds the document. A modification replaces the entries matching the portlet field of the document when the action is restricted to a
 * portlet, or the entries matching the uid of the document otherwise.
 * </p>
 *
 * @param action
 *            The current action
 * @param doc
 *            The document
 * @throws IOException
 *             if an error occurs while writing to the index
 */
private static void processDocument( IndexerAction action, Document doc ) throws IOException
{
    int nIdTask = action.getIdTask( );

    if ( nIdTask == IndexerAction.TASK_CREATE )
    {
        _writer.addDocument( doc );
        logDoc( "Adding ", doc );

        return;
    }

    if ( nIdTask != IndexerAction.TASK_MODIFY )
    {
        return;
    }

    Term term;

    if ( action.getIdPortlet( ) == ALL_DOCUMENT )
    {
        term = new Term( SearchItem.FIELD_UID, doc.getField( SearchItem.FIELD_UID ).stringValue( ) );
    }
    else
    {
        // replace only the index linked to this portlet
        term = new Term( SearchItem.FIELD_DOCUMENT_PORTLET_ID, doc.get( SearchItem.FIELD_DOCUMENT_PORTLET_ID ) );
    }

    _writer.updateDocument( term, doc );
    logDoc( "Updating ", doc );
}
/**
 * Index one document, called by plugin indexers.
 * <p>
 * The document is added through the shared writer assigned by {@link #processIndexing(boolean)} and a line describing it is appended to the log of
 * the indexing run. NOTE(review): presumably meant to be called only while an indexing run is in progress - confirm against the indexers.
 * </p>
 *
 * @param doc
 *            the document to index
 * @throws IOException
 *             if the writer fails to add the document
 */
public static void write( Document doc ) throws IOException
{
_writer.addDocument( doc );
logDoc( "Indexing ", doc );
}
/**
 * Appends to the indexing log one line describing an action made on a document: the action label followed by the type, the uid and the title of the
 * document.
 *
 * @param strAction
 *            The label of the action
 * @param doc
 *            The document
 */
private static void logDoc( String strAction, Document doc )
{
    _sbLogs.append( strAction ).append( doc.get( SearchItem.FIELD_TYPE ) );
    _sbLogs.append( " #" ).append( doc.get( SearchItem.FIELD_UID ) );
    _sbLogs.append( " - " ).append( doc.get( SearchItem.FIELD_TITLE ) );
    _sbLogs.append( "\r\n" );
}
/**
 * Reports an error raised by a search indexer, using the name of the indexer as title of the error.
 *
 * @param indexer
 *            the {@link SearchIndexer} that failed
 * @param e
 *            the exception
 * @param strMessage
 *            an additional message, may be blank
 */
public static void error( SearchIndexer indexer, Exception e, String strMessage )
{
    error( "Indexer : " + indexer.getName( ), e, strMessage );
}
/**
 * Reports an error raised while processing an indexer action. The title of the error names the indexer, the action id and the document id.
 *
 * @param action
 *            the {@link IndexerAction} that failed
 * @param e
 *            the exception
 * @param strMessage
 *            an additional message, may be blank
 */
public static void error( IndexerAction action, Exception e, String strMessage )
{
    StringBuilder sbTitle = new StringBuilder( "Action from indexer : " );
    sbTitle.append( action.getIndexerName( ) );
    sbTitle.append( " Action ID : " ).append( action.getIdAction( ) );
    sbTitle.append( " - Document ID : " ).append( action.getIdDocument( ) );

    error( sbTitle.toString( ), e, strMessage );
}
/**
 * Reports an exception, both in the indexing log and in the application log.
 * <p>
 * In the indexing log the error is written as an HTML alert block placed between a closing and a reopening {@code pre} tag. It contains the title, the
 * message of the exception, the message of its cause when there is one, and the additional message when it is not blank.
 * </p>
 *
 * @param strTitle
 *            The title of the error
 * @param e
 *            The exception to log
 * @param strMessage
 *            The additional message
 */
private static void error( String strTitle, Exception e, String strMessage )
{
    _sbLogs.append( "</pre>\r\n" ).append( "<div class=\"alert alert-danger\">\r\n" );
    _sbLogs.append( strTitle ).append( " - ERROR : " );
    _sbLogs.append( "<strong>\r\n" ).append( e.getMessage( ) ).append( "</strong>\r\n" );

    Throwable cause = e.getCause( );

    if ( cause != null )
    {
        _sbLogs.append( " : " );
        _sbLogs.append( "<strong>\r\n" ).append( cause.getMessage( ) ).append( "</strong>\r\n" );
    }

    if ( StringUtils.isNotBlank( strMessage ) )
    {
        _sbLogs.append( " - " );
        _sbLogs.append( "<strong>\r\n" ).append( strMessage ).append( "</strong>\r\n" );
    }

    _sbLogs.append( "</div>\r\n" ).append( "<pre>" );

    AppLogService.error( "Indexing error : " + e.getMessage( ), e );
}
/**
 * Opens the Lucene directory located at the index path resolved by {@link #init()}.
 * <p>
 * Each call opens a directory through {@code FSDirectory.open}; {@link #processIndexing(boolean)} closes the one it obtains. NOTE(review): other
 * callers are presumably expected to close the returned directory as well - confirm.
 * </p>
 *
 * @return The directory of the index
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
public static Directory getDirectoryIndex( ) throws IOException
{
return FSDirectory.open( Paths.get( _strIndex ) );
}
/**
 * Gets the current analyser, i.e. the instance created by {@link #init()}
 *
 * @return The analyser, or null as long as {@link #init()} has not instantiated it
 */
public static Analyzer getAnalyser( )
{
return _analyzer;
}
/**
 * Returns all registered search indexers.
 * <p>
 * The returned collection is the values view of the internal registry map, not a copy.
 * </p>
 *
 * @return A collection of indexers
 */
public static Collection<SearchIndexer> getIndexers( )
{
return _mapIndexers.values( );
}
/**
 * Returns the list of indexer actions selected by a filter set on the given task key
 *
 * @param nIdTask
 *            the task key
 * @return a list of IndexerAction
 */
public static List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
{
// The filter only carries the task key
IndexerActionFilter filter = new IndexerActionFilter( );
filter.setIdTask( nIdTask );
return IndexerActionHome.getList( filter );
}
/**
 * Removes an indexer action, delegating to {@code IndexerActionHome.remove}
 *
 * @param nIdAction
 *            the key of the action to remove
 */
public static void removeIndexerAction( int nIdAction )
{
IndexerActionHome.remove( nIdAction );
}
/**
 * Removes all indexer actions, delegating to {@code IndexerActionHome.removeAll}. Called at the end of a full indexing.
 */
public static void removeAllIndexerAction( )
{
IndexerActionHome.removeAll( );
}
/**
 * Creates an indexer action, i.e. a task to apply to a document of a given indexer, optionally restricted to a portlet.
 *
 * @param strIdDocument
 *            the id of the document
 * @param indexerName
 *            the name of the indexer
 * @param nIdTask
 *            the key of the task to perform
 * @param nIdPortlet
 *            the id of the portlet, or {@link #ALL_DOCUMENT} when the action is not restricted to a portlet
 */
public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask, int nIdPortlet )
{
    IndexerAction action = new IndexerAction( );

    action.setIdDocument( strIdDocument );
    action.setIdTask( nIdTask );
    action.setIndexerName( indexerName );
    action.setIdPortlet( nIdPortlet );

    IndexerActionHome.create( action );
}
/**
 * Add Indexer Action to perform on a record, without restricting it to a portlet: the portlet id of the action is set to {@link #ALL_DOCUMENT}
 *
 * @param strIdDocument
 *            the id of the document
 * @param indexerName
 *            the name of the indexer
 * @param nIdTask
 *            the key of the action to do
 */
public static void addIndexerAction( String strIdDocument, String indexerName, int nIdTask )
{
addIndexerAction( strIdDocument, indexerName, nIdTask, ALL_DOCUMENT );
}
/**
 * Builds a new list holding the registered indexers, sorted by name without regard to case.
 *
 * @return The sorted list
 */
private static List<SearchIndexer> getIndexerListSortedByName( )
{
    List<SearchIndexer> listSorted = new ArrayList<>( );
    listSorted.addAll( _mapIndexers.values( ) );
    Collections.sort( listSorted, _comparator );

    return listSorted;
}
/**
 * Comparator ordering search indexers by name, ignoring case
 */
private static class SearchIndexerComparator implements Comparator<SearchIndexer>, Serializable
{
    private static final long serialVersionUID = -3800252801777838562L;

    /**
     * {@inheritDoc}
     */
    @Override
    public int compare( SearchIndexer si1, SearchIndexer si2 )
    {
        String strName1 = si1.getName( );
        String strName2 = si2.getName( );

        return strName1.compareToIgnoreCase( strName2 );
    }
}
}