BlogSearchService.java

/*
 * Copyright (c) 2002-2021, City of Paris
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice
 *     and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright notice
 *     and the following disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * License 1.0
 */
package fr.paris.lutece.plugins.blog.service.docsearch;

import fr.paris.lutece.plugins.blog.business.BlogSearchFilter;
import fr.paris.lutece.plugins.blog.business.IndexerAction;
import fr.paris.lutece.plugins.blog.business.IndexerActionFilter;
import fr.paris.lutece.plugins.blog.business.IndexerActionHome;
import fr.paris.lutece.plugins.blog.service.BlogPlugin;
import fr.paris.lutece.portal.service.search.LuceneSearchEngine;
import fr.paris.lutece.portal.service.search.SearchItem;
import fr.paris.lutece.portal.service.search.SearchResult;
import fr.paris.lutece.portal.service.spring.SpringContextService;
import fr.paris.lutece.portal.service.util.AppException;
import fr.paris.lutece.portal.service.util.AppLogService;
import fr.paris.lutece.portal.service.util.AppPathService;
import fr.paris.lutece.portal.service.util.AppPropertiesService;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.Paths;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;

/**
 * Blog Search Service
 */
public final class BlogSearchService
{
    // Property key : merge factor applied to the merge policy of the index writer
    private static final String PROPERTY_WRITER_MERGE_FACTOR = "blog.internalIndexer.lucene.writer.mergeFactor";
    // Property key : class name of the Lucene analyzer instantiated by the constructor
    private static final String PROPERTY_ANALYSER_CLASS_NAME = "blog.internalIndexer.lucene.analyser.className";
    // Key passed to AppPathService.getPath( ) to resolve the index location
    private static final String PATH_INDEX = "blog.internalIndexer.lucene.indexPath";
    // Resolved index path, opened as a file system directory for indexing and searching
    private volatile String _strIndex;
    // Suffix appended to the user name when building the user clause of a search query
    private static final String WILDCARD = "*";

    // Default values
    // Fallback value passed to AppPropertiesService.getPropertyInt( ) for the merge factor
    private static final int DEFAULT_WRITER_MERGE_FACTOR = 20;

    // Singleton instance, created on the first call to getInstance( )
    private static BlogSearchService _singleton;

    // Analyzer used both by the index writer and by the query parser
    private Analyzer _analyzer;
    // Indexer bean ("blog.blogIndexer") that performs the actual indexing work
    private IBlogSearchIndexer _indexer;
    // Merge factor read from the properties (see PROPERTY_WRITER_MERGE_FACTOR)
    private int _nWriterMergeFactor;

    /**
     * Creates a new instance of BlogSearchService.
     *
     * Reads the index path, the writer merge factor and the analyzer class name from the properties, looks up the "blog.blogIndexer" bean and instantiates
     * the configured Lucene analyzer through its no-argument constructor.
     *
     * @throws AppException
     *             if the index path or the analyzer class name is not configured, or if the analyzer cannot be instantiated
     */
    private BlogSearchService( )
    {
        _strIndex = AppPathService.getPath( PATH_INDEX );
        if ( _strIndex == null )
        {
            throw new AppException( "Index path not defined. Property : blog.internalIndexer.lucene.indexPath in blogs.properties" );
        }

        _nWriterMergeFactor = AppPropertiesService.getPropertyInt( PROPERTY_WRITER_MERGE_FACTOR, DEFAULT_WRITER_MERGE_FACTOR );

        String strAnalyserClassName = AppPropertiesService.getProperty( PROPERTY_ANALYSER_CLASS_NAME );

        if ( StringUtils.isEmpty( strAnalyserClassName ) )
        {
            throw new AppException( "Analyser class name not found in blogs.properties", null );
        }

        _indexer = SpringContextService.getBean( "blog.blogIndexer" );

        try
        {
            // Class.newInstance( ) is deprecated and propagates constructor exceptions unwrapped;
            // going through the declared constructor reports every reflective failure uniformly.
            _analyzer = (Analyzer) Class.forName( strAnalyserClassName ).getDeclaredConstructor( ).newInstance( );
        }
        catch( ReflectiveOperationException e )
        {
            throw new AppException( "Failed to load Lucene Analyzer class", e );
        }
    }

    /**
     * Get the BlogSearchService instance, creating it on the first call.
     *
     * The method is synchronized so that concurrent first calls cannot each build their own instance.
     *
     * @return The {@link BlogSearchService}
     */
    public static synchronized BlogSearchService getInstance( )
    {
        if ( _singleton == null )
        {
            _singleton = new BlogSearchService( );
        }
        return _singleton;
    }

    /**
     * Search the index with the given filter and collect the ids of the matching blog posts.
     *
     * Results without an id are skipped. If an id cannot be parsed as an integer, the error is logged and the id list is emptied; the count returned by the
     * search is still returned.
     *
     * @param filter
     *            The search filter
     * @param listIdBlog
     *            Output list receiving the ids of the blog posts found
     * @return The number of items returned by the search
     */
    public int getSearchResults( BlogSearchFilter filter, List<Integer> listIdBlog )
    {
        int nTotal = 0;

        try
        {
            List<SearchResult> listFound = new ArrayList<>( );
            nTotal = getSearchResultsByFilter( filter, listFound );

            for ( SearchResult found : listFound )
            {
                String strId = found.getId( );
                if ( strId == null )
                {
                    continue;
                }
                listIdBlog.add( Integer.valueOf( strId ) );
            }
        }
        catch( NumberFormatException e )
        {
            AppLogService.error( e.getMessage( ), e );
            // A malformed id invalidates the whole id list
            listIdBlog.clear( );
        }

        return nTotal;
    }

    /**
     * Process indexing
     *
     * Opens the index directory and an index writer, delegates the indexing work to the blog indexer and returns a textual log of the run. Any exception
     * (including a failure while closing the writer or the directory) is logged and appended to the returned log instead of being propagated.
     *
     * @param bCreate
     *            true for start full indexing false for begin incremental indexing
     * @return the log
     */
    public String processIndexing( boolean bCreate )
    {
        StringBuilder sbLogs = new StringBuilder( );
        sbLogs.append( "\r\nIndexing all contents ...\r\n" );

        // The directory is a resource of its own : closing the writer does not close it
        try ( Directory dir = FSDirectory.open( Paths.get( _strIndex ) ) )
        {
            // No index yet : force a full creation whatever the caller asked for
            boolean bCreateIndex = bCreate || !DirectoryReader.indexExists( dir );

            IndexWriterConfig conf = new IndexWriterConfig( _analyzer );
            LogMergePolicy mergePolicy = new LogDocMergePolicy( );
            mergePolicy.setMergeFactor( _nWriterMergeFactor );
            conf.setMergePolicy( mergePolicy );
            conf.setOpenMode( bCreateIndex ? OpenMode.CREATE : OpenMode.APPEND );

            // Declared after the directory so that the writer is closed first
            try ( IndexWriter writer = new IndexWriter( dir, conf ) )
            {
                long lStart = System.currentTimeMillis( );

                sbLogs.append( "\r\n<strong>Indexer : " );
                sbLogs.append( _indexer.getName( ) );
                sbLogs.append( " - " );
                sbLogs.append( _indexer.getDescription( ) );
                sbLogs.append( "</strong>\r\n" );
                _indexer.processIndexing( writer, bCreateIndex, sbLogs );

                long lEnd = System.currentTimeMillis( );

                sbLogs.append( "Duration of the treatment : " );
                sbLogs.append( lEnd - lStart );
                sbLogs.append( " milliseconds\r\n" );
            }
        }
        catch( Exception e )
        {
            sbLogs.append( " caught a " );
            sbLogs.append( e.getClass( ) );
            sbLogs.append( "\n with message: " );
            sbLogs.append( e.getMessage( ) );
            sbLogs.append( "\r\n" );
            AppLogService.error( "Indexing error : " + e.getMessage( ), e );
        }

        return sbLogs.toString( );
    }

    /**
     * Get search results
     *
     * Runs the query built from the filter against the index, sorted on the update date field in descending order, and converts the matching documents to
     * {@link SearchResult} objects. Any exception raised while searching is logged; the documents read so far are still converted.
     *
     * @param filter
     *            The filter
     * @param listSearchResult
     *            The list of results
     * @return The result count
     */
    private int getSearchResultsByFilter( BlogSearchFilter filter, List<SearchResult> listSearchResult )
    {
        List<SearchItem> listResults = new ArrayList<>( );
        int nNbResults = 0;

        // Both the directory and the reader are closed by the try-with-resources statement,
        // no explicit close of the reader is needed in the body.
        try ( Directory dir = FSDirectory.open( Paths.get( _strIndex ) ) ; DirectoryReader reader = DirectoryReader.open( dir ) )
        {
            IndexSearcher searcher = new IndexSearcher( reader );
            Query queryMulti = prepareQueryForFilter( filter );

            // Descending sort on the update date field
            Sort sorter = new Sort( new SortField( BlogSearchItem.FIELD_DATE_UPDATE, SortField.Type.LONG, true ) );

            TopDocs topDocs = searcher.search( queryMulti, LuceneSearchEngine.MAX_RESPONSES, sorter );
            ScoreDoc [ ] hits = topDocs.scoreDocs;
            nNbResults = hits.length;

            for ( ScoreDoc hit : hits )
            {
                Document document = searcher.doc( hit.doc );
                listResults.add( new SearchItem( document ) );
            }
        }
        catch( Exception e )
        {
            AppLogService.error( e.getMessage( ), e );
        }

        convertList( listResults, listSearchResult );
        return nNbResults;
    }

    /**
     * Build the Lucene query corresponding to a search filter.
     *
     * One clause is produced per criterion set on the filter (keywords, tags, user, editing user, update date range, unpublished flag) plus a mandatory clause
     * on the item type. Each clause is rendered with {@code Query.toString( )} and the whole set is re-parsed by {@link MultiFieldQueryParser} with the
     * configured analyzer, every clause being required.
     *
     * NOTE(review): filter values are inserted as-is into the parsed query strings, so query syntax characters in them are presumably interpreted by the
     * parser (the user clause relies on this for its trailing wildcard) - confirm that this is intended for the other fields.
     *
     * @param filter
     *            The search filter
     * @return The parsed query
     * @throws org.apache.lucene.queryparser.classic.ParseException
     *             if one of the generated query strings cannot be parsed
     */
    private Query prepareQueryForFilter( BlogSearchFilter filter ) throws org.apache.lucene.queryparser.classic.ParseException
    {
        List<String> listQueries = new ArrayList<>( );
        List<String> listFields = new ArrayList<>( );
        List<BooleanClause.Occur> listFlags = new ArrayList<>( );

        if ( StringUtils.isNotBlank( filter.getKeywords( ) ) )
        {
            Query queryKeywords = new TermQuery( new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) ) );
            addMustClause( queryKeywords, SearchItem.FIELD_CONTENTS, listQueries, listFields, listFlags );
        }

        if ( filter.getTag( ) != null )
        {
            for ( String strTag : filter.getTag( ) )
            {
                Query queryTag = new TermQuery( new Term( BlogSearchItem.FIELD_TAGS, strTag ) );
                addMustClause( queryTag, BlogSearchItem.FIELD_TAGS, listQueries, listFields, listFlags );
            }
        }

        if ( filter.getUser( ) != null )
        {
            // The trailing wildcard is interpreted when the clause is re-parsed
            Query queryUser = new TermQuery( new Term( BlogSearchItem.FIELD_USER, filter.getUser( ) + WILDCARD ) );
            addMustClause( queryUser, BlogSearchItem.FIELD_USER, listQueries, listFields, listFlags );
        }

        if ( filter.getUserEditedBlogVersion( ) != null )
        {
            Query queryEditor = new TermQuery( new Term( BlogSearchItem.FIELD_USERS_EDITED_BLOG, filter.getUserEditedBlogVersion( ) ) );
            addMustClause( queryEditor, BlogSearchItem.FIELD_USERS_EDITED_BLOG, listQueries, listFields, listFlags );
        }

        Date dateAfter = filter.getUpdateDateAfter( );
        Date dateBefore = filter.getUpdateDateBefor( );

        if ( dateAfter != null || dateBefore != null )
        {
            // A missing bound stays null (open range) and is flagged as not inclusive
            BytesRef lowerBound = ( dateAfter == null ) ? null : new BytesRef( DateTools.dateToString( dateAfter, DateTools.Resolution.MINUTE ) );
            BytesRef upperBound = ( dateBefore == null ) ? null : new BytesRef( DateTools.dateToString( dateBefore, DateTools.Resolution.MINUTE ) );

            Query queryDate = new TermRangeQuery( SearchItem.FIELD_DATE, lowerBound, upperBound, dateAfter != null, dateBefore != null );
            addMustClause( queryDate, SearchItem.FIELD_DATE, listQueries, listFields, listFlags );
        }

        if ( filter.getIsUnpulished( ) > 0 )
        {
            // 1 is indexed as "true", any other positive value as "false"
            String strUnpublished = String.valueOf( filter.getIsUnpulished( ) == 1 );
            Query queryUnpublished = new TermQuery( new Term( BlogSearchItem.FIELD_UNPUBLISHED, strUnpublished ) );
            addMustClause( queryUnpublished, BlogSearchItem.FIELD_UNPUBLISHED, listQueries, listFields, listFlags );
        }

        // Always restrict the search to the items of this plugin
        Query queryType = new TermQuery( new Term( SearchItem.FIELD_TYPE, BlogPlugin.PLUGIN_NAME ) );
        addMustClause( queryType, SearchItem.FIELD_TYPE, listQueries, listFields, listFlags );

        String [ ] arrayQueries = listQueries.toArray( new String [ listQueries.size( )] );
        String [ ] arrayFields = listFields.toArray( new String [ listFields.size( )] );
        BooleanClause.Occur [ ] arrayFlags = listFlags.toArray( new BooleanClause.Occur [ listFlags.size( )] );

        return MultiFieldQueryParser.parse( arrayQueries, arrayFields, arrayFlags, _analyzer );
    }

    /**
     * Register a required clause : the textual form of the query, its field and the MUST flag are appended to the three parallel collections.
     *
     * @param query
     *            The clause to register
     * @param strField
     *            The field the clause applies to
     * @param queries
     *            The collection of query strings
     * @param fields
     *            The collection of field names
     * @param flags
     *            The collection of occurrence flags
     */
    private static void addMustClause( Query query, String strField, Collection<String> queries, Collection<String> fields,
            Collection<BooleanClause.Occur> flags )
    {
        queries.add( query.toString( ) );
        fields.add( strField );
        flags.add( BooleanClause.Occur.MUST );
    }

    /**
     * Record an indexer action to perform on a blog post.
     *
     * A new {@link IndexerAction} carrying the blog id and the task key is created through {@link IndexerActionHome}.
     *
     * @param nIdBlog
     *            Blog id
     * @param nIdTask
     *            the key of the action to do
     */
    public void addIndexerAction( int nIdBlog, int nIdTask )
    {
        IndexerAction action = new IndexerAction( );
        action.setIdBlog( nIdBlog );
        action.setIdTask( nIdTask );

        IndexerActionHome.create( action );
    }

    /**
     * Remove an indexer action. The removal is delegated to {@link IndexerActionHome}.
     *
     * @param nIdAction
     *            the key of the action to remove
     */
    public void removeIndexerAction( int nIdAction )
    {
        IndexerActionHome.remove( nIdAction );
    }

    /**
     * Return the list of indexer actions registered for a task key.
     *
     * @param nIdTask
     *            the task key
     * @return the list of {@link IndexerAction} returned by {@link IndexerActionHome} for a filter set on this task key
     */
    public List<IndexerAction> getAllIndexerActionByTask( int nIdTask )
    {
        IndexerActionFilter taskFilter = new IndexerActionFilter( );
        taskFilter.setIdTask( nIdTask );

        List<IndexerAction> listActions = IndexerActionHome.getList( taskFilter );
        return listActions;
    }

    /**
     * Convert a list of SearchItem into SearchResult objects appended to the result list.
     *
     * Id, url, title, summary and type are copied as they are. The date is parsed from its indexed string form; when it cannot be parsed an error is logged
     * and the result is added without a date.
     *
     * @param listSource
     *            The source list
     * @param listSearchResult
     *            The result list
     */
    private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
    {
        for ( SearchItem source : listSource )
        {
            SearchResult converted = new SearchResult( );

            // Plain copies first
            converted.setId( source.getId( ) );
            converted.setUrl( source.getUrl( ) );
            converted.setTitle( source.getTitle( ) );
            converted.setSummary( source.getSummary( ) );
            converted.setType( source.getType( ) );

            // The date is the only field that needs a conversion
            try
            {
                Date dateIndexed = DateTools.stringToDate( source.getDate( ) );
                converted.setDate( dateIndexed );
            }
            catch( ParseException e )
            {
                AppLogService.error( "Bad Date Format for indexed item \"" + source.getTitle( ) + "\" : " + e.getMessage( ) );
            }

            listSearchResult.add( converted );
        }
    }

}