AnnounceLuceneSearchEngine.java

/*
 * Copyright (c) 2002-2021, City of Paris
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice
 *     and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright notice
 *     and the following disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * License 1.0
 */
package fr.paris.lutece.plugins.announce.service.announcesearch;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;

import fr.paris.lutece.plugins.announce.business.Announce;
import fr.paris.lutece.plugins.announce.business.AnnounceHome;
import fr.paris.lutece.plugins.announce.business.AnnounceSearchFilter;
import fr.paris.lutece.plugins.announce.business.AnnounceSort;
import fr.paris.lutece.plugins.announce.service.AnnouncePlugin;
import fr.paris.lutece.portal.service.plugin.Plugin;
import fr.paris.lutece.portal.service.search.SearchItem;
import fr.paris.lutece.portal.service.search.SearchResult;
import fr.paris.lutece.portal.service.util.AppLogService;

/**
 * LuceneSearchEngine
 */
public class AnnounceLuceneSearchEngine implements IAnnounceSearchEngine
{
    private static final int NO_CATEGORY = 0;
    private static final int NO_SECTOR = 0;
    private final SimpleDateFormat _dayFormat = new SimpleDateFormat( "yyyyMMdd", Locale.US );

    /**
     * {@inheritDoc}
     */
    @Override
    public int getSearchResults( AnnounceSearchFilter filter, Plugin plugin, List<SearchResult> listSearchResult, int nPage, int nItemsPerPage )
    {
        ArrayList<SearchItem> listResults = new ArrayList<>( );
        IndexSearcher searcher;

        int nNbResults = 0;

        try
        {
            searcher = AnnounceSearchService.getInstance( ).getSearcher( );

            Collection<String> queries = new ArrayList<>( );
            Collection<String> sectors = new ArrayList<>( );
            Collection<BooleanClause.Occur> flags = new ArrayList<>( );

            // Category id
            if ( filter.getIdCategory( ) != NO_CATEGORY )
            {
                Query queryCategoryId = new TermQuery( new Term( AnnounceSearchItem.FIELD_CATEGORY_ID, String.valueOf( filter.getIdCategory( ) ) ) );
                queries.add( queryCategoryId.toString( ) );
                sectors.add( AnnounceSearchItem.FIELD_CATEGORY_ID );
                flags.add( BooleanClause.Occur.MUST );
            }

            // Category id
            if ( filter.getIdSector( ) != NO_SECTOR )
            {
                Query querySectorId = new TermQuery( new Term( AnnounceSearchItem.FIELD_SECTOR_ID, String.valueOf( filter.getIdSector( ) ) ) );
                queries.add( querySectorId.toString( ) );
                sectors.add( AnnounceSearchItem.FIELD_SECTOR_ID );
                flags.add( BooleanClause.Occur.MUST );
            }

            // Type (=announce)
            PhraseQuery.Builder queryTypeBuilder = new PhraseQuery.Builder( );
            // add character "e" to TYPE because field is not analyzed when added to lucene document, but it's analyzed then in MultiFieldQueryParser.parse
            // method
            queryTypeBuilder.add( new Term( SearchItem.FIELD_TYPE, AnnouncePlugin.PLUGIN_NAME + "e" ) );
            queries.add( queryTypeBuilder.build( ).toString( ) );
            sectors.add( SearchItem.FIELD_TYPE );
            flags.add( BooleanClause.Occur.MUST );

            // Keywords in title or description
            if ( StringUtils.isNotBlank( filter.getKeywords( ) ) )
            {
                PhraseQuery.Builder queryContentBuilder = new PhraseQuery.Builder( );
                queryContentBuilder.add( new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) ) );
                queries.add( queryContentBuilder.build( ).toString( ) );
                sectors.add( SearchItem.FIELD_CONTENTS );
                flags.add( BooleanClause.Occur.MUST );
            }

            // contains range date
            if ( ( filter.getDateMin( ) != null ) || ( filter.getDateMax( ) != null ) )
            {
                Date dateMinToSearch = new Date( 0L );
                Date dateMaxToSearch = new Date( );
                if ( filter.getDateMin( ) != null )
                {
                    dateMinToSearch = filter.getDateMin( );
                }

                if ( filter.getDateMax( ) != null )
                {
                    dateMaxToSearch = filter.getDateMax( );
                }

                // String stringDateMin = DateUtil.
                String strLowerTerm = _dayFormat.format( dateMinToSearch );
                String strUpperTerm = _dayFormat.format( dateMaxToSearch );
                BytesRef bRLowerTerm = new BytesRef( strLowerTerm );
                BytesRef bRUpperTerm = new BytesRef( strUpperTerm );
                Query queryRangeDate = new TermRangeQuery( SearchItem.FIELD_DATE, bRLowerTerm, bRUpperTerm, true, true );
                queries.add( queryRangeDate.toString( ) );
                sectors.add( SearchItem.FIELD_DATE );
                flags.add( BooleanClause.Occur.MUST );
            }

            // contains range price
            if ( ( filter.getPriceMin( ) > 0 ) || ( filter.getPriceMax( ) > 0 ) )
            {
                int nPriceMin = ( filter.getPriceMin( ) > 0 ) ? filter.getPriceMin( ) : 0;
                int nPriceMax = ( filter.getPriceMax( ) > 0 ) ? filter.getPriceMax( ) : Integer.MAX_VALUE;
                Query queryRangePrice = new TermRangeQuery( AnnounceSearchItem.FIELD_PRICE,
                        new BytesRef( AnnounceSearchService.formatPriceForIndexer( nPriceMin ) ),
                        new BytesRef( AnnounceSearchService.formatPriceForIndexer( nPriceMax ) ), true, true );
                queries.add( queryRangePrice.toString( ) );
                sectors.add( AnnounceSearchItem.FIELD_PRICE );
                flags.add( BooleanClause.Occur.MUST );
            }

            Query queryMulti = MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
                    flags.toArray( new BooleanClause.Occur [ flags.size( )] ), AnnounceSearchService.getInstance( ).getAnalyzer( ) );

            TopDocs topDocs = searcher.search( queryMulti, 1000000 );
            ScoreDoc [ ] hits = topDocs.scoreDocs;
            nNbResults = hits.length;

            // We only get the documents of the current page
            int nFrom = ( nPage - 1 ) * nItemsPerPage;

            if ( nFrom < 0 )
            {
                nFrom = 0;
            }

            int nTo = ( nPage * nItemsPerPage );

            if ( ( nTo == 0 ) || ( nTo > nNbResults ) )
            {
                nTo = nNbResults;
            }

            for ( int i = nFrom; i < nTo; i++ )
            {
                int docId = hits [i].doc;
                Document document = searcher.doc( docId );
                SearchItem si = new SearchItem( document );
                listResults.add( si );
            }
        }
        catch( Exception e )
        {
            AppLogService.error( e.getMessage( ), e );
        }
        convertList( listResults, listSearchResult );

        return nNbResults;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getSearchResultsBis( AnnounceSearchFilter filter, Plugin plugin, List<Announce> listAnnouncesResult, int nPage, int nItemsPerPage,
            AnnounceSort anSort )
    {
        ArrayList<SearchItem> listResults = new ArrayList<>( );
        List<Integer> listIdAnnounces = new ArrayList<>( );
        IndexSearcher searcher;

        Date dateMinToSearch;
        Date dateMaxToSearch;
        int nNbResults = 0;
        try
        {
            searcher = AnnounceSearchService.getInstance( ).getSearcher( );

            Collection<String> queries = new ArrayList<>( );
            Collection<String> sectors = new ArrayList<>( );
            Collection<BooleanClause.Occur> flags = new ArrayList<>( );

            // Category id
            if ( filter.getIdCategory( ) != NO_CATEGORY )
            {
                Query queryCategoryId = new TermQuery( new Term( AnnounceSearchItem.FIELD_CATEGORY_ID, String.valueOf( filter.getIdCategory( ) ) ) );
                queries.add( queryCategoryId.toString( ) );
                sectors.add( AnnounceSearchItem.FIELD_CATEGORY_ID );
                flags.add( BooleanClause.Occur.MUST );
            }

            // Category id
            if ( filter.getIdSector( ) != NO_SECTOR )
            {
                Query querySectorId = new TermQuery( new Term( AnnounceSearchItem.FIELD_SECTOR_ID, String.valueOf( filter.getIdSector( ) ) ) );
                queries.add( querySectorId.toString( ) );
                sectors.add( AnnounceSearchItem.FIELD_SECTOR_ID );
                flags.add( BooleanClause.Occur.MUST );
            }

            // Type (=announce)
            PhraseQuery.Builder queryTypeBuilder = new PhraseQuery.Builder( );
            // add character "e" to TYPE because field is not analyzed when added to lucene document, but it's analyzed then in MultiFieldQueryParser.parse
            // method
            queryTypeBuilder.add( new Term( SearchItem.FIELD_TYPE, AnnouncePlugin.PLUGIN_NAME + "e" ) );
            queries.add( queryTypeBuilder.build( ).toString( ) );
            sectors.add( SearchItem.FIELD_TYPE );
            flags.add( BooleanClause.Occur.MUST );

            // Keywords in title or description
            if ( StringUtils.isNotBlank( filter.getKeywords( ) ) )
            {
                PhraseQuery.Builder queryContentBuilder = new PhraseQuery.Builder( );
                queryContentBuilder.add( new Term( SearchItem.FIELD_CONTENTS, filter.getKeywords( ) ) );
                queries.add( queryContentBuilder.build( ).toString( ) );
                sectors.add( SearchItem.FIELD_CONTENTS );
                flags.add( BooleanClause.Occur.MUST );
            }

            // contains range date
            if ( ( filter.getDateMin( ) != null ) || ( filter.getDateMax( ) != null ) )
            {
                if ( filter.getDateMin( ) == null )
                {
                    dateMinToSearch = new Date( 0L );
                }
                else
                {
                    dateMinToSearch = filter.getDateMin( );
                }

                if ( filter.getDateMax( ) == null )
                {
                    dateMaxToSearch = new Date( );
                }
                else
                {
                    dateMaxToSearch = filter.getDateMax( );
                }

                // String stringDateMin = DateUtil.
                String strLowerTerm = _dayFormat.format( dateMinToSearch );
                String strUpperTerm = _dayFormat.format( dateMaxToSearch );
                BytesRef bRLowerTerm = new BytesRef( strLowerTerm );
                BytesRef bRUpperTerm = new BytesRef( strUpperTerm );
                Query queryRangeDate = new TermRangeQuery( SearchItem.FIELD_DATE, bRLowerTerm, bRUpperTerm, true, true );
                queries.add( queryRangeDate.toString( ) );
                sectors.add( SearchItem.FIELD_DATE );
                flags.add( BooleanClause.Occur.MUST );
            }

            // contains range price
            if ( ( filter.getPriceMin( ) > 0 ) || ( filter.getPriceMax( ) > 0 ) )
            {
                int nPriceMin = ( filter.getPriceMin( ) > 0 ) ? filter.getPriceMin( ) : 0;
                int nPriceMax = ( filter.getPriceMax( ) > 0 ) ? filter.getPriceMax( ) : Integer.MAX_VALUE;
                Query queryRangePrice = new TermRangeQuery( AnnounceSearchItem.FIELD_PRICE,
                        new BytesRef( AnnounceSearchService.formatPriceForIndexer( nPriceMin ) ),
                        new BytesRef( AnnounceSearchService.formatPriceForIndexer( nPriceMax ) ), true, true );
                queries.add( queryRangePrice.toString( ) );
                sectors.add( AnnounceSearchItem.FIELD_PRICE );
                flags.add( BooleanClause.Occur.MUST );
            }

            Query queryMulti = MultiFieldQueryParser.parse( queries.toArray( new String [ queries.size( )] ), sectors.toArray( new String [ sectors.size( )] ),
                    flags.toArray( new BooleanClause.Occur [ flags.size( )] ), AnnounceSearchService.getInstance( ).getAnalyzer( ) );

            TopDocs topDocs = searcher.search( queryMulti, 1000000 );
            ScoreDoc [ ] hits = topDocs.scoreDocs;
            nNbResults = hits.length;

            // -------------------------------------------------
            for ( int i = 0; i < nNbResults; i++ )
            {
                int docId = hits [i].doc;
                Document document = searcher.doc( docId );
                SearchItem si = new SearchItem( document );
                listResults.add( si );
            }
            for ( SearchItem searchResult : listResults )
            {
                if ( searchResult.getId( ) != null )
                {
                    listIdAnnounces.add( Integer.parseInt( searchResult.getId( ) ) );
                }
            }

            List<Announce> listAnnounces = AnnounceHome.findByListId( listIdAnnounces, anSort );

            // -------------------------------------------------

            // We only get the documents of the current page
            int nFrom = ( nPage - 1 ) * nItemsPerPage;

            if ( nFrom < 0 )
            {
                nFrom = 0;
            }

            int nTo = ( nPage * nItemsPerPage );

            if ( ( nTo == 0 ) || ( nTo > nNbResults ) )
            {
                nTo = nNbResults;
            }

            for ( int i = nFrom; i < nTo; i++ )
            {
                listAnnouncesResult.add( listAnnounces.get( i ) );
            }
        }
        catch( Exception e )
        {
            AppLogService.error( e.getMessage( ), e );
        }

        return nNbResults;
    }

    /**
     * Convert the SearchItem list on SearchResult list
     * 
     * @param listSource
     *            The source list
     * @param listSearchResult
     *            The result list
     */
    private void convertList( List<SearchItem> listSource, List<SearchResult> listSearchResult )
    {
        for ( SearchItem item : listSource )
        {
            SearchResult result = new SearchResult( );
            result.setId( item.getId( ) );

            try
            {
                result.setDate( DateTools.stringToDate( item.getDate( ) ) );
            }
            catch( ParseException e )
            {
                AppLogService.error( "Bad Date Format for indexed item \"" + item.getTitle( ) + "\" : " + e.getMessage( ) );
            }

            result.setUrl( item.getUrl( ) );
            result.setTitle( item.getTitle( ) );
            result.setSummary( item.getSummary( ) );
            result.setType( item.getType( ) );
            listSearchResult.add( result );
        }
    }
}