LuceneSearchEngine.java

  1. /*
  2.  * Copyright (c) 2002-2022, City of Paris
  3.  * All rights reserved.
  4.  *
  5.  * Redistribution and use in source and binary forms, with or without
  6.  * modification, are permitted provided that the following conditions
  7.  * are met:
  8.  *
  9.  *  1. Redistributions of source code must retain the above copyright notice
  10.  *     and the following disclaimer.
  11.  *
  12.  *  2. Redistributions in binary form must reproduce the above copyright notice
  13.  *     and the following disclaimer in the documentation and/or other materials
  14.  *     provided with the distribution.
  15.  *
  16.  *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
  17.  *     contributors may be used to endorse or promote products derived from
  18.  *     this software without specific prior written permission.
  19.  *
  20.  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21.  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22.  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23.  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
  24.  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25.  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26.  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27.  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28.  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29.  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30.  * POSSIBILITY OF SUCH DAMAGE.
  31.  *
  32.  * License 1.0
  33.  */
  34. package fr.paris.lutece.portal.service.search;

  35. import java.text.ParseException;
  36. import java.util.ArrayList;
  37. import java.util.Arrays;
  38. import java.util.Date;
  39. import java.util.List;
  40. import java.util.Locale;

  41. import javax.servlet.http.HttpServletRequest;

  42. import org.apache.commons.collections.CollectionUtils;
  43. import org.apache.commons.lang3.StringUtils;
  44. import org.apache.commons.lang3.ArrayUtils;
  45. import org.apache.lucene.document.DateTools;
  46. import org.apache.lucene.document.DateTools.Resolution;
  47. import org.apache.lucene.document.Document;
  48. import org.apache.lucene.index.DirectoryReader;
  49. import org.apache.lucene.index.IndexReader;
  50. import org.apache.lucene.index.Term;
  51. import org.apache.lucene.queryparser.classic.QueryParser;
  52. import org.apache.lucene.queryparser.classic.QueryParserBase;
  53. import org.apache.lucene.search.BooleanClause;
  54. import org.apache.lucene.search.BooleanQuery;
  55. import org.apache.lucene.search.IndexSearcher;
  56. import org.apache.lucene.search.Query;
  57. import org.apache.lucene.search.ScoreDoc;
  58. import org.apache.lucene.search.TermQuery;
  59. import org.apache.lucene.search.TermRangeQuery;
  60. import org.apache.lucene.search.TopDocs;
  61. import org.apache.lucene.store.Directory;
  62. import org.apache.lucene.util.BytesRef;

  63. import fr.paris.lutece.portal.business.page.Page;
  64. import fr.paris.lutece.portal.service.security.LuteceUser;
  65. import fr.paris.lutece.portal.service.security.SecurityService;
  66. import fr.paris.lutece.portal.service.util.AppLogService;
  67. import fr.paris.lutece.util.date.DateUtil;

  68. /**
  69.  * LuceneSearchEngine
  70.  */
  71. public class LuceneSearchEngine implements SearchEngine
  72. {
  73.     public static final int MAX_RESPONSES = 1000000;
  74.     private static final String PARAMETER_TYPE_FILTER = "type_filter";
  75.     private static final String PARAMETER_DATE_AFTER = "date_after";
  76.     private static final String PARAMETER_DATE_BEFORE = "date_before";
  77.     private static final String PARAMETER_TAG_FILTER = "tag_filter";
  78.     private static final String PARAMETER_DEFAULT_OPERATOR = "default_operator";
  79.     private static final String PARAMETER_OPERATOR_AND = "AND";

  80.     /**
  81.      * Return search results
  82.      *
  83.      * @param strQuery
  84.      *            The search query
  85.      * @param request
  86.      *            The HTTP request
  87.      * @return Results as a collection of SearchResult
  88.      */
  89.     public List<SearchResult> getSearchResults( String strQuery, HttpServletRequest request )
  90.     {
  91.         List<Query> listFilter = new ArrayList<>( );
  92.         boolean bFilterResult = false;

  93.         if ( SecurityService.isAuthenticationEnable( ) )
  94.         {
  95.             LuteceUser user = SecurityService.getInstance( ).getRegisteredUser( request );

  96.             Query [ ] filtersRole = null;

  97.             if ( user != null )
  98.             {
  99.                 String [ ] userRoles = SecurityService.getInstance( ).getRolesByUser( user );

  100.                 if ( userRoles != null )
  101.                 {
  102.                     filtersRole = new Query [ userRoles.length + 1];

  103.                     for ( int i = 0; i < userRoles.length; i++ )
  104.                     {
  105.                         Query queryRole = new TermQuery( new Term( SearchItem.FIELD_ROLE, userRoles [i] ) );
  106.                         filtersRole [i] = queryRole;
  107.                     }
  108.                 }
  109.                 else
  110.                 {
  111.                     bFilterResult = true;
  112.                 }
  113.             }
  114.             else
  115.             {
  116.                 filtersRole = new Query [ 1];
  117.             }

  118.             if ( !bFilterResult )
  119.             {
  120.                 Query queryRole = new TermQuery( new Term( SearchItem.FIELD_ROLE, Page.ROLE_NONE ) );
  121.                 filtersRole [filtersRole.length - 1] = queryRole;
  122.                 BooleanQuery.Builder booleanQueryBuilderRole = new BooleanQuery.Builder( );
  123.                 Arrays.asList( filtersRole ).stream( ).forEach( filterRole -> booleanQueryBuilderRole.add( filterRole, BooleanClause.Occur.SHOULD ) );

  124.                 listFilter.add( booleanQueryBuilderRole.build( ) );
  125.             }
  126.         }

  127.         String [ ] typeFilter = request.getParameterValues( PARAMETER_TYPE_FILTER );
  128.         String strDateAfter = request.getParameter( PARAMETER_DATE_AFTER );
  129.         String strDateBefore = request.getParameter( PARAMETER_DATE_BEFORE );
  130.         Query allFilter = buildFinalFilter( listFilter, strDateAfter, strDateBefore, typeFilter, request.getLocale( ) );

  131.         String strTagFilter = request.getParameter( PARAMETER_TAG_FILTER );
  132.         return search( strTagFilter, strQuery, allFilter, request, bFilterResult );
  133.     }

  134.     private Query buildFinalFilter( List<Query> listFilter, String strDateAfter, String strDateBefore, String [ ] typeFilter, Locale locale )
  135.     {
  136.         Query filterDate = createFilterDate( strDateAfter, strDateBefore, locale );
  137.         if ( filterDate != null )
  138.         {
  139.             listFilter.add( filterDate );
  140.         }

  141.         Query filterType = createFilterType( typeFilter );
  142.         if ( filterType != null )
  143.         {
  144.             listFilter.add( filterType );
  145.         }

  146.         Query allFilter = null;
  147.         if ( CollectionUtils.isNotEmpty( listFilter ) )
  148.         {
  149.             BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder( );
  150.             for ( Query filter : listFilter )
  151.             {
  152.                 booleanQueryBuilder.add( filter, BooleanClause.Occur.MUST );
  153.             }
  154.             allFilter = booleanQueryBuilder.build( );
  155.         }
  156.         return allFilter;
  157.     }

  158.     private Query createFilterType( String [ ] typeFilter )
  159.     {
  160.         if ( ArrayUtils.isNotEmpty( typeFilter ) && !typeFilter [0].equals( SearchService.TYPE_FILTER_NONE ) )
  161.         {
  162.             BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder( );
  163.             for ( int i = 0; i < typeFilter.length; i++ )
  164.             {
  165.                 Query queryType = new TermQuery( new Term( SearchItem.FIELD_TYPE, typeFilter [i] ) );
  166.                 booleanQueryBuilder.add( queryType, BooleanClause.Occur.SHOULD );
  167.             }
  168.             return booleanQueryBuilder.build( );
  169.         }
  170.         return null;
  171.     }

  172.     private Query createFilterDate( String strDateAfter, String strDateBefore, Locale locale )
  173.     {
  174.         boolean bDateAfter = false;
  175.         boolean bDateBefore = false;

  176.         if ( StringUtils.isNotBlank( strDateAfter ) || StringUtils.isNotBlank( strDateBefore ) )
  177.         {
  178.             BytesRef strAfter = null;
  179.             BytesRef strBefore = null;

  180.             if ( StringUtils.isNotBlank( strDateAfter ) )
  181.             {
  182.                 Date dateAfter = DateUtil.formatDate( strDateAfter, locale );
  183.                 strAfter = new BytesRef( DateTools.dateToString( dateAfter, Resolution.DAY ) );
  184.                 bDateAfter = true;
  185.             }

  186.             if ( StringUtils.isNotBlank( strDateBefore ) )
  187.             {
  188.                 Date dateBefore = DateUtil.formatDate( strDateBefore, locale );
  189.                 strBefore = new BytesRef( DateTools.dateToString( dateBefore, Resolution.DAY ) );
  190.                 bDateBefore = true;
  191.             }

  192.             return new TermRangeQuery( SearchItem.FIELD_DATE, strAfter, strBefore, bDateAfter, bDateBefore );
  193.         }
  194.         return null;
  195.     }

  196.     private List<SearchResult> search( String strTagFilter, String strQuery, Query allFilter, HttpServletRequest request, boolean bFilterResult )
  197.     {
  198.         List<SearchItem> listResults = new ArrayList<>( );
  199.         try ( Directory directory = IndexationService.getDirectoryIndex( ) ; IndexReader ir = DirectoryReader.open( directory ) ; )
  200.         {
  201.             IndexSearcher searcher = new IndexSearcher( ir );

  202.             BooleanQuery.Builder bQueryBuilder = new BooleanQuery.Builder( );

  203.             if ( StringUtils.isNotBlank( strTagFilter ) )
  204.             {
  205.                 QueryParser parser = new QueryParser( SearchItem.FIELD_METADATA, IndexationService.getAnalyser( ) );

  206.                 String formatQuery = ( strQuery != null ) ? strQuery : "";

  207.                 Query queryMetaData = parser.parse( formatQuery );
  208.                 bQueryBuilder.add( queryMetaData, BooleanClause.Occur.SHOULD );

  209.                 parser = new QueryParser( SearchItem.FIELD_SUMMARY, IndexationService.getAnalyser( ) );

  210.                 Query querySummary = parser.parse( formatQuery );
  211.                 bQueryBuilder.add( querySummary, BooleanClause.Occur.SHOULD );
  212.             }
  213.             else
  214.             {
  215.                 QueryParser parser = new QueryParser( SearchItem.FIELD_CONTENTS, IndexationService.getAnalyser( ) );

  216.                 String operator = request.getParameter( PARAMETER_DEFAULT_OPERATOR );

  217.                 if ( PARAMETER_OPERATOR_AND.equals( operator ) )
  218.                 {
  219.                     parser.setDefaultOperator( QueryParserBase.AND_OPERATOR );
  220.                 }

  221.                 Query queryContent = parser.parse( ( StringUtils.isNotBlank( strQuery ) ) ? strQuery : "" );
  222.                 bQueryBuilder.add( queryContent, BooleanClause.Occur.SHOULD );
  223.             }

  224.             Query query = bQueryBuilder.build( );

  225.             if ( allFilter != null )
  226.             {
  227.                 BooleanQuery.Builder bQueryBuilderWithFilter = new BooleanQuery.Builder( );
  228.                 bQueryBuilderWithFilter.add( allFilter, BooleanClause.Occur.FILTER );
  229.                 bQueryBuilderWithFilter.add( query, BooleanClause.Occur.MUST );
  230.                 query = bQueryBuilderWithFilter.build( );
  231.             }

  232.             // Get results documents
  233.             TopDocs topDocs = searcher.search( query, MAX_RESPONSES );
  234.             ScoreDoc [ ] hits = topDocs.scoreDocs;

  235.             for ( int i = 0; i < hits.length; i++ )
  236.             {
  237.                 int docId = hits [i].doc;
  238.                 Document document = searcher.doc( docId );
  239.                 SearchItem si = new SearchItem( document );

  240.                 if ( ( !bFilterResult ) || ( si.getRole( ).equals( Page.ROLE_NONE ) )
  241.                         || ( SecurityService.getInstance( ).isUserInRole( request, si.getRole( ) ) ) )
  242.                 {
  243.                     listResults.add( si );
  244.                 }
  245.             }
  246.         }
  247.         catch( Exception e )
  248.         {
  249.             AppLogService.error( e.getMessage( ), e );
  250.         }
  251.         return convertList( listResults );
  252.     }

  253.     /**
  254.      * Convert a list of Lucene items into a list of generic search items
  255.      *
  256.      * @param listSource
  257.      *            The list of Lucene items
  258.      * @return A list of generic search items
  259.      */
  260.     private List<SearchResult> convertList( List<SearchItem> listSource )
  261.     {
  262.         List<SearchResult> listDest = new ArrayList<>( );

  263.         for ( SearchItem item : listSource )
  264.         {
  265.             SearchResult result = new SearchResult( );
  266.             result.setId( item.getId( ) );

  267.             try
  268.             {
  269.                 result.setDate( DateTools.stringToDate( item.getDate( ) ) );
  270.             }
  271.             catch( ParseException e )
  272.             {
  273.                 AppLogService.error( "Bad Date Format for indexed item \"{}\" : {}", item.getTitle( ), e.getMessage( ), e );
  274.             }

  275.             result.setUrl( item.getUrl( ) );
  276.             result.setTitle( item.getTitle( ) );
  277.             result.setSummary( item.getSummary( ) );
  278.             result.setType( item.getType( ) );
  279.             listDest.add( result );
  280.         }

  281.         return listDest;
  282.     }
  283. }