/*
 * Copyright (c) 2002-2014, Mairie de Paris
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice
 *     and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright notice
 *     and the following disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * License 1.0
 */
34  package fr.paris.lutece.plugins.dila.service.search;
35  
36  import fr.paris.lutece.plugins.dila.business.enums.AudienceCategoryEnum;
37  import fr.paris.lutece.plugins.dila.business.fichelocale.dto.XmlDTO;
38  import fr.paris.lutece.plugins.dila.service.IDilaXmlService;
39  import fr.paris.lutece.portal.service.message.SiteMessageException;
40  import fr.paris.lutece.portal.service.search.IndexationService;
41  import fr.paris.lutece.portal.service.search.PageIndexer;
42  import fr.paris.lutece.portal.service.search.SearchIndexer;
43  import fr.paris.lutece.portal.service.search.SearchItem;
44  import fr.paris.lutece.portal.service.spring.SpringContextService;
45  import fr.paris.lutece.portal.service.util.AppPropertiesService;
46  import fr.paris.lutece.util.url.UrlItem;
47  
48  import java.io.IOException;
49  import java.util.ArrayList;
50  import java.util.List;
51  
52  import org.apache.lucene.document.DateTools;
53  import org.apache.lucene.document.Document;
54  import org.apache.lucene.document.Field;
55  
56  
57  /**
58   * DILA Xml indexer
59   */
60  public class DilaXMLIndexer implements SearchIndexer
61  {
62      public static final String INDEX_TYPE_XML = "DILA XML";
63      public static final String INDEXER_NAME = "DilaXMLIndexer";
64      protected static final String PARAMETER_PAGE_ID = "xmlFile";
65      protected static final String PARAMETER_PAGE_CATEGORIE = "categorie";
66      protected static final String PROPERTY_PAGE_BASE_URL = "dila.pageIndexer.baseUrl";
67      protected static final String PROPERTY_SEARCH_PAGE_URL = "search.pageSearch.baseUrl";
68      protected static final String PROPERTY_INDEXER_ENABLE = "dila.pageIndexer.enable";
69      private static final String INDEXER_DESCRIPTION = "DILA service for XML";
70      private static final String INDEXER_VERSION = "1.0.0";
71      private IDilaXmlService _dilaXmlService = SpringContextService.getBean( "dilaXmlService" );
72  
73      @Override
74      public void indexDocuments( ) throws IOException, InterruptedException, SiteMessageException
75      {
76          List<XmlDTO> listXml = _dilaXmlService.findAll( );
77  
78          for ( XmlDTO xml : listXml )
79          {
80              Document doc = null;
81  
82              try
83              {
84                  doc = getDocument( xml );
85              }
86              catch ( Exception e )
87              {
88                  String strMessage = "Page ID : " + xml.getId( );
89                  IndexationService.error( this, e, strMessage );
90              }
91  
92              if ( doc != null )
93              {
94                  IndexationService.write( doc );
95              }
96          }
97      }
98  
99      @Override
100     public List<Document> getDocuments( String strIdDocument ) throws IOException, InterruptedException,
101             SiteMessageException
102     {
103         return null;
104     }
105 
106     @Override
107     public String getName( )
108     {
109         return INDEXER_NAME;
110     }
111 
112     @Override
113     public String getVersion( )
114     {
115         return INDEXER_VERSION;
116     }
117 
118     @Override
119     public String getDescription( )
120     {
121         return INDEXER_DESCRIPTION;
122     }
123 
124     @Override
125     public boolean isEnable( )
126     {
127         String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE, "true" );
128 
129         return ( strEnable.equalsIgnoreCase( "true" ) );
130     }
131 
132     @Override
133     public List<String> getListType( )
134     {
135         List<String> listType = new ArrayList<String>( );
136         listType.add( PageIndexer.INDEX_TYPE_PAGE );
137 
138         return listType;
139     }
140 
141     @Override
142     public String getSpecificSearchAppUrl( )
143     {
144         return AppPropertiesService.getProperty( PROPERTY_SEARCH_PAGE_URL );
145     }
146 
147     /**
148      * Builds a document which will be used by Lucene during the indexing of the
149      * pages of the site with the following
150      * fields : summary, uid, url, contents, title and description.
151      * @return the built Document
152      * @param xml the xml to index
153      * @throws IOException The IO Exception
154      * @throws InterruptedException The InterruptedException
155      * @throws SiteMessageException occurs when a site message need to be
156      *             displayed
157      */
158     protected Document getDocument( XmlDTO xml ) throws IOException, InterruptedException, SiteMessageException
159     {
160         String strPageBaseUrl = AppPropertiesService.getProperty( PROPERTY_PAGE_BASE_URL );
161 
162         // make a new, empty document
163         Document doc = new Document( );
164 
165         // Add the url as a field named "url".  Use an UnIndexed field, so
166         // that the url is just stored with the document, but is not searchable.
167         doc.add( new Field( SearchItem.FIELD_TYPE, xml.getResourceType( ), Field.Store.YES, Field.Index.NOT_ANALYZED ) );
168 
169         String strDate = null;
170 
171         if ( xml.getModificationDate( ) != null )
172         {
173             strDate = DateTools.dateToString( xml.getModificationDate( ), DateTools.Resolution.DAY );
174         }
175         else
176         {
177             strDate = DateTools.dateToString( xml.getCreationDate( ), DateTools.Resolution.DAY );
178         }
179 
180         doc.add( new Field( SearchItem.FIELD_DATE, strDate, Field.Store.YES, Field.Index.NOT_ANALYZED ) );
181 
182         // Add the url as a field named "url".  Use an UnIndexed field, so
183         // that the url is just stored with the document, but is not searchable.
184         UrlItem url = new UrlItem( strPageBaseUrl );
185         url.addParameter( PARAMETER_PAGE_ID, xml.getIdXml( ) );
186         url.addParameter( PARAMETER_PAGE_CATEGORIE, AudienceCategoryEnum.fromId( xml.getIdAudience( ) ).getLabel( ) );
187 
188         doc.add( new Field( SearchItem.FIELD_URL, url.getUrl( ), Field.Store.YES, Field.Index.NOT_ANALYZED ) );
189 
190         StringBuilder content = new StringBuilder( );
191         content.append( xml.getIdXml( ) );
192         content.append( " " );
193         content.append( xml.getTitle( ) );
194 
195         doc.add( new Field( SearchItem.FIELD_CONTENTS, content.toString( ), Field.Store.NO, Field.Index.ANALYZED ) );
196 
197         // Add the uid as a field, so that index can be incrementally maintained.
198         // This field is not stored with document, it is indexed, but it is not
199         // tokenized prior to indexing.
200         String strIdPage = xml.getIdXml( );
201         doc.add( new Field( SearchItem.FIELD_UID, strIdPage, Field.Store.NO, Field.Index.NOT_ANALYZED ) );
202 
203         // Add the tag-stripped contents as a Reader-valued Text field so it will
204         // get tokenized and indexed.
205         doc.add( new Field( SearchItem.FIELD_TITLE, xml.getTitle( ), Field.Store.YES, Field.Index.NOT_ANALYZED ) );
206 
207         // return the document
208         return doc;
209     }
210 }