View Javadoc
1   /*
2    * Copyright (c) 2002-2014, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.dila.modules.solr.utils.parsers;
35  
import fr.paris.lutece.plugins.dila.business.fichelocale.dto.LocalDTO;
import fr.paris.lutece.plugins.dila.service.IDilaLocalService;
import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexerService;
import fr.paris.lutece.plugins.search.solr.indexer.SolrItem;
import fr.paris.lutece.portal.service.content.XPageAppService;
import fr.paris.lutece.portal.service.spring.SpringContextService;
import fr.paris.lutece.portal.service.util.AppLogService;
import fr.paris.lutece.portal.service.util.AppPropertiesService;
import fr.paris.lutece.util.url.UrlItem;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;

import java.text.ParseException;
import java.text.SimpleDateFormat;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
69  
70  
71  /**
72   * Parser for local cards (dila)
73   */
74  public class DilaSolrLocalParser extends DefaultHandler
75  {
76      // -------------
77      // - Constants -
78      // -------------
79      // Plugin name
80      private static final String PROPERTY_PLUGIN_NAME = "dila.plugin.name";
81  
82      // XPath comparisons
83      private static final String PROPERTY_XPATH_CARD = "dila.parser.xpath.local.card";
84      private static final String PROPERTY_XPATH_DATE = "dila.parser.xpath.local.date";
85      private static final String PROPERTY_XPATH_TITLE = "dila.parser.xpath.local.title";
86      private static final String PROPERTY_XPATH_AUDIENCE = "dila.parser.xpath.local.audience";
87      private static final String PROPERTY_ATTRIBUTE_ID = "dila.parser.xpath.local.attribute.id";
88  
89      // Index type
90      private static final String PROPERTY_INDEXING_TYPE = "dila-solr.indexing.localType";
91  
92      // Path contents
93      private static final String PROPERTY_PATH_ID = "dila.parser.path.id";
94      private static final String PROPERTY_PATH_CATEGORY = "dila.parser.path.category";
95  
96      // Strings
97      private static final String STRING_EMPTY = "";
98      private static final String STRING_SLASH = "/";
99      private static final String STRING_SPACE = " ";
100 
101     // -------------
102     // - Variables -
103     // -------------
104     // List of Solr items
105     private List<SolrItem> _listSolrItems;
106 
107     // XPath
108     private String _strXPath;
109 
110     // Contents
111     private String _strId;
112     private String _strDate;
113     private String _strType;
114     private String _strSite;
115     private String _strProdUrl;
116     private String _strTitle;
117     private String _strAudience;
118     private String _strContents;
119 
120     // Services
121     private IDilaLocalService _dilaLocalService = SpringContextService.getBean( "dilaLocalService" );
122 
123     /**
124      * Initializes and launches the parsing of the local cards (public
125      * constructor)
126      */
127     public DilaSolrLocalParser(  )
128     {
129         // Gets the local cards
130         List<LocalDTO> localCardsList = _dilaLocalService.findAll(  );
131 
132         // Initializes the SolrItem list
133         _listSolrItems = new ArrayList<SolrItem>(  );
134 
135         // Initializes the indexing type
136         _strType = AppPropertiesService.getProperty( PROPERTY_INDEXING_TYPE );
137 
138         // Initializes the site
139         _strSite = SolrIndexerService.getWebAppName(  );
140 
141         // Initializes the prod url
142         _strProdUrl = SolrIndexerService.getBaseUrl(  );
143 
144         try
145         {
146             // Initializes the SAX parser
147             SAXParserFactory factory = SAXParserFactory.newInstance(  );
148             SAXParser parser = factory.newSAXParser(  );
149 
150             // Launches the parsing on each local card
151             parseAllLocalCards( localCardsList, parser );
152         }
153         catch ( ParserConfigurationException e )
154         {
155             AppLogService.error( e.getMessage(  ), e );
156         }
157         catch ( SAXException e )
158         {
159             AppLogService.error( e.getMessage(  ), e );
160         }
161     }
162 
163     /**
164      * Launches the parsing on each local card
165      *
166      * @param localCardsList the local cards
167      * @param parser the SAX parser
168      */
169     private void parseAllLocalCards( List<LocalDTO> localCardsList, SAXParser parser )
170     {
171         if ( CollectionUtils.isNotEmpty( localCardsList ) )
172         {
173             for ( LocalDTO currentCard : localCardsList )
174             {
175                 InputStream xmlInput = new ByteArrayInputStream( currentCard.getXml(  ).getBytes(  ) );
176 
177                 // Launches the parsing of this local card (with the current handler)
178                 try
179                 {
180                     parser.parse( xmlInput, this );
181                 }
182                 catch ( SAXException e )
183                 {
184                     AppLogService.error( e.getMessage(  ), e );
185                 }
186                 catch ( IOException e )
187                 {
188                     AppLogService.error( e.getMessage(  ), e );
189                 }
190             }
191         }
192     }
193 
194     /**
195      * Event received when starting the parsing operation
196      *
197      * @throws SAXException any SAX exception
198      */
199     public void startDocument(  ) throws SAXException
200     {
201         // Initializes the XPATH
202         _strXPath = STRING_EMPTY;
203 
204         // Initializes the contents
205         _strId = STRING_EMPTY;
206         _strDate = STRING_EMPTY;
207         _strTitle = STRING_EMPTY;
208         _strContents = STRING_EMPTY;
209         _strAudience = STRING_EMPTY;
210     }
211 
212     /**
213      * Event received at the end of the parsing operation
214      *
215      * @throws SAXException any SAX exception
216      */
217     public void endDocument(  ) throws SAXException
218     {
219         // Sets the full URL
220         UrlItem url = new UrlItem( _strProdUrl );
221         url.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( PROPERTY_PLUGIN_NAME ) );
222         url.addParameter( AppPropertiesService.getProperty( PROPERTY_PATH_ID ), _strId );
223         url.addParameter( AppPropertiesService.getProperty( PROPERTY_PATH_CATEGORY ), _strAudience );
224 
225         // Converts the date from "dd MMMMM yyyy" to "yyyyMMdd"
226         Locale locale = Locale.FRENCH;
227         Date dateUpdate = null;
228 
229         try
230         {
231             SimpleDateFormat dateFormat = new SimpleDateFormat( "yyyy-MM-dd", locale );
232             String strDate = _strDate.split( STRING_SPACE )[1];
233             dateUpdate = dateFormat.parse( strDate );
234 
235             dateFormat.applyPattern( "yyyyMMdd" );
236         }
237         catch ( ParseException e )
238         {
239             dateUpdate = null;
240         }
241 
242         if ( StringUtils.isNotEmpty( _strId ) )
243         {
244             // Creates a new lucene document
245             SolrItem item = new SolrItem(  );
246 
247             item.setUrl( url.getUrl(  ) );
248             item.setDate( dateUpdate );
249             item.setUid( _strId );
250             item.setContent( _strContents );
251             item.setTitle( _strTitle );
252             item.setType( _strType );
253             item.setSite( _strSite );
254 
255             String[] categories = new String[] { _strAudience };
256             item.setCategorie( Arrays.asList( categories ) );
257 
258             // Adds the new item to the list
259             _listSolrItems.add( item );
260         }
261     }
262 
263     /**
264      * Event received at the start of an element
265      *
266      * @param uri the Namespace URI
267      * @param localName the local name
268      * @param qName the qualified XML name
269      * @param atts the attributes attached to the element
270      *
271      * @throws SAXException any SAX exception
272      */
273     public void startElement( String uri, String localName, String qName, Attributes atts )
274         throws SAXException
275     {
276         // Updates the XPath
277         _strXPath += ( STRING_SLASH + qName );
278 
279         // Gets the URL (attribute)
280         String strXPathCard = AppPropertiesService.getProperty( PROPERTY_XPATH_CARD );
281 
282         if ( ( _strXPath != null ) && _strXPath.equals( strXPathCard ) )
283         {
284             String strAttributeId = AppPropertiesService.getProperty( PROPERTY_ATTRIBUTE_ID );
285             _strId = atts.getValue( strAttributeId );
286         }
287     }
288 
289     /**
290      * Event received at the end of an element
291      *
292      * @param uri the Namespace URI
293      * @param localName the local name
294      * @param qName the qualified XML name
295      *
296      * @throws SAXException any SAX exception
297      */
298     public void endElement( String uri, String localName, String qName )
299         throws SAXException
300     {
301         // Updates the XPath
302         _strXPath = _strXPath.substring( 0, _strXPath.lastIndexOf( STRING_SLASH ) );
303     }
304 
305     /**
306      * Event received when the analyzer encounters text (between two tags)
307      *
308      * @param ch the characters from the XML document
309      * @param start the start position in the array
310      * @param length the number of characters to read from the array
311      *
312      * @throws SAXException any SAX exception
313      */
314     public void characters( char[] ch, int start, int length )
315         throws SAXException
316     {
317         // Gets the XPath comparisons properties
318         String strXPathDate = AppPropertiesService.getProperty( PROPERTY_XPATH_DATE );
319         String strXPathTitle = AppPropertiesService.getProperty( PROPERTY_XPATH_TITLE );
320         String strXPathAudience = AppPropertiesService.getProperty( PROPERTY_XPATH_AUDIENCE );
321 
322         // Gets the date
323         if ( ( _strXPath != null ) && _strXPath.equals( strXPathDate ) )
324         {
325             _strDate += new String( ch, start, length );
326         }
327 
328         // Gets the title
329         else if ( ( _strXPath != null ) && _strXPath.equals( strXPathTitle ) )
330         {
331             _strTitle += new String( ch, start, length );
332         }
333 
334         // Gets the audience
335         else if ( ( _strXPath != null ) && _strXPath.equals( strXPathAudience ) )
336         {
337             _strAudience += new String( ch, start, length );
338         }
339 
340         // Gets the contents
341         if ( ( _strContents != null ) && !_strContents.equals( STRING_EMPTY ) )
342         {
343             _strContents += ( STRING_SPACE + new String( ch, start, length ) );
344         }
345         else
346         {
347             _strContents += new String( ch, start, length );
348         }
349     }
350 
351     /**
352      * Gets the list of Solr items
353      *
354      * @return The list of Solr items
355      */
356     public List<SolrItem> getLocalSolrItems(  )
357     {
358         return _listSolrItems;
359     }
360 }