View Javadoc
1   /*
2    * Copyright (c) 2002-2017, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.comarquage.modules.solr.utils.parsers;
35  
36  import java.io.File;
37  import java.io.IOException;
38  import java.text.ParseException;
39  import java.text.SimpleDateFormat;
40  import java.util.ArrayList;
41  import java.util.Date;
42  import java.util.List;
43  import java.util.Locale;
44  
45  import javax.xml.parsers.ParserConfigurationException;
46  import javax.xml.parsers.SAXParser;
47  import javax.xml.parsers.SAXParserFactory;
48  
49  import org.xml.sax.Attributes;
50  import org.xml.sax.SAXException;
51  import org.xml.sax.helpers.DefaultHandler;
52  
53  import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexerService;
54  import fr.paris.lutece.plugins.search.solr.indexer.SolrItem;
55  import fr.paris.lutece.plugins.search.solr.util.SolrConstants;
56  import fr.paris.lutece.portal.service.content.XPageAppService;
57  import fr.paris.lutece.portal.service.util.AppLogService;
58  import fr.paris.lutece.portal.service.util.AppPathService;
59  import fr.paris.lutece.portal.service.util.AppPropertiesService;
60  import fr.paris.lutece.util.url.UrlItem;
61  
62  
63  /**
64   * Parser for local cards (comarquage)
65   */
66  public class CoMarquageSolrLocalParser extends DefaultHandler
67  {
68      // -------------
69      // - Constants -
70      // -------------
71      // Plugin name
72      private static final String PROPERTY_PLUGIN_NAME = "comarquage.plugin.name";
73  
74      // Local cards path
75      private static final String PROPERTY_INDEXING_LOCAL_PATH = "comarquage.indexing.localBasePath";
76      private static final String PROPERTY_INDEXING_XML_BASE_VAR = "comarquage.path.xml";
77  
78      // XPath comparisons
79      private static final String PROPERTY_XPATH_CARD = "comarquage.parser.xpath.local.card";
80      private static final String PROPERTY_XPATH_DATE = "comarquage.parser.xpath.local.date";
81      private static final String PROPERTY_XPATH_TITLE = "comarquage.parser.xpath.local.title";
82      private static final String PROPERTY_ATTRIBUTE_URL = "comarquage.parser.xpath.local.attribute.url";
83  
84      // Index type
85      private static final String PROPERTY_INDEXING_TYPE = "comarquage-solr.indexing.localType";
86  
87      // Path contents
88      private static final String PROPERTY_PATH_ID = "comarquage.parser.path.id";
89      private static final String PROPERTY_PATH_FIRST_NODE = "comarquage.parser.path.first.node";
90  
91      // URL delimiter
92      private static final String PROPERTY_URL_DELIMITER = "comarquage.parser.url.local.delimiter";
93  
94      // Strings
95      private static final String STRING_EMPTY = "";
96      private static final String STRING_POINT = ".";
97      private static final String STRING_SLASH = "/";
98      private static final String STRING_SPACE = " ";
99      private static final String SHORT_NAME = "comgeloc";
100 
101     // -------------
102     // - Variables -
103     // -------------
104     // List of Solr items
105     private List<SolrItem> _listSolrItems;
106 
107     // XPath
108     private String _strXPath;
109 
110     // Contents
111     private String _strURL;
112     private String _strDate;
113     private String _strType;
114     private String _strSite;
115     private String _strProdUrl;
116     private String _strTitle;
117     private String _strContents;
118 
119     /**
120      * Initializes and launches the parsing of the local cards (public constructor)
121      */
122     public CoMarquageSolrLocalParser(  )
123     {
124         // Gets the local cards path
125         String strLocalBasePath = AppPropertiesService.getProperty( PROPERTY_INDEXING_LOCAL_PATH );
126         String strLocalPath = AppPathService.getPath( PROPERTY_INDEXING_XML_BASE_VAR, strLocalBasePath );
127         File fileBasePath = new File( strLocalPath );
128 
129         // Initializes the SolrItem list
130         _listSolrItems = new ArrayList<SolrItem>(  );
131 
132         // Initializes the indexing type
133         _strType = AppPropertiesService.getProperty( PROPERTY_INDEXING_TYPE );
134 
135         // Initializes the site
136         _strSite = SolrIndexerService.getWebAppName(  );
137 
138         // Initializes the prod url
139         _strProdUrl = SolrIndexerService.getBaseUrl(  );
140 
141         if ( !_strProdUrl.endsWith( "/" ) )
142         {
143             _strProdUrl = _strProdUrl + "/";
144         }
145 
146         try
147         {
148             // Initializes the SAX parser
149             SAXParserFactory factory = SAXParserFactory.newInstance(  );
150             SAXParser parser = factory.newSAXParser(  );
151 
152             // Launches the parsing on each local card
153             parseAllLocalCards( fileBasePath, parser );
154         }
155         catch ( ParserConfigurationException e )
156         {
157             AppLogService.error( e.getMessage(  ), e );
158         }
159         catch ( SAXException e )
160         {
161             AppLogService.error( e.getMessage(  ), e );
162         }
163     }
164 
165     /**
166      * Launches the parsing on each local card
167      *
168      * @param fileBasePath the base path
169      * @param parser the SAX parser
170      */
171     private void parseAllLocalCards( File fileBasePath, SAXParser parser )
172     {
173         if ( fileBasePath.isFile(  ) )
174         {
175             // Launches the parsing of this local card (with the current handler)
176             try
177             {
178                 parser.parse( fileBasePath.getAbsolutePath(  ), this );
179             }
180             catch ( SAXException e )
181             {
182                 AppLogService.error( e.getMessage(  ), e );
183             }
184             catch ( IOException e )
185             {
186                 AppLogService.error( e.getMessage(  ), e );
187             }
188         }
189         else
190         {
191             // Processes all the files of the current directory
192             File[] files = fileBasePath.listFiles(  );
193 
194             for ( File fileCurrent : files )
195             {
196                 if ( !fileCurrent.getAbsolutePath(  ).endsWith( "CVS" ) )
197                 {
198                     // Launches the parsing on each local card (recursive)
199                     parseAllLocalCards( fileCurrent, parser );
200                 }
201             }
202         }
203     }
204 
205     /**
206     * Event received when starting the parsing operation
207     *
208     * @throws SAXException any SAX exception
209     */
210     public void startDocument(  ) throws SAXException
211     {
212         // Initializes the XPATH
213         _strXPath = STRING_EMPTY;
214 
215         // Initializes the contents
216         _strURL = STRING_EMPTY;
217         _strDate = STRING_EMPTY;
218         _strTitle = STRING_EMPTY;
219         _strContents = STRING_EMPTY;
220     }
221 
222     /**
223     * Event received at the end of the parsing operation
224     *
225     * @throws SAXException any SAX exception
226     */
227     public void endDocument(  ) throws SAXException
228     {
229         // Sets the ID 
230         String strDelimiter = STRING_POINT + AppPropertiesService.getProperty( PROPERTY_URL_DELIMITER );
231         String strFirstNode = AppPropertiesService.getProperty( PROPERTY_PATH_FIRST_NODE ) + STRING_SLASH;
232         String strId = strFirstNode + _strURL.split( strDelimiter )[0];
233 
234         // Sets the full URL
235         UrlItem url = new UrlItem( _strProdUrl );
236         url.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( PROPERTY_PLUGIN_NAME ) );
237         url.addParameter( AppPropertiesService.getProperty( PROPERTY_PATH_ID ), strId );
238         
239         // Converts the date from "dd MMMMM yyyy" to "yyyyMMdd"
240         Locale locale = Locale.FRENCH;
241         Date dateUpdate = null;
242 
243         try
244         {
245             SimpleDateFormat dateFormat = new SimpleDateFormat( "dd MMMMM yyyy", locale );
246             dateUpdate = dateFormat.parse( _strDate );
247 
248             dateFormat.applyPattern( "yyyyMMdd" );
249         }
250         catch ( ParseException e )
251         {
252             dateUpdate = null;
253         }
254 
255         // Creates a new lucene document
256         SolrItem item = new SolrItem(  );
257 
258         item.setUrl( url.getUrl(  ) );
259         item.setDate( dateUpdate );
260         item.setUid( strId + SolrConstants.CONSTANT_UNDERSCORE + SHORT_NAME );
261         item.setContent( _strContents );
262         item.setTitle( _strTitle );
263         item.setType( _strType );
264         item.setSite( _strSite );
265 
266         // Adds the new item to the list
267         _listSolrItems.add( item );
268     }
269 
270     /**
271      * Event received at the start of an element
272      *
273      * @param uri the Namespace URI
274      * @param localName the local name
275      * @param qName the qualified XML name
276      * @param atts the attributes attached to the element
277      *
278      * @throws SAXException any SAX exception
279      */
280     public void startElement( String uri, String localName, String qName, Attributes atts )
281         throws SAXException
282     {
283         // Updates the XPath
284         _strXPath += ( STRING_SLASH + qName );
285 
286         // Gets the URL (attribute)
287         String strXPathCard = AppPropertiesService.getProperty( PROPERTY_XPATH_CARD );
288 
289         if ( ( _strXPath != null ) && _strXPath.equals( strXPathCard ) )
290         {
291             String strAttributeUrl = AppPropertiesService.getProperty( PROPERTY_ATTRIBUTE_URL );
292             _strURL = atts.getValue( strAttributeUrl );
293         }
294     }
295 
296     /**
297      * Event received at the end of an element
298      *
299      * @param uri the Namespace URI
300      * @param localName the local name
301      * @param qName the qualified XML name
302      *
303      * @throws SAXException any SAX exception
304      */
305     public void endElement( String uri, String localName, String qName )
306         throws SAXException
307     {
308         // Updates the XPath
309         _strXPath = _strXPath.substring( 0, _strXPath.lastIndexOf( STRING_SLASH ) );
310     }
311 
312     /**
313      * Event received when the analyzer encounters text (between two tags)
314      *
315      * @param ch the characters from the XML document
316      * @param start the start position in the array
317      * @param length the number of characters to read from the array
318      *
319      * @throws SAXException any SAX exception
320      */
321     public void characters( char[] ch, int start, int length )
322         throws SAXException
323     {
324         // Gets the XPath comparisons properties
325         String strXPathDate = AppPropertiesService.getProperty( PROPERTY_XPATH_DATE );
326         String strXPathTitle = AppPropertiesService.getProperty( PROPERTY_XPATH_TITLE );
327 
328         // Gets the date
329         if ( ( _strXPath != null ) && _strXPath.equals( strXPathDate ) )
330         {
331             _strDate += new String( ch, start, length );
332         }
333 
334         // Gets the title
335         else if ( ( _strXPath != null ) && _strXPath.equals( strXPathTitle ) )
336         {
337             _strTitle += new String( ch, start, length );
338         }
339 
340         // Gets the contents
341         if ( ( _strContents != null ) && !_strContents.equals( STRING_EMPTY ) )
342         {
343             _strContents += ( STRING_SPACE + new String( ch, start, length ) );
344         }
345         else
346         {
347             _strContents += new String( ch, start, length );
348         }
349     }
350 
351     /**
352     * Gets the list of Solr items
353     *
354     * @return The list of Solr items
355     */
356     public List<SolrItem> getLocalSolrItems(  )
357     {
358         return _listSolrItems;
359     }
360 }