View Javadoc
1   /*
2    * Copyright (c) 2002-2014, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.document.modules.ckan.service;
35  
36  import fr.paris.lutece.plugins.document.modules.ckan.business.PackageOrganization;
37  import fr.paris.lutece.plugins.document.modules.ckan.business.PackageResource;
38  import fr.paris.lutece.plugins.document.modules.ckan.business.PackageShowResult;
39  import fr.paris.lutece.plugins.document.modules.ckan.business.PackageTag;
40  import fr.paris.lutece.portal.service.spring.SpringContextService;
41  import fr.paris.lutece.portal.service.util.AppLogService;
42  import fr.paris.lutece.util.string.StringUtil;
43  
44  import org.w3c.dom.Document;
45  import org.w3c.dom.Node;
46  import org.w3c.dom.NodeList;
47  
48  import org.xml.sax.InputSource;
49  import org.xml.sax.SAXException;
50  
51  import java.io.IOException;
52  import java.io.StringReader;
53  
54  import java.text.MessageFormat;
55  import java.text.ParseException;
56  import java.text.SimpleDateFormat;
57  
58  import java.util.ArrayList;
59  import java.util.Date;
60  import java.util.List;
61  import java.util.StringTokenizer;
62  
63  import javax.xml.parsers.DocumentBuilder;
64  import javax.xml.parsers.DocumentBuilderFactory;
65  import javax.xml.parsers.ParserConfigurationException;
66  
67  
68  /**
69   * DocumentParser
70   */
71  public final class DocumentParser
72  {
73      private static final String TIMESTAMP_DEFAULT = "2000-01-01T00:00:00.000000";
74      private static final CkanService _service = SpringContextService.getBean( "document-ckan.ckanService" );
75      private static final SimpleDateFormat _dateFormaterInput = new SimpleDateFormat( "dd/MM/yyyy" );
76      private static final SimpleDateFormat _dateFormaterOutput = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSS" );
77  
78      /** Private constructor */
79      private DocumentParser(  )
80      {
81      }
82  
83      /**
84       * Parse the XML content of a document
85       * @param strXml The XML
86       * @param psr The PackageShowResult
87       * @param nPortletId The portlet ID
88       * @return the fulfilled PackageShowResult
89       * @throws SAXException if an error occurs
90       */
91      public static PackageShowResult parse( PackageShowResult psr, String strXml, int nPortletId )
92          throws SAXException
93      {
94          try
95          {
96              DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(  );
97              DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(  );
98              Document doc = dBuilder.parse( new InputSource( new StringReader( strXml ) ) );
99  
100             String strId = getValue( doc, "id" );
101             psr.setId( strId );
102 
103             String strTitle = getValue( doc, "title" );
104             psr.setTitle( strTitle );
105             psr.setName( formatName( strId, strTitle ) );
106             psr.setAuthor( getValue( doc, "author" ) );
107             psr.setAuthor_email( getValue( doc, "author-email" ) );
108             psr.setState( getValue( doc, "state" ) );
109             psr.setVersion( getValue( doc, "version" ) );
110             psr.setMaintainer( getValue( doc, "maintainer" ) );
111             psr.setNotes( getValue( doc, "notes" ) );
112             psr.setType( getValue( doc, "type" ) );
113             psr.setUrl( MessageFormat.format( _service.getDatasetUrlFormat(  ), strId, nPortletId ) );
114             psr.setLicense_id( getValue( doc, "license-id" ) );
115             psr.setLicense_title( getValue( doc, "license-title" ) );
116 
117             String strMetadataCreated = convertDateToTimestamp( getValue( doc, "metadata-created" ), TIMESTAMP_DEFAULT );
118             psr.setMetadata_created( strMetadataCreated );
119 
120             String strMetadataModified = convertDateToTimestamp( getValue( doc, "metadata-modified" ),
121                     strMetadataCreated );
122             psr.setMetadata_modified( strMetadataModified );
123             psr.setRevision_id( getValue( doc, "revision-id" ) );
124             psr.setRevision_timestamp( convertDateToTimestamp( getValue( doc, "revision-timestamp" ),
125                     strMetadataModified ) );
126             
127             List<String> listGroups = new ArrayList<String>();
128             listGroups.add( getValue( doc , "groups" ) );
129             psr.setGroups( listGroups );
130             psr.setFrequency( getValue( doc , "frequency" ));
131             psr.setTemporal_coverage_from( getValue( doc , "temporal-coverage-from" ));
132             psr.setTemporal_coverage_to( getValue( doc , "temporal-coverage-to" ));
133             psr.setTerritorial_coverage( getValue( doc , "territorial-coverage"));
134             psr.setTerritorial_coverage_granularity( getValue( doc , "territorial-coverage-granularity"));
135             
136             // Organization parsing
137             PackageOrganization po = new PackageOrganization(  );
138             po.setId( getValue( doc, "organization-id" ) );
139             po.setTitle( getValue( doc, "organization-title" ) );
140             po.setName( getValue( doc, "organization-name" ) );
141             po.setDescription( getValue( doc, "organization-description" ) );
142             po.setType( getValue( doc, "organization-type" ) );
143             po.setIs_organization( true );
144             po.setApproval_status( getValue( doc, "organization-approval-status" ) );
145             po.setState( getValue( doc, "organization-state" ) );
146             po.setId( getValue( doc, "organization-id" ) );
147             po.setRevision_id( getValue( doc, "organization-revision-id" ) );
148             po.setRevision_timestamp( getValue( doc, "organization-revision-timestamp" ) );
149             po.setCreated( getValue( doc, "organization-revision-timestamp" ) );
150             psr.setOrganization( po );
151 
152             // Resources parsing
153             List<PackageResource> listResources = new ArrayList<PackageResource>(  );
154 
155             for ( int i = 1; i < 4; i++ )
156             {
157                 String strFormat = getValue( doc, "resource-format-" + i );
158 
159                 if ( !"".equals( strFormat.trim(  ) ) )
160                 {
161                     PackageResource pr = new PackageResource(  );
162                     pr.setFormat( strFormat );
163                     pr.setDescription( strTitle );
164                     pr.setLast_modified( strMetadataModified );
165                     pr.setRevision_id( getValue( doc, "resource-revision-id" ) );
166                     fillResourceInfos( pr, doc, "resource-file-" + i, strMetadataCreated );
167                     String strDownloadUrl = getValue( doc, "resource-download-url-" + i );
168                     if( strDownloadUrl.length() > 4 )
169                     {
170                         pr.setUrl( strDownloadUrl );
171                     }
172                     listResources.add( pr );
173                 }
174             }
175 
176             psr.setResources( listResources );
177             psr.setNum_resources( listResources.size(  ) );
178 
179             // Tags parsing
180             List<PackageTag> listTags = new ArrayList<PackageTag>(  );
181             String strTags = getValue( doc, "tags" );
182             StringTokenizer st = new StringTokenizer( strTags );
183 
184             while ( st.hasMoreTokens(  ) )
185             {
186                 PackageTag tag = new PackageTag(  );
187                 String strName = st.nextToken(  );
188                 tag.setName( strName );
189                 tag.setDisplay_name( strName );
190                 tag.setRevision_timestamp( strMetadataModified );
191                 tag.setState( getValue( doc, "tag-state" ) );
192                 listTags.add( tag );
193             }
194 
195             psr.setTags( listTags );
196             psr.setNum_tags( listTags.size(  ) );
197         }
198         catch ( IOException e )
199         {
200             AppLogService.error( "Error parsing document : " + e.getMessage(  ), e );
201         }
202         catch ( ParserConfigurationException e )
203         {
204             AppLogService.error( "Error parsing document : " + e.getMessage(  ), e );
205         }
206 
207         return psr;
208     }
209 
210     /**
211      * Get a value for a given key
212      * @param doc The document
213      * @param strKey The key
214      * @return The value
215      */
216     private static String getValue( Document doc, String strKey )
217     {
218         String strDocumentTag = _service.getMapping( strKey );
219 
220         if ( !strDocumentTag.equals( CkanService.NOT_FOUND ) )
221         {
222             NodeList nList = doc.getElementsByTagName( strDocumentTag );
223             Node node = nList.item( 0 );
224 
225             if ( node != null )
226             {
227                 return node.getTextContent(  );
228             }
229         }
230 
231         return _service.getDefault( strKey );
232     }
233 
234     /**
235      * Fill a resource by parsing tags
236      * @param pr The PackageResource
237      * @param doc The document
238      * @param strKey The Key
239      * @param strCreated Creation Timestamp
240      */
241     private static void fillResourceInfos( PackageResource pr, Document doc, String strKey, String strCreated )
242     {
243         String strDocumentTag = _service.getMapping( strKey );
244 
245         if ( !strDocumentTag.equals( CkanService.NOT_FOUND ) )
246         {
247             NodeList nList = doc.getElementsByTagName( strDocumentTag );
248 
249             fillResource( pr, nList, strCreated );
250         }
251     }
252 
253     /**
254      * Recursive method to find useful tags
255      * @param pr The PackageResource
256      * @param nList The node list
257      * @param strCreated Creation Timestamp
258      */
259     private static void fillResource( PackageResource pr, NodeList nList, String strCreated )
260     {
261         String strId = "";
262         String strAttributeId = "";
263 
264         for ( int i = 0; i < nList.getLength(  ); i++ )
265         {
266             Node node = nList.item( i );
267 
268             NodeList childs = node.getChildNodes(  );
269 
270             if ( childs.getLength(  ) > 0 )
271             {
272                 fillResource( pr, childs, strCreated );
273             }
274 
275             if ( node.getNodeName(  ).equals( "resource-document-id" ) )
276             {
277                 strId = node.getTextContent(  );
278             }
279             else if ( node.getNodeName(  ).equals( "resource-attribute-id" ) )
280             {
281                 strAttributeId = node.getTextContent(  );
282             }
283             else if ( node.getNodeName(  ).equals( "resource-content-type" ) )
284             {
285                 pr.setMimetype( node.getTextContent(  ) );
286             }
287             else if ( node.getNodeName(  ).equals( "file-size" ) )
288             {
289                 pr.setSize( node.getTextContent(  ) );
290             }
291         }
292 
293         if ( ( !"".equals( strId ) ) && ( !"".equals( strAttributeId ) ) )
294         {
295             pr.setUrl( MessageFormat.format( _service.getResourceUrlFormat(  ), strId, strAttributeId ) );
296             pr.setResource_type( "file" );
297             pr.setId( formatResourceId( strId, strAttributeId ) );
298             pr.setCreated( strCreated );
299         }
300     }
301 
302     /**
303      * Format the field name
304      * @param strId The doc ID
305      * @param strTitle The doc title
306      * @return The Name
307      */
308     private static String formatName( String strId, String strTitle )
309     {
310         return ( strId + "-" + StringUtil.replaceAccent( strTitle ).replace( " ", "_" ).toLowerCase(  ) );
311     }
312 
313     /**
314      * Convert a date to a timestamp
315      * @param strDate The input date
316      * @param strDefault The default value if the conversion failed
317      * @return The timestamp
318      */
319     private static String convertDateToTimestamp( String strDate, String strDefault )
320     {
321         String strTimestamp = strDefault;
322 
323         try
324         {
325             Date date = _dateFormaterInput.parse( strDate );
326             strTimestamp = _dateFormaterOutput.format( date );
327         }
328         catch ( ParseException e )
329         {
330             AppLogService.error( "Error parsing document : " + e.getMessage(  ), e );
331         }
332 
333         return strTimestamp;
334     }
335 
336     /**
337      * Format a resource ID
338      * @param strId The dataset id
339      * @param strAttributeId the resource id
340      * @return The ID
341      */
342     private static String formatResourceId( String strId, String strAttributeId )
343     {
344         return strId + ":" + strAttributeId;
345     }
346 }