1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.document.modules.ckan.service;
35
36 import fr.paris.lutece.plugins.document.modules.ckan.business.PackageOrganization;
37 import fr.paris.lutece.plugins.document.modules.ckan.business.PackageResource;
38 import fr.paris.lutece.plugins.document.modules.ckan.business.PackageShowResult;
39 import fr.paris.lutece.plugins.document.modules.ckan.business.PackageTag;
40 import fr.paris.lutece.portal.service.spring.SpringContextService;
41 import fr.paris.lutece.portal.service.util.AppLogService;
42 import fr.paris.lutece.util.string.StringUtil;
43
44 import org.w3c.dom.Document;
45 import org.w3c.dom.Node;
46 import org.w3c.dom.NodeList;
47
48 import org.xml.sax.InputSource;
49 import org.xml.sax.SAXException;
50
51 import java.io.IOException;
52 import java.io.StringReader;
53
54 import java.text.MessageFormat;
55 import java.text.ParseException;
56 import java.text.SimpleDateFormat;
57
58 import java.util.ArrayList;
59 import java.util.Date;
60 import java.util.List;
61 import java.util.StringTokenizer;
62
63 import javax.xml.parsers.DocumentBuilder;
64 import javax.xml.parsers.DocumentBuilderFactory;
65 import javax.xml.parsers.ParserConfigurationException;
66
67
68
69
70
71 public final class DocumentParser
72 {
73 private static final String TIMESTAMP_DEFAULT = "2000-01-01T00:00:00.000000";
74 private static final CkanService _service = SpringContextService.getBean( "document-ckan.ckanService" );
75 private static final SimpleDateFormat _dateFormaterInput = new SimpleDateFormat( "dd/MM/yyyy" );
76 private static final SimpleDateFormat _dateFormaterOutput = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSS" );
77
78
79 private DocumentParser( )
80 {
81 }
82
83
84
85
86
87
88
89
90
91 public static PackageShowResult parse( PackageShowResult psr, String strXml, int nPortletId )
92 throws SAXException
93 {
94 try
95 {
96 DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance( );
97 DocumentBuilder dBuilder = dbFactory.newDocumentBuilder( );
98 Document doc = dBuilder.parse( new InputSource( new StringReader( strXml ) ) );
99
100 String strId = getValue( doc, "id" );
101 psr.setId( strId );
102
103 String strTitle = getValue( doc, "title" );
104 psr.setTitle( strTitle );
105 psr.setName( formatName( strId, strTitle ) );
106 psr.setAuthor( getValue( doc, "author" ) );
107 psr.setAuthor_email( getValue( doc, "author-email" ) );
108 psr.setState( getValue( doc, "state" ) );
109 psr.setVersion( getValue( doc, "version" ) );
110 psr.setMaintainer( getValue( doc, "maintainer" ) );
111 psr.setNotes( getValue( doc, "notes" ) );
112 psr.setType( getValue( doc, "type" ) );
113 psr.setUrl( MessageFormat.format( _service.getDatasetUrlFormat( ), strId, nPortletId ) );
114 psr.setLicense_id( getValue( doc, "license-id" ) );
115 psr.setLicense_title( getValue( doc, "license-title" ) );
116
117 String strMetadataCreated = convertDateToTimestamp( getValue( doc, "metadata-created" ), TIMESTAMP_DEFAULT );
118 psr.setMetadata_created( strMetadataCreated );
119
120 String strMetadataModified = convertDateToTimestamp( getValue( doc, "metadata-modified" ),
121 strMetadataCreated );
122 psr.setMetadata_modified( strMetadataModified );
123 psr.setRevision_id( getValue( doc, "revision-id" ) );
124 psr.setRevision_timestamp( convertDateToTimestamp( getValue( doc, "revision-timestamp" ),
125 strMetadataModified ) );
126
127 List<String> listGroups = new ArrayList<String>();
128 listGroups.add( getValue( doc , "groups" ) );
129 psr.setGroups( listGroups );
130 psr.setFrequency( getValue( doc , "frequency" ));
131 psr.setTemporal_coverage_from( getValue( doc , "temporal-coverage-from" ));
132 psr.setTemporal_coverage_to( getValue( doc , "temporal-coverage-to" ));
133 psr.setTerritorial_coverage( getValue( doc , "territorial-coverage"));
134 psr.setTerritorial_coverage_granularity( getValue( doc , "territorial-coverage-granularity"));
135
136
137 PackageOrganization po = new PackageOrganization( );
138 po.setId( getValue( doc, "organization-id" ) );
139 po.setTitle( getValue( doc, "organization-title" ) );
140 po.setName( getValue( doc, "organization-name" ) );
141 po.setDescription( getValue( doc, "organization-description" ) );
142 po.setType( getValue( doc, "organization-type" ) );
143 po.setIs_organization( true );
144 po.setApproval_status( getValue( doc, "organization-approval-status" ) );
145 po.setState( getValue( doc, "organization-state" ) );
146 po.setId( getValue( doc, "organization-id" ) );
147 po.setRevision_id( getValue( doc, "organization-revision-id" ) );
148 po.setRevision_timestamp( getValue( doc, "organization-revision-timestamp" ) );
149 po.setCreated( getValue( doc, "organization-revision-timestamp" ) );
150 psr.setOrganization( po );
151
152
153 List<PackageResource> listResources = new ArrayList<PackageResource>( );
154
155 for ( int i = 1; i < 4; i++ )
156 {
157 String strFormat = getValue( doc, "resource-format-" + i );
158
159 if ( !"".equals( strFormat.trim( ) ) )
160 {
161 PackageResource pr = new PackageResource( );
162 pr.setFormat( strFormat );
163 pr.setDescription( strTitle );
164 pr.setLast_modified( strMetadataModified );
165 pr.setRevision_id( getValue( doc, "resource-revision-id" ) );
166 fillResourceInfos( pr, doc, "resource-file-" + i, strMetadataCreated );
167 String strDownloadUrl = getValue( doc, "resource-download-url-" + i );
168 if( strDownloadUrl.length() > 4 )
169 {
170 pr.setUrl( strDownloadUrl );
171 }
172 listResources.add( pr );
173 }
174 }
175
176 psr.setResources( listResources );
177 psr.setNum_resources( listResources.size( ) );
178
179
180 List<PackageTag> listTags = new ArrayList<PackageTag>( );
181 String strTags = getValue( doc, "tags" );
182 StringTokenizer st = new StringTokenizer( strTags );
183
184 while ( st.hasMoreTokens( ) )
185 {
186 PackageTag tag = new PackageTag( );
187 String strName = st.nextToken( );
188 tag.setName( strName );
189 tag.setDisplay_name( strName );
190 tag.setRevision_timestamp( strMetadataModified );
191 tag.setState( getValue( doc, "tag-state" ) );
192 listTags.add( tag );
193 }
194
195 psr.setTags( listTags );
196 psr.setNum_tags( listTags.size( ) );
197 }
198 catch ( IOException e )
199 {
200 AppLogService.error( "Error parsing document : " + e.getMessage( ), e );
201 }
202 catch ( ParserConfigurationException e )
203 {
204 AppLogService.error( "Error parsing document : " + e.getMessage( ), e );
205 }
206
207 return psr;
208 }
209
210
211
212
213
214
215
216 private static String getValue( Document doc, String strKey )
217 {
218 String strDocumentTag = _service.getMapping( strKey );
219
220 if ( !strDocumentTag.equals( CkanService.NOT_FOUND ) )
221 {
222 NodeList nList = doc.getElementsByTagName( strDocumentTag );
223 Node node = nList.item( 0 );
224
225 if ( node != null )
226 {
227 return node.getTextContent( );
228 }
229 }
230
231 return _service.getDefault( strKey );
232 }
233
234
235
236
237
238
239
240
241 private static void fillResourceInfos( PackageResource pr, Document doc, String strKey, String strCreated )
242 {
243 String strDocumentTag = _service.getMapping( strKey );
244
245 if ( !strDocumentTag.equals( CkanService.NOT_FOUND ) )
246 {
247 NodeList nList = doc.getElementsByTagName( strDocumentTag );
248
249 fillResource( pr, nList, strCreated );
250 }
251 }
252
253
254
255
256
257
258
259 private static void fillResource( PackageResource pr, NodeList nList, String strCreated )
260 {
261 String strId = "";
262 String strAttributeId = "";
263
264 for ( int i = 0; i < nList.getLength( ); i++ )
265 {
266 Node node = nList.item( i );
267
268 NodeList childs = node.getChildNodes( );
269
270 if ( childs.getLength( ) > 0 )
271 {
272 fillResource( pr, childs, strCreated );
273 }
274
275 if ( node.getNodeName( ).equals( "resource-document-id" ) )
276 {
277 strId = node.getTextContent( );
278 }
279 else if ( node.getNodeName( ).equals( "resource-attribute-id" ) )
280 {
281 strAttributeId = node.getTextContent( );
282 }
283 else if ( node.getNodeName( ).equals( "resource-content-type" ) )
284 {
285 pr.setMimetype( node.getTextContent( ) );
286 }
287 else if ( node.getNodeName( ).equals( "file-size" ) )
288 {
289 pr.setSize( node.getTextContent( ) );
290 }
291 }
292
293 if ( ( !"".equals( strId ) ) && ( !"".equals( strAttributeId ) ) )
294 {
295 pr.setUrl( MessageFormat.format( _service.getResourceUrlFormat( ), strId, strAttributeId ) );
296 pr.setResource_type( "file" );
297 pr.setId( formatResourceId( strId, strAttributeId ) );
298 pr.setCreated( strCreated );
299 }
300 }
301
302
303
304
305
306
307
308 private static String formatName( String strId, String strTitle )
309 {
310 return ( strId + "-" + StringUtil.replaceAccent( strTitle ).replace( " ", "_" ).toLowerCase( ) );
311 }
312
313
314
315
316
317
318
319 private static String convertDateToTimestamp( String strDate, String strDefault )
320 {
321 String strTimestamp = strDefault;
322
323 try
324 {
325 Date date = _dateFormaterInput.parse( strDate );
326 strTimestamp = _dateFormaterOutput.format( date );
327 }
328 catch ( ParseException e )
329 {
330 AppLogService.error( "Error parsing document : " + e.getMessage( ), e );
331 }
332
333 return strTimestamp;
334 }
335
336
337
338
339
340
341
342 private static String formatResourceId( String strId, String strAttributeId )
343 {
344 return strId + ":" + strAttributeId;
345 }
346 }