1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.comarquage.modules.solr.utils.parsers;
35
36 import java.io.File;
37 import java.io.IOException;
38 import java.text.ParseException;
39 import java.text.SimpleDateFormat;
40 import java.util.ArrayList;
41 import java.util.Date;
42 import java.util.List;
43 import java.util.Locale;
44
45 import javax.xml.parsers.ParserConfigurationException;
46 import javax.xml.parsers.SAXParser;
47 import javax.xml.parsers.SAXParserFactory;
48
49 import org.xml.sax.Attributes;
50 import org.xml.sax.SAXException;
51 import org.xml.sax.helpers.DefaultHandler;
52
53 import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexerService;
54 import fr.paris.lutece.plugins.search.solr.indexer.SolrItem;
55 import fr.paris.lutece.plugins.search.solr.util.SolrConstants;
56 import fr.paris.lutece.portal.service.content.XPageAppService;
57 import fr.paris.lutece.portal.service.util.AppLogService;
58 import fr.paris.lutece.portal.service.util.AppPathService;
59 import fr.paris.lutece.portal.service.util.AppPropertiesService;
60 import fr.paris.lutece.util.url.UrlItem;
61
62
63
64
65
66 public class CoMarquageSolrLocalParser extends DefaultHandler
67 {
68
69
70
71
72 private static final String PROPERTY_PLUGIN_NAME = "comarquage.plugin.name";
73
74
75 private static final String PROPERTY_INDEXING_LOCAL_PATH = "comarquage.indexing.localBasePath";
76 private static final String PROPERTY_INDEXING_XML_BASE_VAR = "comarquage.path.xml";
77
78
79 private static final String PROPERTY_XPATH_CARD = "comarquage.parser.xpath.local.card";
80 private static final String PROPERTY_XPATH_DATE = "comarquage.parser.xpath.local.date";
81 private static final String PROPERTY_XPATH_TITLE = "comarquage.parser.xpath.local.title";
82 private static final String PROPERTY_ATTRIBUTE_URL = "comarquage.parser.xpath.local.attribute.url";
83
84
85 private static final String PROPERTY_INDEXING_TYPE = "comarquage-solr.indexing.localType";
86
87
88 private static final String PROPERTY_PATH_ID = "comarquage.parser.path.id";
89 private static final String PROPERTY_PATH_FIRST_NODE = "comarquage.parser.path.first.node";
90
91
92 private static final String PROPERTY_URL_DELIMITER = "comarquage.parser.url.local.delimiter";
93
94
95 private static final String STRING_EMPTY = "";
96 private static final String STRING_POINT = ".";
97 private static final String STRING_SLASH = "/";
98 private static final String STRING_SPACE = " ";
99 private static final String SHORT_NAME = "comgeloc";
100
101
102
103
104
105 private List<SolrItem> _listSolrItems;
106
107
108 private String _strXPath;
109
110
111 private String _strURL;
112 private String _strDate;
113 private String _strType;
114 private String _strSite;
115 private String _strProdUrl;
116 private String _strTitle;
117 private String _strContents;
118
119
120
121
122 public CoMarquageSolrLocalParser( )
123 {
124
125 String strLocalBasePath = AppPropertiesService.getProperty( PROPERTY_INDEXING_LOCAL_PATH );
126 String strLocalPath = AppPathService.getPath( PROPERTY_INDEXING_XML_BASE_VAR, strLocalBasePath );
127 File fileBasePath = new File( strLocalPath );
128
129
130 _listSolrItems = new ArrayList<SolrItem>( );
131
132
133 _strType = AppPropertiesService.getProperty( PROPERTY_INDEXING_TYPE );
134
135
136 _strSite = SolrIndexerService.getWebAppName( );
137
138
139 _strProdUrl = SolrIndexerService.getBaseUrl( );
140
141 if ( !_strProdUrl.endsWith( "/" ) )
142 {
143 _strProdUrl = _strProdUrl + "/";
144 }
145
146 try
147 {
148
149 SAXParserFactory factory = SAXParserFactory.newInstance( );
150 SAXParser parser = factory.newSAXParser( );
151
152
153 parseAllLocalCards( fileBasePath, parser );
154 }
155 catch ( ParserConfigurationException e )
156 {
157 AppLogService.error( e.getMessage( ), e );
158 }
159 catch ( SAXException e )
160 {
161 AppLogService.error( e.getMessage( ), e );
162 }
163 }
164
165
166
167
168
169
170
171 private void parseAllLocalCards( File fileBasePath, SAXParser parser )
172 {
173 if ( fileBasePath.isFile( ) )
174 {
175
176 try
177 {
178 parser.parse( fileBasePath.getAbsolutePath( ), this );
179 }
180 catch ( SAXException e )
181 {
182 AppLogService.error( e.getMessage( ), e );
183 }
184 catch ( IOException e )
185 {
186 AppLogService.error( e.getMessage( ), e );
187 }
188 }
189 else
190 {
191
192 File[] files = fileBasePath.listFiles( );
193
194 for ( File fileCurrent : files )
195 {
196 if ( !fileCurrent.getAbsolutePath( ).endsWith( "CVS" ) )
197 {
198
199 parseAllLocalCards( fileCurrent, parser );
200 }
201 }
202 }
203 }
204
205
206
207
208
209
210 public void startDocument( ) throws SAXException
211 {
212
213 _strXPath = STRING_EMPTY;
214
215
216 _strURL = STRING_EMPTY;
217 _strDate = STRING_EMPTY;
218 _strTitle = STRING_EMPTY;
219 _strContents = STRING_EMPTY;
220 }
221
222
223
224
225
226
227 public void endDocument( ) throws SAXException
228 {
229
230 String strDelimiter = STRING_POINT + AppPropertiesService.getProperty( PROPERTY_URL_DELIMITER );
231 String strFirstNode = AppPropertiesService.getProperty( PROPERTY_PATH_FIRST_NODE ) + STRING_SLASH;
232 String strId = strFirstNode + _strURL.split( strDelimiter )[0];
233
234
235 UrlItem url = new UrlItem( _strProdUrl );
236 url.addParameter( XPageAppService.PARAM_XPAGE_APP, AppPropertiesService.getProperty( PROPERTY_PLUGIN_NAME ) );
237 url.addParameter( AppPropertiesService.getProperty( PROPERTY_PATH_ID ), strId );
238
239
240 Locale locale = Locale.FRENCH;
241 Date dateUpdate = null;
242
243 try
244 {
245 SimpleDateFormat dateFormat = new SimpleDateFormat( "dd MMMMM yyyy", locale );
246 dateUpdate = dateFormat.parse( _strDate );
247
248 dateFormat.applyPattern( "yyyyMMdd" );
249 }
250 catch ( ParseException e )
251 {
252 dateUpdate = null;
253 }
254
255
256 SolrItem item = new SolrItem( );
257
258 item.setUrl( url.getUrl( ) );
259 item.setDate( dateUpdate );
260 item.setUid( strId + SolrConstants.CONSTANT_UNDERSCORE + SHORT_NAME );
261 item.setContent( _strContents );
262 item.setTitle( _strTitle );
263 item.setType( _strType );
264 item.setSite( _strSite );
265
266
267 _listSolrItems.add( item );
268 }
269
270
271
272
273
274
275
276
277
278
279
280 public void startElement( String uri, String localName, String qName, Attributes atts )
281 throws SAXException
282 {
283
284 _strXPath += ( STRING_SLASH + qName );
285
286
287 String strXPathCard = AppPropertiesService.getProperty( PROPERTY_XPATH_CARD );
288
289 if ( ( _strXPath != null ) && _strXPath.equals( strXPathCard ) )
290 {
291 String strAttributeUrl = AppPropertiesService.getProperty( PROPERTY_ATTRIBUTE_URL );
292 _strURL = atts.getValue( strAttributeUrl );
293 }
294 }
295
296
297
298
299
300
301
302
303
304
305 public void endElement( String uri, String localName, String qName )
306 throws SAXException
307 {
308
309 _strXPath = _strXPath.substring( 0, _strXPath.lastIndexOf( STRING_SLASH ) );
310 }
311
312
313
314
315
316
317
318
319
320
321 public void characters( char[] ch, int start, int length )
322 throws SAXException
323 {
324
325 String strXPathDate = AppPropertiesService.getProperty( PROPERTY_XPATH_DATE );
326 String strXPathTitle = AppPropertiesService.getProperty( PROPERTY_XPATH_TITLE );
327
328
329 if ( ( _strXPath != null ) && _strXPath.equals( strXPathDate ) )
330 {
331 _strDate += new String( ch, start, length );
332 }
333
334
335 else if ( ( _strXPath != null ) && _strXPath.equals( strXPathTitle ) )
336 {
337 _strTitle += new String( ch, start, length );
338 }
339
340
341 if ( ( _strContents != null ) && !_strContents.equals( STRING_EMPTY ) )
342 {
343 _strContents += ( STRING_SPACE + new String( ch, start, length ) );
344 }
345 else
346 {
347 _strContents += new String( ch, start, length );
348 }
349 }
350
351
352
353
354
355
356 public List<SolrItem> getLocalSolrItems( )
357 {
358 return _listSolrItems;
359 }
360 }