1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.directory.modules.solr.search;
35
36 import java.io.ByteArrayInputStream;
37 import java.io.IOException;
38 import java.io.InputStream;
39 import java.io.Reader;
40 import java.io.StringReader;
41 import java.nio.charset.StandardCharsets;
42 import java.util.ArrayList;
43 import java.util.Collections;
44 import java.util.List;
45
46 import org.apache.commons.lang.StringUtils;
47 import org.apache.tika.exception.TikaException;
48 import org.apache.tika.metadata.Metadata;
49 import org.apache.tika.parser.ParseContext;
50 import org.apache.tika.parser.html.HtmlParser;
51 import org.apache.tika.sax.BodyContentHandler;
52 import org.xml.sax.ContentHandler;
53 import org.xml.sax.SAXException;
54
55 import fr.paris.lutece.plugins.directory.business.Directory;
56 import fr.paris.lutece.plugins.directory.business.DirectoryFilter;
57 import fr.paris.lutece.plugins.directory.business.DirectoryHome;
58 import fr.paris.lutece.plugins.directory.business.EntryFilter;
59 import fr.paris.lutece.plugins.directory.business.EntryHome;
60 import fr.paris.lutece.plugins.directory.business.IEntry;
61 import fr.paris.lutece.plugins.directory.business.Record;
62 import fr.paris.lutece.plugins.directory.business.RecordField;
63 import fr.paris.lutece.plugins.directory.business.RecordFieldFilter;
64 import fr.paris.lutece.plugins.directory.business.RecordFieldHome;
65 import fr.paris.lutece.plugins.directory.business.RecordHome;
66 import fr.paris.lutece.plugins.directory.service.DirectoryPlugin;
67 import fr.paris.lutece.plugins.directory.utils.DirectoryIndexerUtils;
68 import fr.paris.lutece.plugins.directory.utils.DirectoryUtils;
69 import fr.paris.lutece.plugins.search.solr.business.field.Field;
70 import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexer;
71 import fr.paris.lutece.plugins.search.solr.indexer.SolrIndexerService;
72 import fr.paris.lutece.plugins.search.solr.indexer.SolrItem;
73 import fr.paris.lutece.plugins.search.solr.util.SolrConstants;
74 import fr.paris.lutece.portal.service.content.XPageAppService;
75 import fr.paris.lutece.portal.service.plugin.Plugin;
76 import fr.paris.lutece.portal.service.plugin.PluginService;
77 import fr.paris.lutece.portal.service.util.AppLogService;
78 import fr.paris.lutece.portal.service.util.AppPropertiesService;
79 import fr.paris.lutece.util.url.UrlItem;
80
81
82
83
84
85
86 public class SolrDirectoryIndexer implements SolrIndexer
87 {
88 private static final String PROPERTY_DESCRIPTION = "directory-solr.indexer.description";
89 private static final String PROPERTY_NAME = "directory-solr.indexer.name";
90 private static final String PROPERTY_VERSION = "directory-solr.indexer.version";
91 private static final String PROPERTY_INDEXER_ENABLE = "directory-solr.indexer.enable";
92
93 public static final String SHORT_NAME = "dry";
94 private static final String DIRECTORY = "directory";
95 private static final String PARAMETER_ID_DIRECTORY_RECORD = "id_directory_record";
96 private static final String PARAMETER_VIEW_DIRECTORY_RECORD = "view_directory_record";
97 private static final String ROLE_NONE = "none";
98 private static final List<String> LIST_RESSOURCES_NAME = new ArrayList<String>( );
99
100 private static final String DIRECTORY_INDEXATION_ERROR = "[SolrDirectoryIndexer] An error occured during the indexation of the record number ";
101
102 public SolrDirectoryIndexer( )
103 {
104 super( );
105
106 LIST_RESSOURCES_NAME.add( DirectoryIndexerUtils.CONSTANT_TYPE_RESOURCE );
107 }
108
109
110
111
112 public String getDescription( )
113 {
114 return AppPropertiesService.getProperty( PROPERTY_DESCRIPTION );
115 }
116
117
118
119
120 public String getName( )
121 {
122 return AppPropertiesService.getProperty( PROPERTY_NAME );
123 }
124
125
126
127
128 public String getVersion( )
129 {
130 return AppPropertiesService.getProperty( PROPERTY_VERSION );
131 }
132
133
134
135
136 public List<String> indexDocuments( )
137 {
138 Plugin plugin = PluginService.getPlugin( DirectoryPlugin.PLUGIN_NAME );
139 List<String> lstErrors = new ArrayList<String>( );
140
141
142 DirectoryFilter dirFilter = new DirectoryFilter( );
143 dirFilter.setIsIndexed( DirectoryFilter.FILTER_TRUE );
144 dirFilter.setIsDisabled( DirectoryFilter.FILTER_TRUE );
145
146 for ( Directory directory : DirectoryHome.getDirectoryList( dirFilter, plugin ) )
147 {
148 try
149 {
150 int nIdDirectory = directory.getIdDirectory( );
151
152
153 RecordFieldFilter recFilter = new RecordFieldFilter( );
154 recFilter.setIdDirectory( nIdDirectory );
155 recFilter.setIsDisabled( RecordFieldFilter.FILTER_TRUE );
156
157 List<Record> listRecord = RecordHome.getListRecord( recFilter, plugin );
158
159
160 if ( !listRecord.isEmpty( ) )
161 {
162
163 EntryFilter entryFilter = new EntryFilter( );
164 entryFilter.setIdDirectory( nIdDirectory );
165 entryFilter.setIsIndexed( EntryFilter.FILTER_TRUE );
166
167 List<IEntry> listIndexedEntry = EntryHome.getEntryList( entryFilter, plugin );
168
169 entryFilter.setIsIndexed( EntryFilter.ALL_INT );
170 entryFilter.setIsIndexedAsTitle( EntryFilter.FILTER_TRUE );
171
172 List<IEntry> listIndexedAsTitleEntry = EntryHome.getEntryList( entryFilter, plugin );
173
174 entryFilter.setIsIndexedAsTitle( EntryFilter.ALL_INT );
175 entryFilter.setIsIndexedAsSummary( EntryFilter.FILTER_TRUE );
176
177 List<IEntry> listIndexedAsSummaryEntry = EntryHome.getEntryList( entryFilter, plugin );
178
179 for ( Record record : listRecord )
180 {
181 SolrItem recordDoc = getDocument( record, listIndexedEntry, listIndexedAsTitleEntry,
182 listIndexedAsSummaryEntry, plugin );
183
184 if ( recordDoc != null )
185 {
186 SolrIndexerService.write( recordDoc );
187 }
188 }
189 }
190 }
191 catch ( Exception e )
192 {
193 lstErrors.add( SolrIndexerService.buildErrorMessage( e ) );
194 AppLogService.error( DIRECTORY_INDEXATION_ERROR + directory.getIdDirectory( ), e );
195 }
196 }
197
198 return lstErrors;
199 }
200
201
202
203
204 public boolean isEnable( )
205 {
206 return "true".equalsIgnoreCase( AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE ) );
207 }
208
209
210
211
212 public List<Field> getAdditionalFields( )
213 {
214 return new ArrayList<Field>( );
215 }
216
217
218
219
220 public List<SolrItem> getDocuments( String recordId )
221 {
222 Plugin plugin = PluginService.getPlugin( DirectoryPlugin.PLUGIN_NAME );
223
224 int nIdRecord;
225
226 try
227 {
228 nIdRecord = Integer.parseInt( recordId );
229 }
230 catch ( NumberFormatException ne )
231 {
232 AppLogService.error( recordId + " not parseable to an int", ne );
233
234 return new ArrayList<SolrItem>( 0 );
235 }
236
237 Record record = RecordHome.findByPrimaryKey( nIdRecord, plugin );
238 Directory directory = record.getDirectory( );
239
240 if ( !record.isEnabled( ) || !directory.isEnabled( ) || !directory.isIndexed( ) )
241 {
242 return new ArrayList<SolrItem>( 0 );
243 }
244
245 int nIdDirectory = directory.getIdDirectory( );
246
247
248 EntryFilter entryFilter = new EntryFilter( );
249 entryFilter.setIdDirectory( nIdDirectory );
250 entryFilter.setIsIndexed( EntryFilter.FILTER_TRUE );
251
252 List<IEntry> listIndexedEntry = EntryHome.getEntryList( entryFilter, plugin );
253
254 entryFilter.setIsIndexed( EntryFilter.ALL_INT );
255 entryFilter.setIsIndexedAsTitle( EntryFilter.FILTER_TRUE );
256
257 List<IEntry> listIndexedAsTitleEntry = EntryHome.getEntryList( entryFilter, plugin );
258
259 entryFilter.setIsIndexedAsTitle( EntryFilter.ALL_INT );
260 entryFilter.setIsIndexedAsSummary( EntryFilter.FILTER_TRUE );
261
262 List<IEntry> listIndexedAsSummaryEntry = EntryHome.getEntryList( entryFilter, plugin );
263
264 List<SolrItem> listDocument = Collections.EMPTY_LIST;
265
266 try
267 {
268 SolrItem doc = getDocument( record, listIndexedEntry, listIndexedAsTitleEntry, listIndexedAsSummaryEntry,
269 plugin );
270
271 if ( doc != null )
272 {
273 listDocument = new ArrayList<SolrItem>( 1 );
274 listDocument.add( doc );
275 }
276 }
277 catch ( IOException e )
278 {
279 throw new RuntimeException( e );
280 }
281
282 return listDocument;
283 }
284
285
286
287
288
289
290
291
292
293
294
295 private SolrItem getDocument( Record record, List<IEntry> listContentEntry, List<IEntry> listTitleEntry,
296 List<IEntry> listSummaryEntry, Plugin plugin )
297 throws IOException
298 {
299 SolrItem item = new SolrItem( );
300
301 boolean bFallback = false;
302
303
304
305 if ( listTitleEntry.isEmpty( ) && !listContentEntry.isEmpty( ) )
306 {
307 listTitleEntry.add( listContentEntry.get( 0 ) );
308 bFallback = true;
309 }
310
311 String strTitle = getContentToIndex( record, listTitleEntry, plugin );
312
313
314
315 if ( StringUtils.isBlank( strTitle ) && !bFallback && !listContentEntry.isEmpty( ) )
316 {
317 listTitleEntry.clear( );
318 listTitleEntry.add( listContentEntry.get( 0 ) );
319 strTitle = getContentToIndex( record, listTitleEntry, plugin );
320 }
321
322
323 if ( StringUtils.isBlank( strTitle ) )
324 {
325 return null;
326 }
327
328
329 item.setTitle( strTitle );
330
331 if ( !listContentEntry.isEmpty( ) )
332 {
333 String strContent = getContentToIndex( record, listContentEntry, plugin );
334
335 if ( StringUtils.isNotBlank( strContent ) )
336 {
337 HtmlParser parser = new HtmlParser( );
338 ContentHandler handler = new BodyContentHandler();
339 Metadata metadata = new Metadata();
340 InputStream stream = new ByteArrayInputStream(strContent.getBytes(StandardCharsets.UTF_8));
341 try {
342 parser.parse(stream, handler, metadata, new ParseContext());
343 } catch (SAXException e) {
344 e.printStackTrace();
345 } catch (TikaException e) {
346 e.printStackTrace();
347 }
348 item.setContent( handler.toString( ) );
349 }
350 }
351
352 if ( !listSummaryEntry.isEmpty( ) )
353 {
354 String strSummary = getContentToIndex( record, listSummaryEntry, plugin );
355
356 if ( StringUtils.isNotBlank( strSummary ) )
357 {
358
359 item.setSummary( strSummary );
360 }
361 }
362
363 String strRoleKey = record.getRoleKey( );
364
365 if ( StringUtils.isBlank( strRoleKey ) )
366 {
367 strRoleKey = ROLE_NONE;
368 }
369
370
371 item.setRole( strRoleKey );
372
373
374 item.setDate( record.getDateCreation( ) );
375
376 UrlItem url = new UrlItem( SolrIndexerService.getBaseUrl( ) );
377 url.addParameter( XPageAppService.PARAM_XPAGE_APP, DIRECTORY );
378 url.addParameter( PARAMETER_ID_DIRECTORY_RECORD, record.getIdRecord( ) );
379 url.addParameter( PARAMETER_VIEW_DIRECTORY_RECORD, "" );
380
381 item.setUrl( url.getUrl( ) );
382
383
384
385
386
387 item.setUid( getResourceUid( Integer.toString( record.getIdRecord( ) ),
388 DirectoryIndexerUtils.CONSTANT_TYPE_RESOURCE ) );
389
390
391 item.setType( DIRECTORY );
392
393
394 item.setSite( SolrIndexerService.getWebAppName( ) );
395
396 return item;
397 }
398
399
400
401
402
403
404
405
406 private String getContentToIndex( Record record, List<IEntry> listEntry, Plugin plugin )
407 {
408 List<Integer> listIdEntry = new ArrayList<Integer>( listEntry.size( ) );
409
410 for ( IEntry entry : listEntry )
411 {
412 listIdEntry.add( entry.getIdEntry( ) );
413 }
414
415 StringBuffer sb = new StringBuffer( );
416
417 List<RecordField> listField = RecordFieldHome.getRecordFieldSpecificList( listIdEntry, record.getIdRecord( ),
418 plugin, DirectoryUtils.getMapFieldsOfListEntry( listEntry, plugin ) );
419
420 for ( RecordField field : listField )
421 {
422 sb.append( RecordFieldHome.findByPrimaryKey( field.getIdRecordField( ), plugin ).getValue( ) );
423 sb.append( " " );
424 }
425
426 return sb.toString( );
427 }
428
429
430
431
432 public List<String> getResourcesName( )
433 {
434 return LIST_RESSOURCES_NAME;
435 }
436
437
438
439
440 public String getResourceUid( String strResourceId, String strResourceType )
441 {
442 StringBuffer sb = new StringBuffer( strResourceId );
443 sb.append( SolrConstants.CONSTANT_UNDERSCORE ).append( SHORT_NAME );
444
445 return sb.toString( );
446 }
447 }