1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.portal.service.search;
35
36 import fr.paris.lutece.portal.business.page.Page;
37 import fr.paris.lutece.portal.business.page.PageHome;
38 import fr.paris.lutece.portal.service.message.SiteMessageException;
39 import fr.paris.lutece.portal.service.page.IPageService;
40 import fr.paris.lutece.portal.service.spring.SpringContextService;
41 import fr.paris.lutece.portal.service.util.AppException;
42 import fr.paris.lutece.portal.service.util.AppPropertiesService;
43 import fr.paris.lutece.util.url.UrlItem;
44
45 import org.apache.commons.lang3.StringUtils;
46
47 import org.apache.lucene.document.DateTools;
48 import org.apache.lucene.document.Document;
49 import org.apache.lucene.document.Field;
50 import org.apache.lucene.document.FieldType;
51 import org.apache.lucene.document.StoredField;
52 import org.apache.lucene.document.StringField;
53 import org.apache.lucene.document.TextField;
54
55 import org.apache.tika.exception.TikaException;
56 import org.apache.tika.metadata.Metadata;
57 import org.apache.tika.parser.ParseContext;
58 import org.apache.tika.parser.html.HtmlParser;
59 import org.apache.tika.sax.BodyContentHandler;
60
61 import org.xml.sax.ContentHandler;
62 import org.xml.sax.SAXException;
63
64 import java.io.ByteArrayInputStream;
65 import java.io.IOException;
66
67 import java.util.ArrayList;
68 import java.util.List;
69
70
71
72
73 public class PageIndexer implements SearchIndexer
74 {
75 public static final String INDEX_TYPE_PAGE = "Page";
76 public static final String INDEXER_NAME = "PageIndexer";
77 protected static final String PROPERTY_PAGE_BASE_URL = "search.pageIndexer.baseUrl";
78 protected static final String PROPERTY_SEARCH_PAGE_URL = "search.pageSearch.baseUrl";
79 protected static final String PROPERTY_INDEXER_ENABLE = "search.pageIndexer.enable";
80 protected static final String PARAMETER_PAGE_ID = "page_id";
81 private static IPageService _pageService = SpringContextService.getBean( "pageService" );
82 private static final String INDEXER_DESCRIPTION = "Indexer service for pages";
83 private static final String INDEXER_VERSION = "1.0.0";
84
85
86
87
88 @Override
89 public void indexDocuments( ) throws IOException, InterruptedException, SiteMessageException
90 {
91 String strPageBaseUrl = AppPropertiesService.getProperty( PROPERTY_PAGE_BASE_URL );
92 List<Page> listPages = PageHome.getAllPages( );
93
94 for ( Page page : listPages )
95 {
96 UrlItem/url/UrlItem.html#UrlItem">UrlItem url = new UrlItem( strPageBaseUrl );
97 url.addParameter( PARAMETER_PAGE_ID, page.getId( ) );
98
99 Document doc = null;
100
101 try
102 {
103 doc = getDocument( page, url.getUrl( ) );
104 }
105 catch( Exception e )
106 {
107 String strMessage = "Page ID : " + page.getId( );
108 IndexationService.error( this, e, strMessage );
109 }
110
111 if ( doc != null )
112 {
113 IndexationService.write( doc );
114 }
115 }
116 }
117
118
119
120
121 @Override
122 public List<Document> getDocuments( String nIdDocument ) throws IOException, InterruptedException, SiteMessageException
123 {
124 ArrayList<Document> listDocuments = new ArrayList<>( );
125 String strPageBaseUrl = AppPropertiesService.getProperty( PROPERTY_PAGE_BASE_URL );
126
127 Page page = PageHome.getPage( Integer.parseInt( nIdDocument ) );
128
129 if ( ( page != null ) && ( page.getId( ) != 0 ) )
130 {
131 UrlItem/url/UrlItem.html#UrlItem">UrlItem url = new UrlItem( strPageBaseUrl );
132 url.addParameter( PARAMETER_PAGE_ID, page.getId( ) );
133
134 Document doc = getDocument( page, url.getUrl( ) );
135 listDocuments.add( doc );
136 }
137
138 return listDocuments;
139 }
140
141
142
143
144 @Override
145 public String getName( )
146 {
147 return INDEXER_NAME;
148 }
149
150
151
152
153 @Override
154 public String getVersion( )
155 {
156 return INDEXER_VERSION;
157 }
158
159
160
161
162 @Override
163 public String getDescription( )
164 {
165 return INDEXER_DESCRIPTION;
166 }
167
168
169
170
171 @Override
172 public boolean isEnable( )
173 {
174 String strEnable = AppPropertiesService.getProperty( PROPERTY_INDEXER_ENABLE, Boolean.TRUE.toString( ) );
175
176 return ( strEnable.equalsIgnoreCase( Boolean.TRUE.toString( ) ) );
177 }
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195 protected Document getDocument( Page page, String strUrl ) throws IOException, InterruptedException, SiteMessageException
196 {
197 FieldType ft = new FieldType( StringField.TYPE_STORED );
198 ft.setOmitNorms( false );
199
200 FieldType ftNotStored = new FieldType( StringField.TYPE_NOT_STORED );
201 ftNotStored.setOmitNorms( false );
202 ftNotStored.setTokenized( false );
203
204
205 Document doc = new Document( );
206
207
208
209 doc.add( new Field( SearchItem.FIELD_URL, strUrl, ft ) );
210
211
212
213
214 String strDate = DateTools.dateToString( page.getDateUpdate( ), DateTools.Resolution.DAY );
215 doc.add( new Field( SearchItem.FIELD_DATE, strDate, ft ) );
216
217
218
219
220 String strIdPage = String.valueOf( page.getId( ) );
221 doc.add( new Field( SearchItem.FIELD_UID, strIdPage, ftNotStored ) );
222
223 String strPageContent = _pageService.getPageContent( page.getId( ), 0, null );
224 ContentHandler handler = new BodyContentHandler( );
225 Metadata metadata = new Metadata( );
226
227 try
228 {
229 new HtmlParser( ).parse( new ByteArrayInputStream( strPageContent.getBytes( ) ), handler, metadata, new ParseContext( ) );
230 }
231 catch( TikaException | SAXException e )
232 {
233 throw new AppException( "Error during page parsing." );
234 }
235
236
237
238 StringBuilder sb = new StringBuilder( handler.toString( ) );
239
240
241
242 StringBuilder sbFieldContent = new StringBuilder( );
243 StringBuilder sbFieldMetadata = new StringBuilder( );
244 sbFieldContent.append( page.getName( ) ).append( " " ).append( sb.toString( ) );
245
246
247 if ( page.getDescription( ) != null )
248 {
249 sbFieldContent.append( " " ).append( page.getDescription( ) );
250 }
251
252
253 String strMetaKeywords = page.getMetaKeywords( );
254
255 if ( StringUtils.isNotBlank( strMetaKeywords ) )
256 {
257 sbFieldContent.append( " " ).append( strMetaKeywords );
258 sbFieldMetadata.append( strMetaKeywords );
259 }
260
261 doc.add( new Field( SearchItem.FIELD_CONTENTS, sbFieldContent.toString( ), TextField.TYPE_NOT_STORED ) );
262
263 if ( StringUtils.isNotBlank( page.getMetaDescription( ) ) )
264 {
265 if ( sbFieldMetadata.length( ) > 0 )
266 {
267 sbFieldMetadata.append( " " );
268 }
269
270 sbFieldMetadata.append( page.getMetaDescription( ) );
271 }
272
273 if ( sbFieldMetadata.length( ) > 0 )
274 {
275 doc.add( new Field( SearchItem.FIELD_METADATA, sbFieldMetadata.toString( ), TextField.TYPE_NOT_STORED ) );
276 }
277
278
279
280 doc.add( new Field( SearchItem.FIELD_TITLE, page.getName( ), ft ) );
281
282 if ( StringUtils.isNotBlank( page.getDescription( ) ) )
283 {
284
285
286 doc.add( new StoredField( SearchItem.FIELD_SUMMARY, page.getDescription( ) ) );
287 }
288
289 doc.add( new Field( SearchItem.FIELD_TYPE, INDEX_TYPE_PAGE, ft ) );
290 doc.add( new Field( SearchItem.FIELD_ROLE, page.getRole( ), ft ) );
291
292
293 return doc;
294 }
295
296
297
298
299 @Override
300 public List<String> getListType( )
301 {
302 List<String> listType = new ArrayList<>( );
303 listType.add( INDEX_TYPE_PAGE );
304
305 return listType;
306 }
307
308
309
310
311 @Override
312 public String getSpecificSearchAppUrl( )
313 {
314 return AppPropertiesService.getProperty( PROPERTY_SEARCH_PAGE_URL );
315 }
316 }