1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.newsletter.util;
35
36 import fr.paris.lutece.portal.service.util.AppLogService;
37 import fr.paris.lutece.portal.service.util.AppPropertiesService;
38
39 import java.io.ByteArrayInputStream;
40 import java.io.StringWriter;
41 import java.io.UnsupportedEncodingException;
42
43 import javax.xml.transform.Transformer;
44 import javax.xml.transform.TransformerConfigurationException;
45 import javax.xml.transform.TransformerException;
46 import javax.xml.transform.TransformerFactory;
47 import javax.xml.transform.dom.DOMSource;
48 import javax.xml.transform.stream.StreamResult;
49
50 import org.w3c.dom.Document;
51 import org.w3c.dom.NamedNodeMap;
52 import org.w3c.dom.Node;
53 import org.w3c.dom.NodeList;
54 import org.w3c.tidy.Tidy;
55
56
57
58
59 public class HtmlDomDocNewsletter
60 {
61 public static final String CONSTANT_STATIC_URL = "https?://[^/]+/";
62 public static final String CONSTANT_PROTOCOL_DELIMITER = ":";
63
64
65
66
67
68
69
70
71
72
73 public static final ElementUrl ELEMENT_CSS;
74
75
76
77
78
79
80
81
82
83 public static final ElementUrl ELEMENT_JAVASCRIPT;
84
85
86
87
88
89
90
91
92 public static final ElementUrl ELEMENT_IMG;
93
94
95
96
97
98
99
100
101 public static final ElementUrl ELEMENT_A;
102
103
104
105
106
107
108
109
110 public static final ElementUrl ELEMENT_FORM;
111
112 private static final String PROPERTY_LUTECE_ENCODING = "lutece.encoding";
113
114 static
115 {
116 ELEMENT_CSS = new ElementUrl( "link", "href", "rel", "stylesheet" );
117 ELEMENT_JAVASCRIPT = new ElementUrl( "script", "src", "type", "text/javascript" );
118 ELEMENT_IMG = new ElementUrl( "img", "src", null, null );
119 ELEMENT_A = new ElementUrl( "a", "href", null, null );
120 ELEMENT_FORM = new ElementUrl( "form", "action", null, null );
121 }
122
123 private Document _content;
124 private String _strBaseUrl;
125
126
127
128
129
130
131
132
133
134 public HtmlDomDocNewsletter( String strHtml, String strBaseUrl )
135 {
136
137 Tidy tidy = new Tidy( );
138 tidy.setQuiet( true );
139 tidy.setShowWarnings( false );
140
141 String strEncoding = null;
142
143 try
144 {
145 strEncoding = AppPropertiesService.getProperty( PROPERTY_LUTECE_ENCODING );
146 tidy.setInputEncoding( strEncoding );
147 _content = tidy.parseDOM( new ByteArrayInputStream( strHtml.getBytes( strEncoding ) ), null );
148 }
149 catch( UnsupportedEncodingException e )
150 {
151 AppLogService.error( "Error when parsing Html document (Newsletter) : UnsupporterEncodingException (" + strEncoding + ")", e );
152 }
153
154 _strBaseUrl = ( strBaseUrl == null ) ? "" : strBaseUrl;
155 }
156
157
158
159
160
161
162
163 public void convertAllRelativesUrls( ElementUrl elementType )
164 {
165 NodeList nodes = getDomDocument( ).getElementsByTagName( elementType.getTagName( ) );
166
167 for ( int i = 0; i < nodes.getLength( ); i++ )
168 {
169 Node node = nodes.item( i );
170 NamedNodeMap attributes = node.getAttributes( );
171
172
173 if ( elementType.getTestedAttributeName( ) != null )
174 {
175 String strRel = attributes.getNamedItem( elementType.getTestedAttributeName( ) ).getNodeValue( );
176
177 if ( !elementType.getTestedAttributeValue( ).equals( strRel ) )
178 {
179 continue;
180 }
181 }
182
183
184 Node nodeAttribute = attributes.getNamedItem( elementType.getAttributeName( ) );
185
186 if ( nodeAttribute != null )
187 {
188 String strSrc = nodeAttribute.getNodeValue( );
189
190 if ( !strSrc.matches( CONSTANT_STATIC_URL ) && !strSrc.contains( CONSTANT_PROTOCOL_DELIMITER ) )
191 {
192 nodeAttribute.setNodeValue( getBaseUrl( ) + strSrc );
193 }
194 }
195 }
196 }
197
198
199
200
201
202
203 public String getContent( )
204 {
205 DOMSource domSource = new DOMSource( _content );
206 StringWriter writer = new StringWriter( );
207 StreamResult result = new StreamResult( writer );
208 TransformerFactory tf = TransformerFactory.newInstance( );
209 Transformer transformer;
210
211 try
212 {
213 transformer = tf.newTransformer( );
214 transformer.transform( domSource, result );
215 }
216 catch( TransformerConfigurationException e )
217 {
218 AppLogService.error( e.getMessage( ) );
219
220 return null;
221 }
222 catch( TransformerException e )
223 {
224 AppLogService.error( e.getMessage( ) );
225
226 return null;
227 }
228
229 String stringResult = writer.toString( );
230
231 return stringResult;
232 }
233
234
235
236
237
238
239 protected org.w3c.dom.Document getDomDocument( )
240 {
241 return _content;
242 }
243
244
245
246
247
248
249 protected String getBaseUrl( )
250 {
251 return _strBaseUrl;
252 }
253
254
255
256
257 protected static class ElementUrl
258 {
259 private String _strTagName;
260 private String _strAttributeName;
261 private String _strTestedAttributeName;
262 private String _strTestedAttributeValue;
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277 public ElementUrl( String strTagName, String strAttributeName, String strTestedAttributeName, String strTestedAttributeValue )
278 {
279 _strTagName = strTagName;
280 _strAttributeName = strAttributeName;
281 _strTestedAttributeName = strTestedAttributeName;
282 _strTestedAttributeValue = strTestedAttributeValue;
283 }
284
285
286
287
288
289
290 public String getAttributeName( )
291 {
292 return _strAttributeName;
293 }
294
295
296
297
298
299
300 public String getTagName( )
301 {
302 return _strTagName;
303 }
304
305
306
307
308
309
310 public String getTestedAttributeName( )
311 {
312 return _strTestedAttributeName;
313 }
314
315
316
317
318
319
320 public String getTestedAttributeValue( )
321 {
322 return _strTestedAttributeValue;
323 }
324 }
325 }