View Javadoc
1   /*
2    * Copyright (c) 2002-2025, City of Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.util.xml;
35  
36  import fr.paris.lutece.portal.service.util.AppLogService;
37  import fr.paris.lutece.portal.service.util.AppPropertiesService;
38  
39  import java.io.StringReader;
40  import java.io.StringWriter;
41  
42  import java.util.ArrayList;
43  import java.util.List;
44  import java.util.Map;
45  import java.util.Map.Entry;
46  import java.util.Properties;
47  import java.util.concurrent.ConcurrentHashMap;
48  import java.util.concurrent.ConcurrentMap;
49  
50  import javax.xml.XMLConstants;
51  import javax.xml.transform.Result;
52  import javax.xml.transform.Source;
53  import javax.xml.transform.Templates;
54  import javax.xml.transform.Transformer;
55  import javax.xml.transform.TransformerConfigurationException;
56  import javax.xml.transform.TransformerException;
57  import javax.xml.transform.TransformerFactory;
58  import javax.xml.transform.TransformerFactoryConfigurationError;
59  import javax.xml.transform.stream.StreamResult;
60  import javax.xml.transform.stream.StreamSource;
61  
62  /**
63   * This class provides methods to transform XML documents using XSLT with cache
64   */
65  public final class XmlTransformer
66  {
67      private static final String ERROR_MESSAGE_XLST = "Error transforming document XSLT : ";
68      public static final String PROPERTY_TRANSFORMER_POOL_SIZE = "service.xmlTransformer.transformerPoolSize";
69      public static final int TRANSFORMER_POOL_SIZE = AppPropertiesService.getPropertyInt( PROPERTY_TRANSFORMER_POOL_SIZE, 2 );
70      public static final int MAX_TRANSFORMER_SIZE = 1000;
71      private static final List<ConcurrentMap<String, Templates>> transformersPoolList = new ArrayList<>( TRANSFORMER_POOL_SIZE );
72  
73      static
74      {
75          for ( int i = 0; i < TRANSFORMER_POOL_SIZE; i++ )
76          {
77              transformersPoolList.add( new ConcurrentHashMap<String, Templates>( MAX_TRANSFORMER_SIZE ) );
78          }
79      }
80  
81      private static final String ORACLE_ENABLE_EXTENSION_FUNCTIONS = "http://www.oracle.com/xml/jaxp/properties/enableExtensionFunctions";
82      private static final String SAXON_ALLOW_EXTERNAL_FUNCTIONS = "http://saxon.sf.net/feature/allow-external-functions";
83  
84      /**
85       * Creates a TransformerFactory hardened against XSLT-based RCE, XXE and SSRF attacks.
86       *
87       * The following security locks are applied:
88       * <ul>
89       *   <li>FEATURE_SECURE_PROCESSING — enables the JDK secure-processing mode</li>
90       *   <li>enableExtensionFunctions=false — blocks calls to Java classes from XSLT</li>
91       *   <li>ACCESS_EXTERNAL_DTD="" — prevents loading of external DTDs (JDK Xalan only)</li>
92       *   <li>ACCESS_EXTERNAL_STYLESHEET="" — blocks document(), xsl:include, xsl:import to external URIs (JDK Xalan only)</li>
93       * </ul>
94       * A restrictive URIResolver is set as an additional defense-in-depth layer.
95       * The method adapts to the underlying XSLT processor (JDK Xalan or Saxon) automatically.
96       *
97       * @return a security-hardened TransformerFactory
98       * @throws TransformerConfigurationException if a critical security feature is not supported
99       */
100     private TransformerFactory createSecureTransformerFactory( ) throws TransformerConfigurationException
101     {
102         TransformerFactory tf = TransformerFactory.newInstance( );
103         tf.setFeature( XMLConstants.FEATURE_SECURE_PROCESSING, true );
104         disableExtensionFunctions( tf );
105         setExternalAccessRestrictions( tf );
106         tf.setURIResolver( ( href, base ) -> {
107             throw new TransformerException( "External URI resolution blocked: " + href );
108         } );
109         return tf;
110     }
111 
112     /**
113      * Disables XSLT extension functions on the given TransformerFactory.
114      * Tries the Oracle/JDK property name first (for the built-in Xalan XSLTC processor),
115      * then the Saxon-specific property. At least one must succeed.
116      *
117      * @param tf the TransformerFactory to configure
118      * @throws TransformerConfigurationException if extension functions could not be disabled
119      */
120     private void disableExtensionFunctions( TransformerFactory tf ) throws TransformerConfigurationException
121     {
122         boolean bDisabled = false;
123 
124         try
125         {
126             tf.setFeature( ORACLE_ENABLE_EXTENSION_FUNCTIONS, false );
127             bDisabled = true;
128         }
129         catch( TransformerConfigurationException e )
130         {
131             AppLogService.debug( "Oracle enableExtensionFunctions not supported, trying Saxon property" );
132         }
133 
134         if ( !bDisabled )
135         {
136             try
137             {
138                 tf.setFeature( SAXON_ALLOW_EXTERNAL_FUNCTIONS, false );
139                 bDisabled = true;
140             }
141             catch( TransformerConfigurationException e )
142             {
143                 AppLogService.debug( "Saxon allow-external-functions not supported either" );
144             }
145         }
146 
147         if ( !bDisabled )
148         {
149             throw new TransformerConfigurationException(
150                     "Failed to disable XSLT extension functions: neither Oracle/JDK nor Saxon property is supported by " + tf.getClass( ).getName( ) );
151         }
152     }
153 
154     /**
155      * Restricts external DTD and stylesheet access on the given TransformerFactory.
156      * These JAXP attributes are supported by the JDK built-in Xalan processor but not by Saxon,
157      * which relies on FEATURE_SECURE_PROCESSING instead. Failures are logged but not fatal.
158      *
159      * @param tf the TransformerFactory to configure
160      */
161     private void setExternalAccessRestrictions( TransformerFactory tf )
162     {
163         try
164         {
165             tf.setAttribute( XMLConstants.ACCESS_EXTERNAL_DTD, "" );
166         }
167         catch( IllegalArgumentException e )
168         {
169             AppLogService.debug( "ACCESS_EXTERNAL_DTD not supported by {}", tf.getClass( ).getName( ) );
170         }
171 
172         try
173         {
174             tf.setAttribute( XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "" );
175         }
176         catch( IllegalArgumentException e )
177         {
178             AppLogService.debug( "ACCESS_EXTERNAL_STYLESHEET not supported by {}", tf.getClass( ).getName( ) );
179         }
180     }
181 
182     /**
183      * This method try to get a templates instance from cache or create a new one if can't.
184      *
185      * Previously (before 6.0.0) it returned directly a transformer, now it returns a templates which can create transformers cheaply.
186      * 
187      * @param stylesheet
188      *            The XML document content
189      * @param strStyleSheetId
190      *            The StyleSheet Id
191      * @return XmlTransformer object
192      * @throws TransformerException
193      */
194     private Templates getTemplates( Source stylesheet, String strStyleSheetId ) throws TransformerException
195     {
196         Templates result = null;
197 
198         if ( TRANSFORMER_POOL_SIZE > 0 )
199         {
200             int nTransformerListIndex = 0;
201 
202             do
203             {
204                 result = transformersPoolList.get( nTransformerListIndex ).remove( strStyleSheetId );
205                 nTransformerListIndex++;
206             }
207             while ( ( result == null ) && ( nTransformerListIndex < TRANSFORMER_POOL_SIZE ) );
208         }
209 
210         if ( result == null )
211         {
212             // only one thread can use transformer
213             try
214             {
215                 result = createSecureTransformerFactory( ).newTemplates( stylesheet );
216                 AppLogService.debug( " --  XML Templates instantiation : strStyleSheetId= {}", strStyleSheetId );
217             }
218             catch( TransformerConfigurationException e )
219             {
220                 String strMessage = e.getMessage( );
221 
222                 if ( e.getLocationAsString( ) != null )
223                 {
224                     strMessage += ( "- location : " + e.getLocationAsString( ) );
225                 }
226 
227                 throw new TransformerException( ERROR_MESSAGE_XLST + strMessage, e.getCause( ) );
228             }
229             catch( TransformerFactoryConfigurationError e )
230             {
231                 throw new TransformerException( ERROR_MESSAGE_XLST + e.getMessage( ), e );
232             }
233         }
234 
235         return result;
236     }
237 
238     /**
239      * Remove all Templates instance from cache. Previously (before 6.0.0) the cache stored transformers, now it stores templates.
240      */
241     public static void cleanTransformerList( )
242     {
243         for ( ConcurrentMap<String, Templates> transformerList : transformersPoolList )
244         {
245             transformerList.clear( );
246         }
247     }
248 
249     /**
250      * Gets the number of templates. Previously (before 6.0.0) the cache stored transformers, now it stores templates.
251      * 
252      * @return the transformers count
253      */
254     public static int getTransformersCount( )
255     {
256         int nCount = 0;
257 
258         for ( ConcurrentMap<String, Templates> transformerList : transformersPoolList )
259         {
260             nCount += transformerList.size( );
261         }
262 
263         return nCount;
264     }
265 
266     /**
267      * Release Transformer instance in cache. Previously (before 6.0.0) the cache stored transformers, now it stores templates.
268      * 
269      * @param templates
270      *            The XML templates
271      * @param strStyleSheetId
272      *            The StyleSheet Id
273      */
274     private void releaseTemplates( Templates templates, String strStyleSheetId )
275     {
276         if ( TRANSFORMER_POOL_SIZE > 0 )
277         {
278             Templates result = null;
279             ConcurrentMap<String, Templates> transformerList = null;
280             int nTransformerListIndex = 0;
281 
282             do
283             {
284                 transformerList = transformersPoolList.get( nTransformerListIndex );
285                 nTransformerListIndex++;
286 
287                 // This set of action is not performed atomically but it can not cause problems
288                 if ( transformerList.size( ) < MAX_TRANSFORMER_SIZE )
289                 {
290                     result = transformerList.putIfAbsent( strStyleSheetId, templates );
291                 }
292                 else
293                 {
294                     // Aggressive release ( speed up GC )
295                     transformerList.clear( );
296 
297                     AppLogService.info( "XmlTransformer : cache is full, you may need to increase cache size." );
298                 }
299             }
300             while ( ( result != null ) && ( nTransformerListIndex < TRANSFORMER_POOL_SIZE ) );
301         }
302     }
303 
304     /**
305      * Transform XML documents using XSLT with cache
306      * 
307      * @param source
308      *            The XML document content
309      * @param stylesheet
310      *            The XSL source
311      * @param strStyleSheetId
312      *            The StyleSheet Id
313      * @param params
314      *            Parameters that can be used by the XSL StyleSheet
315      * @param outputProperties
316      *            Properties to use for the XSL transform. Will overload the XSL output definition.
317      * @return The output document
318      * @throws TransformerException
319      *             The exception
320      */
321     public String transform( Source source, Source stylesheet, String strStyleSheetId, Map<String, String> params, Properties outputProperties )
322             throws TransformerException
323     {
324         Templates templates = this.getTemplates( stylesheet, strStyleSheetId );
325         Transformer transformer = templates.newTransformer( );
326         // SECURITY: must return a non-null Source, not throw. XSLTC's LoadDocument silently catches
327         // TransformerException and falls back to direct URL loading when source is null.
328         transformer.setURIResolver( ( href, base ) -> {
329             AppLogService.error( "XSLT security: blocked document() call to external URI: {}", href );
330             return new StreamSource( new StringReader( "<blocked/>" ) );
331         } );
332 
333         if ( outputProperties != null )
334         {
335             transformer.setOutputProperties( outputProperties );
336         }
337 
338         if ( params != null )
339         {
340             transformer.clearParameters( );
341 
342             for ( Entry<String, String> entry : params.entrySet( ) )
343             {
344                 transformer.setParameter( entry.getKey( ), entry.getValue( ) );
345             }
346         }
347 
348         StringWriter sw = new StringWriter( );
349         Result result = new StreamResult( sw );
350 
351         try
352         {
353             transformer.transform( source, result );
354         }
355         catch( TransformerException e )
356         {
357             String strMessage = "strStyleSheetId = " + strStyleSheetId + " " + e.getMessage( );
358 
359             if ( e.getLocationAsString( ) != null )
360             {
361                 strMessage += ( " - location : " + e.getLocationAsString( ) );
362             }
363 
364             throw new TransformerException( ERROR_MESSAGE_XLST + strMessage, e.getCause( ) );
365         }
366         finally
367         {
368             this.releaseTemplates( templates, strStyleSheetId );
369         }
370 
371         return sw.toString( );
372     }
373 }