View Javadoc
1   /*
2    * Copyright (c) 2002-2019, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.plugins.ocra2ia.service;
35  
36  import java.awt.image.BufferedImage;
37  import java.io.ByteArrayOutputStream;
38  import java.io.IOException;
39  import java.nio.file.Files;
40  import java.nio.file.Path;
41  import java.nio.file.Paths;
42  import java.util.Arrays;
43  import java.util.Date;
44  import java.util.HashMap;
45  import java.util.Locale;
46  import java.util.Map;
47  import java.util.concurrent.ExecutorService;
48  import java.util.concurrent.Executors;
49  import java.util.concurrent.TimeUnit;
50  
51  import javax.annotation.PostConstruct;
52  
53  import org.apache.commons.lang.StringUtils;
54  import org.apache.commons.lang3.ArrayUtils;
55  import org.apache.commons.lang3.RandomStringUtils;
56  import org.apache.pdfbox.pdmodel.PDDocument;
57  import org.apache.pdfbox.rendering.ImageType;
58  import org.apache.pdfbox.rendering.PDFRenderer;
59  import org.apache.pdfbox.tools.imageio.ImageIOUtil;
60  
61  import com.jacob.activeX.ActiveXComponent;
62  import com.jacob.com.Dispatch;
63  import com.jacob.com.SafeArray;
64  import com.jacob.com.Variant;
65  
66  import fr.paris.lutece.plugins.ocra2ia.exception.OcrException;
67  import fr.paris.lutece.plugins.ocra2ia.util.OcrConstants;
68  import fr.paris.lutece.plugins.ocra2ia.util.OcrResultUtils;
69  import fr.paris.lutece.portal.service.i18n.I18nService;
70  import fr.paris.lutece.portal.service.util.AppLogService;
71  import fr.paris.lutece.portal.service.util.AppPropertiesService;
72  
73  /**
74   *
75   * Ocr Service
76   *
77   */
78  public class OcrService
79  {
80  
81      /**
82       * clsid active x A2IA.
83       */
84      private String _strClsid;
85  
86      /**
87       * Load DLL Jacob and _dispatchA2iAObj.
88       */
89      @PostConstruct
90      public void init( )
91      {
92          try
93          {
94              String folder = AppPropertiesService.getProperty( OcrConstants.PROPERTY_FOLDER_DLL_JACOB );
95              // Load Jacob dll
96              System.load( folder + OcrConstants.JACOB_DLL64_FILE );
97  
98              // Laod A2ia ActiveX component with clsid
99              _strClsid = "clsid:{" + AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_CLSID ) + "}";
100 
101         }
102         catch( UnsatisfiedLinkError e )
103         {
104             AppLogService.error( "Native code Jacob library failed to load.\n" + e );
105         }
106 
107         AppLogService.info( "init OCR service done." );
108     }
109 
110     /**
111      * Perform OCR with A2iA.
112      *
113      * @param bytefileContent
114      *            file to read
115      * @param strFileExtension
116      *            image extension
117      * @param strDocumentType
118      *            document type : values allowed : Rib, TaxAssessment,Identity
119      * @return Map result of OCR
120      * @throws OcrException
121      *             the OcrException
122      *
123      */
124     public synchronized Map<String, String> proceed( byte [ ] bytefileContent, String strFileExtension, String strDocumentType ) throws OcrException
125     {
126         /**
127          * Jacob Object to wrap A2ia component.
128          */
129         // Init COM A2IA COM Object
130         ActiveXComponent comp = new ActiveXComponent( _strClsid );
131         Dispatch _dispatchA2iAObj = comp.getObject( );
132 
133         if ( StringUtils.isEmpty( _strClsid ) )
134         {
135             AppLogService.error( "Bad initialisation of OCR Service." );
136             throw new OcrException( OcrConstants.MESSAGE_INIT_ERROR );
137         }
138 
139         if ( ArrayUtils.isEmpty( bytefileContent ) || StringUtils.isEmpty( strFileExtension ) || StringUtils.isEmpty( strDocumentType ) )
140         {
141             throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_PARAMETER_MANDATORY, Locale.getDefault( ) ) );
142 
143         }
144 
145         ImageBean imageBean = setValueImageExtensionAndContent( strFileExtension, bytefileContent );
146         String strModeOcr = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_MODE_OCR, OcrConstants.OCR_MODE_MEMORY );
147         if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
148         {
149             writeImageFile( imageBean );
150         }
151 
152         Map<String, String> mapOcrServiceResults = performOcr( _dispatchA2iAObj, imageBean, strDocumentType, strModeOcr );
153 
154         mapOcrServiceResults.values( ).removeIf( StringUtils::isBlank );
155         boolean bRetry = AppPropertiesService.getPropertyBoolean( OcrConstants.PROPERTY_PDF_IMAGE_RETRY, false ) && mapOcrServiceResults.isEmpty( )
156                 && OcrConstants.EXTENSION_FILE_PDF.equalsIgnoreCase( strFileExtension );
157         if ( bRetry )
158         {
159             AppLogService.info( "the retry mechanism will be launched" );
160             try
161             {
162                 ImageBean retryImageBean = new ImageBean( );
163                 retryImageBean.setContent( transformPdfToImage( bytefileContent, OcrConstants.EXTENSION_FILE_JPEG, true ) );
164                 retryImageBean.setExtension( OcrConstants.EXTENSION_FILE_JPEG );
165                 if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
166                 {
167                     writeImageFile( retryImageBean );
168                 }
169                 mapOcrServiceResults = performOcr( _dispatchA2iAObj, retryImageBean, strDocumentType, strModeOcr );
170             }
171             catch( IOException e )
172             {
173                 AppLogService.error( e.getMessage( ) );
174             }
175 
176         }
177 
178         return mapOcrServiceResults;
179     }
180 
181     /**
182      * Launch OCR and get results.
183      *
184      * @param dispatchA2iAObj
185      *            dispatchA2iAObj
186      * @param imageBean
187      *            imageBean object
188      * @param strDocumentType
189      *            document type
190      * @param strModeOcr
191      *            ocr mode
192      * @return Map result of OCR
193      * @throws OcrException
194      *             the OcrException
195      */
196     private Map<String, String> performOcr( Dispatch dispatchA2iAObj, ImageBean imageBean, String strDocumentType, String strModeOcr ) throws OcrException
197     {
198 
199         Variant variantChannelId = null;
200         Variant variantRequestId = null;
201 
202         Map<String, String> mapOcrServiceResults = new HashMap<>( );
203 
204         try
205         {
206             AppLogService.info( "openChannelA2ia begin" );
207             variantChannelId = openChannelA2ia( dispatchA2iAObj );
208             AppLogService.info( "openChannelA2ia end" );
209             variantRequestId = openRequestA2ia( imageBean, strDocumentType, new Long( variantChannelId.toString( ) ), dispatchA2iAObj, strModeOcr );
210             AppLogService.info( "openRequestA2ia end" );
211             // run A2IA OCR engine to get result
212             AppLogService.info( "Call a2ia engine begin" );
213             Variant variantResultId = Dispatch.call( dispatchA2iAObj, "ScrGetResult", variantChannelId, variantRequestId, 60000L );
214             mapOcrServiceResults = OcrResultUtils.getOcrResults( strDocumentType, dispatchA2iAObj, variantResultId );
215             AppLogService.info( "Call a2ia engine end" );
216 
217         }
218         catch( Exception e )
219         {
220             AppLogService.error( e.getMessage( ), e );
221             throw new OcrException( e.getMessage( ) );
222         }
223         finally
224         {
225             if ( variantRequestId != null )
226             {
227                 Dispatch.call( dispatchA2iAObj, "ScrCloseRequest", new Long( variantRequestId.toString( ) ) );
228             }
229             if ( variantChannelId != null )
230             {
231                 Dispatch.call( dispatchA2iAObj, "ScrCloseChannel", new Long( variantChannelId.toString( ) ) );
232             }
233 
234             if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
235             {
236                 // delete tempory file.
237                 try
238                 {
239                     Files.delete( Paths.get( imageBean.getImagePath( ) ) );
240                     AppLogService.info( "Delete file : " + imageBean.getImagePath( ) + " done." );
241                 }
242                 catch( IOException e )
243                 {
244                     throw new OcrException( e.getMessage( ) );
245                 }
246             }
247 
248         }
249 
250         return mapOcrServiceResults;
251 
252     }
253 
254     /**
255      * Open a channel communication with A2ia.
256      * 
257      * @param _dispatchA2iAObj
258      *
259      * @return id of the channel
260      */
261     private Variant openChannelA2ia( Dispatch _dispatchA2iAObj )
262     {
263 
264         Dispatch.call( _dispatchA2iAObj, "ScrInit", "" );
265 
266         // Init Param
267         Variant variantResChannelParamId = Dispatch.call( _dispatchA2iAObj, "ScrCreateChannelParam" );
268         Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, new Long( variantResChannelParamId.toString( ) ), "cpu[1].cpuServer",
269                 AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_SERVER_HOST, "" ) );
270         Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, new Long( variantResChannelParamId.toString( ) ), "cpu[1].portServer",
271                 AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_SERVER_PORT, "" ) );
272         Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, new Long( variantResChannelParamId.toString( ) ), "cpu[1].paramdir",
273                 AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_PARAM_DIR ) );
274 
275         // Open channel
276         Variant variantResChannelId = Dispatch.call( _dispatchA2iAObj, "ScrOpenChannelExt", new Long( variantResChannelParamId.toString( ) ), 10000L );
277 
278         return variantResChannelId;
279     }
280 
281     /**
282      * Open a request with A2ia.
283      *
284      * @param imageBean
285      *            imageBean object
286      * @param strFileExtension
287      *            image extension
288      * @param strDocumentType
289      *            document type
290      * @param lChannelId
291      *            id of the channel communication
292      * @param _dispatchA2iAObj
293      * @return id of the request
294      * @throws OcrException
295      *             the OcrException
296      */
297     private Variant openRequestA2ia( ImageBean imageBean, String strDocumentType, Long lChannelId, Dispatch _dispatchA2iAObj, String strModeOcr )
298             throws OcrException
299     {
300 
301         // Open Tbl doc
302         Variant variantTblId = Dispatch.call( _dispatchA2iAObj, "ScrOpenDocumentTable", getTblDocumentPath( strDocumentType ) );
303         Variant variantDefaultDocId = Dispatch.call( _dispatchA2iAObj, "ScrGetDefaultDocument", new Long( variantTblId.toString( ) ) );
304 
305         // Following Image Parameters required to be set correctly
306         Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, variantDefaultDocId, "image.inputFormat", imageBean.getExtension( ) );
307 
308         if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
309         {
310             // File : image file write on disk
311             Dispatch.call( _dispatchA2iAObj, "SetProperty", variantDefaultDocId, "image.imageSourceType", OcrConstants.OCR_MODE_FILE );
312             Dispatch.call( _dispatchA2iAObj, "SetProperty", variantDefaultDocId, "image.imageSourceTypeInfo.CaseFile.fileName", imageBean.getImagePath( ) );
313 
314         }
315         else
316         {
317             Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, variantDefaultDocId, "image.imageSourceType", OcrConstants.OCR_MODE_MEMORY );
318             // Then Set the buffer to the corresponding A2iA imageBuffer
319             Dispatch.call( _dispatchA2iAObj, "ScrSetBuffer", variantDefaultDocId, "image.imageSourceTypeInfo.CaseMemory.buffer",
320                     trasformImagetoJacobOject( imageBean.getContent( ) ) ); // from memory
321         }
322 
323         // Open Request
324         Variant variantReqId = Dispatch.call( _dispatchA2iAObj, "ScrOpenRequest", lChannelId, new Long( variantDefaultDocId.toString( ) ) );
325 
326         return variantReqId;
327     }
328 
329     /**
330      * Write temporary image file on disk.
331      * 
332      * @param imageBean
333      *            imageBean Object
334      * @throws OcrException
335      */
336     private void writeImageFile( ImageBean imageBean ) throws OcrException
337     {
338 
339         AppLogService.info( "Write tempory image file Start" );
340 
341         String strFolderTmpImageFile = AppPropertiesService.getProperty( OcrConstants.PROPERTY_FOLDER_TMP_IMAGE_FILE );
342         String strFileName = "tmp_" + new Date( ).getTime( ) + "_" + RandomStringUtils.randomAlphabetic( 3 ) + "." + imageBean.getExtension( );
343 
344         Path pathFile = Paths.get( strFolderTmpImageFile + strFileName );
345 
346         try
347         {
348             Files.write( pathFile, imageBean.getContent( ) );
349             imageBean.setImagePath( pathFile.toString( ) );
350         }
351         catch( IOException e )
352         {
353             AppLogService.error( "Error write image file ! " + e.getMessage( ) );
354             throw new OcrException( "Error write image file !" );
355         }
356 
357         AppLogService.info( "Write tempory image file End" );
358     }
359 
360     /**
361      * Transform each byte of the image to Jacob Variant.
362      *
363      * @param byteImageContent
364      *            image to process
365      * @return image representation in variant.
366      * @throws OcrException
367      *             the OcrException
368      */
369     private Variant trasformImagetoJacobOject( byte [ ] byteImageContent ) throws OcrException
370     {
371         AppLogService.info( "trasformImagetoJacobOject Start - byteImageContent length : " + byteImageContent.length );
372         Variant variantImageObjects = new Variant( );
373 
374         SafeArray safearray = new SafeArray( Variant.VariantVariant, byteImageContent.length );
375         // nNumberOfByte : number of byte processed by a single thread.
376         int nNumberOfByte = AppPropertiesService.getPropertyInt( OcrConstants.PROPERTY_NUMBER_OF_BYTE_BY_THREAD, byteImageContent.length );
377         // nNumberOfThread : number of thread to perform the image transformation.
378         int nNumberOfThread = ( ( nNumberOfByte > 1 ) && ( nNumberOfByte < byteImageContent.length ) ) ? byteImageContent.length / nNumberOfByte : 1;
379 
380         Runnable [ ] tabRunnableTask = new Runnable [ nNumberOfThread];
381         ExecutorService executorService = Executors.newFixedThreadPool( nNumberOfThread );
382         int nCurrentThreadPosition = 0;
383         while ( nCurrentThreadPosition < nNumberOfThread )
384         {
385             int nStart = nCurrentThreadPosition * nNumberOfByte;
386             int nEnd = ( nCurrentThreadPosition + 1 ) < nNumberOfThread ? ( nCurrentThreadPosition + 1 ) * nNumberOfByte : byteImageContent.length;
387             tabRunnableTask [nCurrentThreadPosition] = createRunnableTask( byteImageContent, safearray, nStart, nEnd );
388             nCurrentThreadPosition++;
389         }
390 
391         // process image transformation
392         for ( int i = 0; i < nNumberOfThread; i++ )
393         {
394             executorService.submit( tabRunnableTask [i] );
395         }
396         executorService.shutdown( );
397 
398         boolean bExecutionComplet = false;
399         try
400         {
401             bExecutionComplet = executorService.awaitTermination( AppPropertiesService.getPropertyInt( OcrConstants.PROPERTY_MAX_TIME_TO_PROCESS_IMAGE, 5 ),
402                     TimeUnit.SECONDS );
403         }
404         catch( InterruptedException e )
405         {
406             AppLogService.error( e.getMessage( ), e );
407         }
408         finally
409         {
410             if ( bExecutionComplet )
411             {
412                 variantImageObjects.putSafeArray( safearray );
413                 AppLogService.info( "trasformImagetoJacobOject Complet" );
414             }
415             else
416             {
417                 throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_TIMEOUT_TRANSFORM_IMAGE, Locale.getDefault( ) ) );
418             }
419         }
420 
421         return variantImageObjects;
422     }
423 
424     /**
425      * Create a single task to transform part of the image into variant. Each task is executed in a dedicated thread.
426      *
427      * @param byteImageContent
428      *            image to process
429      * @param safearray
430      *            jacob safe array
431      * @param nStart
432      *            first byte to process
433      * @param nEnd
434      *            last byte to process
435      * @return a runnable task.
436      */
437     private Runnable createRunnableTask( byte [ ] byteImageContent, SafeArray safearray, int nStart, int nEnd )
438     {
439         return ( ) -> {
440             for ( int i = nStart; i < nEnd; i++ )
441             {
442                 Variant variantByteImage = new Variant( );
443                 variantByteImage.putByte( byteImageContent [i] );
444                 safearray.setVariant( i, variantByteImage );
445             }
446         };
447     }
448 
449     /**
450      * Get the tbl document associate to document type.
451      *
452      * @param strDocumentType
453      *            document type
454      * @return path to tbl document
455      * @throws OcrException
456      *             the OcrException
457      */
458     private String getTblDocumentPath( String strDocumentType ) throws OcrException
459     {
460         String strTblDocumentPath = null;
461 
462         if ( strDocumentType.equalsIgnoreCase( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_DOCUMENT_RIB ) ) )
463         {
464             strTblDocumentPath = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_TBL_RIB );
465         }
466         else
467             if ( strDocumentType.equalsIgnoreCase( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_DOCUMENT_TAX ) ) )
468             {
469                 strTblDocumentPath = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_TBL_TAX );
470             }
471             else
472                 if ( strDocumentType.equalsIgnoreCase( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_DOCUMENT_IDENTITY ) ) )
473                 {
474                     strTblDocumentPath = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_TBL_IDENTITY );
475                 }
476                 else
477                 {
478                     AppLogService.error( "Bad value for document type" );
479                     String [ ] messageArgs = {
480                         strDocumentType
481                     };
482                     throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_DOCUMENT_TYPE_ERROR, messageArgs, Locale.getDefault( ) ) );
483                 }
484 
485         return strTblDocumentPath;
486     }
487 
488     /**
489      * Set the value for _strA2iaImgExtension and _byteImageContent
490      *
491      * @param bytefileContent
492      *            file to read
493      * @param strFileExtension
494      *            image extension
495      * @throws OcrException
496      *             the OcrException
497      */
498     private ImageBean setValueImageExtensionAndContent( String strFileExtension, byte [ ] bytefileContent ) throws OcrException
499     {
500         ImageBean result = new ImageBean( );
501 
502         // control extension
503         Arrays.asList( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_EXTENSION_FILE_AUTHORIZED ).split( "," ) )
504                 .stream( )
505                 .forEach(
506                         extension -> {
507                             if ( extension.equalsIgnoreCase( strFileExtension ) && OcrConstants.EXTENSION_FILE_TIFF.equalsIgnoreCase( strFileExtension ) )
508                             {
509                                 result.setContent( bytefileContent );
510                                 result.setExtension( OcrConstants.EXTENSION_FILE_TIFF );
511                             }
512                             else
513                                 if ( extension.equalsIgnoreCase( strFileExtension )
514                                         && ( OcrConstants.EXTENSION_FILE_JPEG.equalsIgnoreCase( strFileExtension ) || OcrConstants.EXTENSION_FILE_JPG
515                                                 .equalsIgnoreCase( strFileExtension ) ) )
516                                 {
517                                     result.setContent( bytefileContent );
518                                     result.setExtension( OcrConstants.EXTENSION_FILE_JPEG );
519                                 }
520                                 else
521                                     if ( extension.equalsIgnoreCase( strFileExtension ) && OcrConstants.EXTENSION_FILE_BMP.equalsIgnoreCase( strFileExtension ) )
522                                     {
523                                         result.setContent( bytefileContent );
524                                         result.setExtension( OcrConstants.EXTENSION_FILE_BMP );
525                                     }
526                                     else
527                                         if ( extension.equalsIgnoreCase( strFileExtension )
528                                                 && OcrConstants.EXTENSION_FILE_PNG.equalsIgnoreCase( strFileExtension ) )
529                                         {
530                                             result.setContent( bytefileContent );
531                                             result.setExtension( OcrConstants.EXTENSION_FILE_PNG );
532                                         }
533                                         else
534                                             if ( extension.equalsIgnoreCase( strFileExtension )
535                                                     && OcrConstants.EXTENSION_FILE_PDF.equalsIgnoreCase( strFileExtension ) )
536                                             {
537                                                 String strImageFormat = OcrConstants.EXTENSION_FILE_PNG.equalsIgnoreCase( AppPropertiesService
538                                                         .getProperty( OcrConstants.PROPERTY_PDF_IMAGE_FORMAT ) ) ? OcrConstants.EXTENSION_FILE_PNG
539                                                         : OcrConstants.EXTENSION_FILE_JPEG;
540                                                 try
541                                                 {
542                                                     result.setContent( transformPdfToImage( bytefileContent, strImageFormat, false ) );
543                                                 }
544                                                 catch( OcrException | IOException e )
545                                                 {
546                                                     AppLogService.error( e.getMessage( ) );
547                                                 }
548 
549                                                 result.setExtension( strImageFormat );
550                                             }
551 
552                         }
553 
554                 );
555 
556         if ( result.getExtension( ) == null )
557         {
558             AppLogService.error( "Bad value for file extension." );
559             String [ ] messageArgs = {
560                 strFileExtension
561             };
562             throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_FILE_EXTENSION_TYPE_ERROR, messageArgs, Locale.getDefault( ) ) );
563         }
564         else
565         {
566             return result;
567         }
568     }
569 
570     /**
571      * Convert pdf to image.
572      *
573      * @param pdfByteContent
574      *            pdf byte content
575      * @param strImageFormat
576      *            image format
577      * @param bOptimalImage
578      *            true to generate high quality image
579      * @return image byte content
580      * @throws OcrException
581      *             the OcrException
582      * @throws IOException
583      *             the IOException
584      */
585     private byte [ ] transformPdfToImage( byte [ ] pdfByteContent, String strImageFormat, boolean bOptimalImage ) throws OcrException, IOException
586     {
587 
588         AppLogService.info( "transformPdfToImage begin" );
589 
590         // initialize options to generate high quality image
591         int ndpi = 300;
592         float fCompressionLevel = 1;
593         ImageType imageType = ImageType.RGB;
594 
595         if ( !bOptimalImage )
596         {
597             ndpi = AppPropertiesService.getPropertyInt( OcrConstants.PROPERTY_PDF_IMAGE_QUALITY, 150 );
598             String strImageType = AppPropertiesService.getProperty( OcrConstants.PROPERTY_PDF_IMAGE_TYPE, OcrConstants.IMAGE_TYPE_RGB );
599             imageType = OcrConstants.IMAGE_TYPE_BINARY.equalsIgnoreCase( strImageType ) ? ImageType.BINARY : ImageType.RGB;
600 
601             if ( OcrConstants.EXTENSION_FILE_JPEG.equalsIgnoreCase( strImageFormat ) )
602             {
603                 try
604                 {
605                     fCompressionLevel = Float.valueOf( AppPropertiesService.getProperty( OcrConstants.PROPERTY_PDF_IMAGE_COMPRESSION_LEVEL ) );
606                     fCompressionLevel = ( ( fCompressionLevel <= 0 ) || ( fCompressionLevel > 1 ) ) ? 1 : fCompressionLevel;
607                 }
608                 catch( NumberFormatException e )
609                 {
610                     AppLogService.error( "Bad value for properties ocra2ia.pdf.image.compression.level.", e );
611                 }
612             }
613         }
614 
615         final ByteArrayOutputStream byteArrayos = new ByteArrayOutputStream( );
616         byte [ ] byteImageByteContent = null;
617 
618         final PDDocument document = PDDocument.load( pdfByteContent );
619         if ( document.getNumberOfPages( ) > 1 )
620         {
621             throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_PDF_NUMBER_PAGES_ERROR, Locale.getDefault( ) ) );
622         }
623 
624         PDFRenderer pdfRenderer = new PDFRenderer( document );
625         BufferedImage bim = pdfRenderer.renderImageWithDPI( 0, ndpi, imageType );
626         ImageIOUtil.writeImage( bim, strImageFormat, byteArrayos, 72, fCompressionLevel );
627         byteImageByteContent = byteArrayos.toByteArray( );
628         document.close( );
629 
630         AppLogService.info( "transformPdfToImage end" );
631 
632         return byteImageByteContent;
633 
634     }
635 
636     /**
637      * Image bean class
638      */
639     private class ImageBean
640     {
641         String _strExtension;
642         byte [ ] _byteContent;
643         String _imagePath;
644 
645         public String getExtension( )
646         {
647             return _strExtension;
648         }
649 
650         public void setExtension( String _strExtension )
651         {
652             this._strExtension = _strExtension;
653         }
654 
655         public byte [ ] getContent( )
656         {
657             return _byteContent;
658         }
659 
660         public void setContent( byte [ ] _byteContent )
661         {
662             this._byteContent = _byteContent;
663         }
664 
665         public String getImagePath( )
666         {
667             return _imagePath;
668         }
669 
670         public void setImagePath( String imagePath )
671         {
672             _imagePath = imagePath;
673         }
674 
675     }
676 
677 }