1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.plugins.ocra2ia.service;
35
36 import java.awt.image.BufferedImage;
37 import java.io.ByteArrayOutputStream;
38 import java.io.IOException;
39 import java.nio.file.Files;
40 import java.nio.file.Path;
41 import java.nio.file.Paths;
42 import java.util.Arrays;
43 import java.util.Date;
44 import java.util.HashMap;
45 import java.util.Locale;
46 import java.util.Map;
47 import java.util.concurrent.ExecutorService;
48 import java.util.concurrent.Executors;
49 import java.util.concurrent.TimeUnit;
50
51 import javax.annotation.PostConstruct;
52
53 import org.apache.commons.lang.StringUtils;
54 import org.apache.commons.lang3.ArrayUtils;
55 import org.apache.commons.lang3.RandomStringUtils;
56 import org.apache.pdfbox.pdmodel.PDDocument;
57 import org.apache.pdfbox.rendering.ImageType;
58 import org.apache.pdfbox.rendering.PDFRenderer;
59 import org.apache.pdfbox.tools.imageio.ImageIOUtil;
60
61 import com.jacob.activeX.ActiveXComponent;
62 import com.jacob.com.Dispatch;
63 import com.jacob.com.SafeArray;
64 import com.jacob.com.Variant;
65
66 import fr.paris.lutece.plugins.ocra2ia.exception.OcrException;
67 import fr.paris.lutece.plugins.ocra2ia.util.OcrConstants;
68 import fr.paris.lutece.plugins.ocra2ia.util.OcrResultUtils;
69 import fr.paris.lutece.portal.service.i18n.I18nService;
70 import fr.paris.lutece.portal.service.util.AppLogService;
71 import fr.paris.lutece.portal.service.util.AppPropertiesService;
72
73
74
75
76
77
78 public class OcrService
79 {
80
81
82
83
/**
 * COM class-id moniker of the A2iA component, in the form "clsid:{...}". Assigned by {@link #init()}; it stays null when the native
 * Jacob library cannot be loaded, which {@code proceed} reports as an initialisation error.
 */
private String _strClsid;
85
86
87
88
89 @PostConstruct
90 public void init( )
91 {
92 try
93 {
94 String folder = AppPropertiesService.getProperty( OcrConstants.PROPERTY_FOLDER_DLL_JACOB );
95
96 System.load( folder + OcrConstants.JACOB_DLL64_FILE );
97
98
99 _strClsid = "clsid:{" + AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_CLSID ) + "}";
100
101 }
102 catch( UnsatisfiedLinkError e )
103 {
104 AppLogService.error( "Native code Jacob library failed to load.\n" + e );
105 }
106
107 AppLogService.info( "init OCR service done." );
108 }
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124 public synchronized Map<String, String> proceed( byte [ ] bytefileContent, String strFileExtension, String strDocumentType ) throws OcrException
125 {
126
127
128
129
130 ActiveXComponent comp = new ActiveXComponent( _strClsid );
131 Dispatch _dispatchA2iAObj = comp.getObject( );
132
133 if ( StringUtils.isEmpty( _strClsid ) )
134 {
135 AppLogService.error( "Bad initialisation of OCR Service." );
136 throw new OcrException( OcrConstants.MESSAGE_INIT_ERROR );
137 }
138
139 if ( ArrayUtils.isEmpty( bytefileContent ) || StringUtils.isEmpty( strFileExtension ) || StringUtils.isEmpty( strDocumentType ) )
140 {
141 throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_PARAMETER_MANDATORY, Locale.getDefault( ) ) );
142
143 }
144
145 ImageBean imageBean = setValueImageExtensionAndContent( strFileExtension, bytefileContent );
146 String strModeOcr = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_MODE_OCR, OcrConstants.OCR_MODE_MEMORY );
147 if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
148 {
149 writeImageFile( imageBean );
150 }
151
152 Map<String, String> mapOcrServiceResults = performOcr( _dispatchA2iAObj, imageBean, strDocumentType, strModeOcr );
153
154 mapOcrServiceResults.values( ).removeIf( StringUtils::isBlank );
155 boolean bRetry = AppPropertiesService.getPropertyBoolean( OcrConstants.PROPERTY_PDF_IMAGE_RETRY, false ) && mapOcrServiceResults.isEmpty( )
156 && OcrConstants.EXTENSION_FILE_PDF.equalsIgnoreCase( strFileExtension );
157 if ( bRetry )
158 {
159 AppLogService.info( "the retry mechanism will be launched" );
160 try
161 {
162 ImageBean retryImageBean = new ImageBean( );
163 retryImageBean.setContent( transformPdfToImage( bytefileContent, OcrConstants.EXTENSION_FILE_JPEG, true ) );
164 retryImageBean.setExtension( OcrConstants.EXTENSION_FILE_JPEG );
165 if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
166 {
167 writeImageFile( retryImageBean );
168 }
169 mapOcrServiceResults = performOcr( _dispatchA2iAObj, retryImageBean, strDocumentType, strModeOcr );
170 }
171 catch( IOException e )
172 {
173 AppLogService.error( e.getMessage( ) );
174 }
175
176 }
177
178 return mapOcrServiceResults;
179 }
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196 private Map<String, String> performOcr( Dispatch dispatchA2iAObj, ImageBean imageBean, String strDocumentType, String strModeOcr ) throws OcrException
197 {
198
199 Variant variantChannelId = null;
200 Variant variantRequestId = null;
201
202 Map<String, String> mapOcrServiceResults = new HashMap<>( );
203
204 try
205 {
206 AppLogService.info( "openChannelA2ia begin" );
207 variantChannelId = openChannelA2ia( dispatchA2iAObj );
208 AppLogService.info( "openChannelA2ia end" );
209 variantRequestId = openRequestA2ia( imageBean, strDocumentType, new Long( variantChannelId.toString( ) ), dispatchA2iAObj, strModeOcr );
210 AppLogService.info( "openRequestA2ia end" );
211
212 AppLogService.info( "Call a2ia engine begin" );
213 Variant variantResultId = Dispatch.call( dispatchA2iAObj, "ScrGetResult", variantChannelId, variantRequestId, 60000L );
214 mapOcrServiceResults = OcrResultUtils.getOcrResults( strDocumentType, dispatchA2iAObj, variantResultId );
215 AppLogService.info( "Call a2ia engine end" );
216
217 }
218 catch( Exception e )
219 {
220 AppLogService.error( e.getMessage( ), e );
221 throw new OcrException( e.getMessage( ) );
222 }
223 finally
224 {
225 if ( variantRequestId != null )
226 {
227 Dispatch.call( dispatchA2iAObj, "ScrCloseRequest", new Long( variantRequestId.toString( ) ) );
228 }
229 if ( variantChannelId != null )
230 {
231 Dispatch.call( dispatchA2iAObj, "ScrCloseChannel", new Long( variantChannelId.toString( ) ) );
232 }
233
234 if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
235 {
236
237 try
238 {
239 Files.delete( Paths.get( imageBean.getImagePath( ) ) );
240 AppLogService.info( "Delete file : " + imageBean.getImagePath( ) + " done." );
241 }
242 catch( IOException e )
243 {
244 throw new OcrException( e.getMessage( ) );
245 }
246 }
247
248 }
249
250 return mapOcrServiceResults;
251
252 }
253
254
255
256
257
258
259
260
261 private Variant openChannelA2ia( Dispatch _dispatchA2iAObj )
262 {
263
264 Dispatch.call( _dispatchA2iAObj, "ScrInit", "" );
265
266
267 Variant variantResChannelParamId = Dispatch.call( _dispatchA2iAObj, "ScrCreateChannelParam" );
268 Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, new Long( variantResChannelParamId.toString( ) ), "cpu[1].cpuServer",
269 AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_SERVER_HOST, "" ) );
270 Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, new Long( variantResChannelParamId.toString( ) ), "cpu[1].portServer",
271 AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_SERVER_PORT, "" ) );
272 Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, new Long( variantResChannelParamId.toString( ) ), "cpu[1].paramdir",
273 AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_PARAM_DIR ) );
274
275
276 Variant variantResChannelId = Dispatch.call( _dispatchA2iAObj, "ScrOpenChannelExt", new Long( variantResChannelParamId.toString( ) ), 10000L );
277
278 return variantResChannelId;
279 }
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297 private Variant openRequestA2ia( ImageBean imageBean, String strDocumentType, Long lChannelId, Dispatch _dispatchA2iAObj, String strModeOcr )
298 throws OcrException
299 {
300
301
302 Variant variantTblId = Dispatch.call( _dispatchA2iAObj, "ScrOpenDocumentTable", getTblDocumentPath( strDocumentType ) );
303 Variant variantDefaultDocId = Dispatch.call( _dispatchA2iAObj, "ScrGetDefaultDocument", new Long( variantTblId.toString( ) ) );
304
305
306 Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, variantDefaultDocId, "image.inputFormat", imageBean.getExtension( ) );
307
308 if ( OcrConstants.OCR_MODE_FILE.equalsIgnoreCase( strModeOcr ) )
309 {
310
311 Dispatch.call( _dispatchA2iAObj, "SetProperty", variantDefaultDocId, "image.imageSourceType", OcrConstants.OCR_MODE_FILE );
312 Dispatch.call( _dispatchA2iAObj, "SetProperty", variantDefaultDocId, "image.imageSourceTypeInfo.CaseFile.fileName", imageBean.getImagePath( ) );
313
314 }
315 else
316 {
317 Dispatch.call( _dispatchA2iAObj, OcrConstants.SET_PROPERTY_A2IA, variantDefaultDocId, "image.imageSourceType", OcrConstants.OCR_MODE_MEMORY );
318
319 Dispatch.call( _dispatchA2iAObj, "ScrSetBuffer", variantDefaultDocId, "image.imageSourceTypeInfo.CaseMemory.buffer",
320 trasformImagetoJacobOject( imageBean.getContent( ) ) );
321 }
322
323
324 Variant variantReqId = Dispatch.call( _dispatchA2iAObj, "ScrOpenRequest", lChannelId, new Long( variantDefaultDocId.toString( ) ) );
325
326 return variantReqId;
327 }
328
329
330
331
332
333
334
335
336 private void writeImageFile( ImageBean imageBean ) throws OcrException
337 {
338
339 AppLogService.info( "Write tempory image file Start" );
340
341 String strFolderTmpImageFile = AppPropertiesService.getProperty( OcrConstants.PROPERTY_FOLDER_TMP_IMAGE_FILE );
342 String strFileName = "tmp_" + new Date( ).getTime( ) + "_" + RandomStringUtils.randomAlphabetic( 3 ) + "." + imageBean.getExtension( );
343
344 Path pathFile = Paths.get( strFolderTmpImageFile + strFileName );
345
346 try
347 {
348 Files.write( pathFile, imageBean.getContent( ) );
349 imageBean.setImagePath( pathFile.toString( ) );
350 }
351 catch( IOException e )
352 {
353 AppLogService.error( "Error write image file ! " + e.getMessage( ) );
354 throw new OcrException( "Error write image file !" );
355 }
356
357 AppLogService.info( "Write tempory image file End" );
358 }
359
360
361
362
363
364
365
366
367
368
369 private Variant trasformImagetoJacobOject( byte [ ] byteImageContent ) throws OcrException
370 {
371 AppLogService.info( "trasformImagetoJacobOject Start - byteImageContent length : " + byteImageContent.length );
372 Variant variantImageObjects = new Variant( );
373
374 SafeArray safearray = new SafeArray( Variant.VariantVariant, byteImageContent.length );
375
376 int nNumberOfByte = AppPropertiesService.getPropertyInt( OcrConstants.PROPERTY_NUMBER_OF_BYTE_BY_THREAD, byteImageContent.length );
377
378 int nNumberOfThread = ( ( nNumberOfByte > 1 ) && ( nNumberOfByte < byteImageContent.length ) ) ? byteImageContent.length / nNumberOfByte : 1;
379
380 Runnable [ ] tabRunnableTask = new Runnable [ nNumberOfThread];
381 ExecutorService executorService = Executors.newFixedThreadPool( nNumberOfThread );
382 int nCurrentThreadPosition = 0;
383 while ( nCurrentThreadPosition < nNumberOfThread )
384 {
385 int nStart = nCurrentThreadPosition * nNumberOfByte;
386 int nEnd = ( nCurrentThreadPosition + 1 ) < nNumberOfThread ? ( nCurrentThreadPosition + 1 ) * nNumberOfByte : byteImageContent.length;
387 tabRunnableTask [nCurrentThreadPosition] = createRunnableTask( byteImageContent, safearray, nStart, nEnd );
388 nCurrentThreadPosition++;
389 }
390
391
392 for ( int i = 0; i < nNumberOfThread; i++ )
393 {
394 executorService.submit( tabRunnableTask [i] );
395 }
396 executorService.shutdown( );
397
398 boolean bExecutionComplet = false;
399 try
400 {
401 bExecutionComplet = executorService.awaitTermination( AppPropertiesService.getPropertyInt( OcrConstants.PROPERTY_MAX_TIME_TO_PROCESS_IMAGE, 5 ),
402 TimeUnit.SECONDS );
403 }
404 catch( InterruptedException e )
405 {
406 AppLogService.error( e.getMessage( ), e );
407 }
408 finally
409 {
410 if ( bExecutionComplet )
411 {
412 variantImageObjects.putSafeArray( safearray );
413 AppLogService.info( "trasformImagetoJacobOject Complet" );
414 }
415 else
416 {
417 throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_TIMEOUT_TRANSFORM_IMAGE, Locale.getDefault( ) ) );
418 }
419 }
420
421 return variantImageObjects;
422 }
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437 private Runnable createRunnableTask( byte [ ] byteImageContent, SafeArray safearray, int nStart, int nEnd )
438 {
439 return ( ) -> {
440 for ( int i = nStart; i < nEnd; i++ )
441 {
442 Variant variantByteImage = new Variant( );
443 variantByteImage.putByte( byteImageContent [i] );
444 safearray.setVariant( i, variantByteImage );
445 }
446 };
447 }
448
449
450
451
452
453
454
455
456
457
458 private String getTblDocumentPath( String strDocumentType ) throws OcrException
459 {
460 String strTblDocumentPath = null;
461
462 if ( strDocumentType.equalsIgnoreCase( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_DOCUMENT_RIB ) ) )
463 {
464 strTblDocumentPath = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_TBL_RIB );
465 }
466 else
467 if ( strDocumentType.equalsIgnoreCase( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_DOCUMENT_TAX ) ) )
468 {
469 strTblDocumentPath = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_TBL_TAX );
470 }
471 else
472 if ( strDocumentType.equalsIgnoreCase( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_DOCUMENT_IDENTITY ) ) )
473 {
474 strTblDocumentPath = AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_TBL_IDENTITY );
475 }
476 else
477 {
478 AppLogService.error( "Bad value for document type" );
479 String [ ] messageArgs = {
480 strDocumentType
481 };
482 throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_DOCUMENT_TYPE_ERROR, messageArgs, Locale.getDefault( ) ) );
483 }
484
485 return strTblDocumentPath;
486 }
487
488
489
490
491
492
493
494
495
496
497
498 private ImageBean setValueImageExtensionAndContent( String strFileExtension, byte [ ] bytefileContent ) throws OcrException
499 {
500 ImageBean result = new ImageBean( );
501
502
503 Arrays.asList( AppPropertiesService.getProperty( OcrConstants.PROPERTY_A2IA_EXTENSION_FILE_AUTHORIZED ).split( "," ) )
504 .stream( )
505 .forEach(
506 extension -> {
507 if ( extension.equalsIgnoreCase( strFileExtension ) && OcrConstants.EXTENSION_FILE_TIFF.equalsIgnoreCase( strFileExtension ) )
508 {
509 result.setContent( bytefileContent );
510 result.setExtension( OcrConstants.EXTENSION_FILE_TIFF );
511 }
512 else
513 if ( extension.equalsIgnoreCase( strFileExtension )
514 && ( OcrConstants.EXTENSION_FILE_JPEG.equalsIgnoreCase( strFileExtension ) || OcrConstants.EXTENSION_FILE_JPG
515 .equalsIgnoreCase( strFileExtension ) ) )
516 {
517 result.setContent( bytefileContent );
518 result.setExtension( OcrConstants.EXTENSION_FILE_JPEG );
519 }
520 else
521 if ( extension.equalsIgnoreCase( strFileExtension ) && OcrConstants.EXTENSION_FILE_BMP.equalsIgnoreCase( strFileExtension ) )
522 {
523 result.setContent( bytefileContent );
524 result.setExtension( OcrConstants.EXTENSION_FILE_BMP );
525 }
526 else
527 if ( extension.equalsIgnoreCase( strFileExtension )
528 && OcrConstants.EXTENSION_FILE_PNG.equalsIgnoreCase( strFileExtension ) )
529 {
530 result.setContent( bytefileContent );
531 result.setExtension( OcrConstants.EXTENSION_FILE_PNG );
532 }
533 else
534 if ( extension.equalsIgnoreCase( strFileExtension )
535 && OcrConstants.EXTENSION_FILE_PDF.equalsIgnoreCase( strFileExtension ) )
536 {
537 String strImageFormat = OcrConstants.EXTENSION_FILE_PNG.equalsIgnoreCase( AppPropertiesService
538 .getProperty( OcrConstants.PROPERTY_PDF_IMAGE_FORMAT ) ) ? OcrConstants.EXTENSION_FILE_PNG
539 : OcrConstants.EXTENSION_FILE_JPEG;
540 try
541 {
542 result.setContent( transformPdfToImage( bytefileContent, strImageFormat, false ) );
543 }
544 catch( OcrException | IOException e )
545 {
546 AppLogService.error( e.getMessage( ) );
547 }
548
549 result.setExtension( strImageFormat );
550 }
551
552 }
553
554 );
555
556 if ( result.getExtension( ) == null )
557 {
558 AppLogService.error( "Bad value for file extension." );
559 String [ ] messageArgs = {
560 strFileExtension
561 };
562 throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_FILE_EXTENSION_TYPE_ERROR, messageArgs, Locale.getDefault( ) ) );
563 }
564 else
565 {
566 return result;
567 }
568 }
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585 private byte [ ] transformPdfToImage( byte [ ] pdfByteContent, String strImageFormat, boolean bOptimalImage ) throws OcrException, IOException
586 {
587
588 AppLogService.info( "transformPdfToImage begin" );
589
590
591 int ndpi = 300;
592 float fCompressionLevel = 1;
593 ImageType imageType = ImageType.RGB;
594
595 if ( !bOptimalImage )
596 {
597 ndpi = AppPropertiesService.getPropertyInt( OcrConstants.PROPERTY_PDF_IMAGE_QUALITY, 150 );
598 String strImageType = AppPropertiesService.getProperty( OcrConstants.PROPERTY_PDF_IMAGE_TYPE, OcrConstants.IMAGE_TYPE_RGB );
599 imageType = OcrConstants.IMAGE_TYPE_BINARY.equalsIgnoreCase( strImageType ) ? ImageType.BINARY : ImageType.RGB;
600
601 if ( OcrConstants.EXTENSION_FILE_JPEG.equalsIgnoreCase( strImageFormat ) )
602 {
603 try
604 {
605 fCompressionLevel = Float.valueOf( AppPropertiesService.getProperty( OcrConstants.PROPERTY_PDF_IMAGE_COMPRESSION_LEVEL ) );
606 fCompressionLevel = ( ( fCompressionLevel <= 0 ) || ( fCompressionLevel > 1 ) ) ? 1 : fCompressionLevel;
607 }
608 catch( NumberFormatException e )
609 {
610 AppLogService.error( "Bad value for properties ocra2ia.pdf.image.compression.level.", e );
611 }
612 }
613 }
614
615 final ByteArrayOutputStream byteArrayos = new ByteArrayOutputStream( );
616 byte [ ] byteImageByteContent = null;
617
618 final PDDocument document = PDDocument.load( pdfByteContent );
619 if ( document.getNumberOfPages( ) > 1 )
620 {
621 throw new OcrException( I18nService.getLocalizedString( OcrConstants.MESSAGE_PDF_NUMBER_PAGES_ERROR, Locale.getDefault( ) ) );
622 }
623
624 PDFRenderer pdfRenderer = new PDFRenderer( document );
625 BufferedImage bim = pdfRenderer.renderImageWithDPI( 0, ndpi, imageType );
626 ImageIOUtil.writeImage( bim, strImageFormat, byteArrayos, 72, fCompressionLevel );
627 byteImageByteContent = byteArrayos.toByteArray( );
628 document.close( );
629
630 AppLogService.info( "transformPdfToImage end" );
631
632 return byteImageByteContent;
633
634 }
635
636
637
638
639 private class ImageBean
640 {
641 String _strExtension;
642 byte [ ] _byteContent;
643 String _imagePath;
644
645 public String getExtension( )
646 {
647 return _strExtension;
648 }
649
650 public void setExtension( String _strExtension )
651 {
652 this._strExtension = _strExtension;
653 }
654
655 public byte [ ] getContent( )
656 {
657 return _byteContent;
658 }
659
660 public void setContent( byte [ ] _byteContent )
661 {
662 this._byteContent = _byteContent;
663 }
664
665 public String getImagePath( )
666 {
667 return _imagePath;
668 }
669
670 public void setImagePath( String imagePath )
671 {
672 _imagePath = imagePath;
673 }
674
675 }
676
677 }