1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package fr.paris.lutece.nlptools;
35
36 import java.io.File;
37 import java.io.IOException;
38 import java.util.ArrayList;
39 import java.util.List;
40
41
42
43
44 public class NLPAnonymizer
45 {
46 private static final String PREFIX_OUTPUT_FILE = "anonymized-";
47 private static final String PREFIX_LOG_FILE = "log-";
48
49 public static void main( String [ ] args ) throws IOException
50 {
51 if ( args.length < 1 )
52 {
53 System.out.println( "Please give a file path as argument " );
54 System.exit( 0 );
55 }
56
57 List<Finder> listFinders = new ArrayList<>( );
58 EmailFindertml#EmailFinder">EmailFinder emailFinder = new EmailFinder( "#Email#" );
59 listFinders.add( emailFinder );
60
61 PhoneNumberFindertml#PhoneNumberFinder">PhoneNumberFinder phoneFinder = new PhoneNumberFinder( "#PhoneNumber#" );
62 listFinders.add( phoneFinder );
63
64 PersonNameFinderhtml#PersonNameFinder">PersonNameFinder nameFinder = new PersonNameFinder( "#PersonName#", "en" );
65
66
67 String strInputFile = args [0];
68 String strInput = FileUtils.readFileContent( strInputFile );
69
70 StringBuilder sbLogs = new StringBuilder( );
71 for ( Finder finder : listFinders )
72 {
73 try
74 {
75 System.out.println( "Start running " + finder.getClass( ).getName( ) + " ...");
76 finder.findOccurrences( strInput );
77 List<String> listEntities = finder.getFoundEntities( );
78 log( sbLogs, "- " + listEntities.size( ) + " entities found by " + finder.getClass( ).getName( ) );
79
80 for ( String strEntity : listEntities )
81 {
82 log( sbLogs, "'" + strEntity + "'" );
83 }
84 }
85 catch( FinderException ex )
86 {
87 log( sbLogs, ex.getMessage( ) );
88 }
89 }
90
91 String strOutput = strInput;
92 for ( Finder finder : listFinders )
93 {
94 try
95 {
96 strOutput = finder.replaceOccurrences( strOutput );
97 }
98 catch( FinderException ex )
99 {
100 log( sbLogs, ex.getMessage( ) );
101 }
102 }
103
104 String strOutputFile = getOutputFile( strInputFile, PREFIX_OUTPUT_FILE );
105 FileUtils.writeFile( strOutputFile, strOutput );
106 String strLogFile = getOutputFile( strInputFile, PREFIX_LOG_FILE );
107 FileUtils.writeFile( strLogFile, sbLogs.toString( ) );
108
109 System.exit( 0 );
110 }
111
112 private static String getOutputFile( String strFilePath, String strPrefix )
113 {
114 File file = new File( strFilePath );
115 String strPath = file.getAbsolutePath( ).substring( 0, strFilePath.lastIndexOf( file.getName( ) ) );
116 return strPath + strPrefix + file.getName( );
117
118 }
119
120 private static void log( StringBuilder sbLogs, String strLog )
121 {
122 sbLogs.append( strLog ).append( '\n' );
123 System.out.println( strLog );
124 }
125 }