View Javadoc
1   /*
2    * Copyright (c) 2002-2020, Mairie de Paris
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    *
9    *  1. Redistributions of source code must retain the above copyright notice
10   *     and the following disclaimer.
11   *
12   *  2. Redistributions in binary form must reproduce the above copyright notice
13   *     and the following disclaimer in the documentation and/or other materials
14   *     provided with the distribution.
15   *
16   *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
17   *     contributors may be used to endorse or promote products derived from
18   *     this software without specific prior written permission.
19   *
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   * POSSIBILITY OF SUCH DAMAGE.
31   *
32   * License 1.0
33   */
34  package fr.paris.lutece.nlptools;
35  
36  import java.io.File;
37  import java.io.IOException;
38  import java.util.ArrayList;
39  import java.util.List;
40  
41  /**
42   * NLPAnonymizer
43   */
44  public class NLPAnonymizer
45  {
46      private static final String PREFIX_OUTPUT_FILE = "anonymized-";
47      private static final String PREFIX_LOG_FILE = "log-";
48  
49      public static void main( String [ ] args ) throws IOException
50      {
51          if ( args.length < 1 )
52          {
53              System.out.println( "Please give a file path as argument " );
54              System.exit( 0 );
55          }
56  
57          List<Finder> listFinders = new ArrayList<>( );
58          EmailFindertml#EmailFinder">EmailFinder emailFinder = new EmailFinder( "#Email#" );
59          listFinders.add( emailFinder );
60  
61          PhoneNumberFindertml#PhoneNumberFinder">PhoneNumberFinder phoneFinder = new PhoneNumberFinder( "#PhoneNumber#" );
62          listFinders.add( phoneFinder );
63  
64          PersonNameFinderhtml#PersonNameFinder">PersonNameFinder nameFinder = new PersonNameFinder( "#PersonName#", "en" );
65  //        listFinders.add( nameFinder );
66  
67          String strInputFile = args [0];
68          String strInput = FileUtils.readFileContent( strInputFile );
69  
70          StringBuilder sbLogs = new StringBuilder( );
71          for ( Finder finder : listFinders )
72          {
73              try
74              {
75                  System.out.println( "Start running " + finder.getClass( ).getName( ) + " ...");
76                  finder.findOccurrences( strInput );
77                  List<String> listEntities = finder.getFoundEntities( );
78                  log( sbLogs, "- " + listEntities.size( ) + " entities found by " + finder.getClass( ).getName( ) );
79  
80                  for ( String strEntity : listEntities )
81                  {
82                      log( sbLogs, "'" + strEntity + "'" );
83                  }
84              }
85              catch( FinderException ex )
86              {
87                  log( sbLogs, ex.getMessage( ) );
88              }
89          }
90  
91          String strOutput = strInput;
92          for ( Finder finder : listFinders )
93          {
94              try
95              {
96                  strOutput = finder.replaceOccurrences( strOutput );
97              }
98              catch( FinderException ex )
99              {
100                 log( sbLogs, ex.getMessage( ) );
101             }
102         }
103 
104         String strOutputFile = getOutputFile( strInputFile, PREFIX_OUTPUT_FILE );
105         FileUtils.writeFile( strOutputFile, strOutput );
106         String strLogFile = getOutputFile( strInputFile, PREFIX_LOG_FILE );
107         FileUtils.writeFile( strLogFile, sbLogs.toString( ) );
108 
109         System.exit( 0 );
110     }
111 
112     private static String getOutputFile( String strFilePath, String strPrefix )
113     {
114         File file = new File( strFilePath );
115         String strPath = file.getAbsolutePath( ).substring( 0, strFilePath.lastIndexOf( file.getName( ) ) );
116         return strPath + strPrefix + file.getName( );
117 
118     }
119 
120     private static void log( StringBuilder sbLogs, String strLog )
121     {
122         sbLogs.append( strLog ).append( '\n' );
123         System.out.println( strLog );
124     }
125 }