NLPAnonymizer.java

/*
 * Copyright (c) 2002-2020, Mairie de Paris
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice
 *     and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright notice
 *     and the following disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * License 1.0
 */
package fr.paris.lutece.nlptools;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * NLPAnonymizer
 */
public class NLPAnonymizer
{
    private static final String PREFIX_OUTPUT_FILE = "anonymized-";
    private static final String PREFIX_LOG_FILE = "log-";

    public static void main( String [ ] args ) throws IOException
    {
        if ( args.length < 1 )
        {
            System.out.println( "Please give a file path as argument " );
            System.exit( 0 );
        }

        List<Finder> listFinders = new ArrayList<>( );
        EmailFinder emailFinder = new EmailFinder( "#Email#" );
        listFinders.add( emailFinder );

        PhoneNumberFinder phoneFinder = new PhoneNumberFinder( "#PhoneNumber#" );
        listFinders.add( phoneFinder );

        PersonNameFinder nameFinder = new PersonNameFinder( "#PersonName#", "en" );
//        listFinders.add( nameFinder );

        String strInputFile = args [0];
        String strInput = FileUtils.readFileContent( strInputFile );

        StringBuilder sbLogs = new StringBuilder( );
        for ( Finder finder : listFinders )
        {
            try
            {
                System.out.println( "Start running " + finder.getClass( ).getName( ) + " ...");
                finder.findOccurrences( strInput );
                List<String> listEntities = finder.getFoundEntities( );
                log( sbLogs, "- " + listEntities.size( ) + " entities found by " + finder.getClass( ).getName( ) );

                for ( String strEntity : listEntities )
                {
                    log( sbLogs, "'" + strEntity + "'" );
                }
            }
            catch( FinderException ex )
            {
                log( sbLogs, ex.getMessage( ) );
            }
        }

        String strOutput = strInput;
        for ( Finder finder : listFinders )
        {
            try
            {
                strOutput = finder.replaceOccurrences( strOutput );
            }
            catch( FinderException ex )
            {
                log( sbLogs, ex.getMessage( ) );
            }
        }

        String strOutputFile = getOutputFile( strInputFile, PREFIX_OUTPUT_FILE );
        FileUtils.writeFile( strOutputFile, strOutput );
        String strLogFile = getOutputFile( strInputFile, PREFIX_LOG_FILE );
        FileUtils.writeFile( strLogFile, sbLogs.toString( ) );

        System.exit( 0 );
    }

    private static String getOutputFile( String strFilePath, String strPrefix )
    {
        File file = new File( strFilePath );
        String strPath = file.getAbsolutePath( ).substring( 0, strFilePath.lastIndexOf( file.getName( ) ) );
        return strPath + strPrefix + file.getName( );

    }

    private static void log( StringBuilder sbLogs, String strLog )
    {
        sbLogs.append( strLog ).append( '\n' );
        System.out.println( strLog );
    }
}