LuteceHtmlParser.java

/*
 * Copyright (c) 2002-2023, City of Paris
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice
 *     and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright notice
 *     and the following disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 *  3. Neither the name of 'Mairie de Paris' nor 'Lutece' nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * License 1.0
 */
package fr.paris.lutece.plugins.wiki.service.parser;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.util.List;

public class LuteceHtmlParser
{
    private final static String STYLE = "style";
    private final static String WIKI_ALIGN_CONTENT_VAL = "wiki-align-content-val-";
    private final static String SUB_LINK_CONTAINER = "subLinkContainer";
    public static String parseHtml( String htmlFromEditor, String wikiPageUrl, String pageTitle )
    {
        htmlFromEditor = SpecialChar.renderWiki( htmlFromEditor );
        Document parser = Jsoup.parse( htmlFromEditor );
        Element doc = parser.body( );
        List<Element> headers = doc.select( "h1, h2, h3, h4, h5, h6" );
        for ( Element header : headers )
        {
            String headerText = header.text( );
            headerText = headerText.replaceAll( " ", "_" );
           // replace all non-alphanumeric characters with nothing
            headerText = headerText.replaceAll( "[^A-Za-z0-9_]", "" );
            headerText = headerText.toLowerCase( );
            header.attr( "id", headerText );
        }
        List<Element> preElements = doc.select( "pre" );
        for ( Element preElement : preElements )
        {
            preElement.attr( STYLE, "background-color: #2f3241" );
        }
        doc.select( ".toastui-editor-md-preview-highlight" ).forEach( element -> element.removeClass( "toastui-editor-md-preview-highlight" ) );
        if ( doc.select( ".ProseMirror" ).text( ).contains( WIKI_ALIGN_CONTENT_VAL ) )
        {
            String alignmentValue = doc.select( ".ProseMirror" ).text( ).split( WIKI_ALIGN_CONTENT_VAL ) [1].substring( 0, 1 );
            doc.select( ".toastui-editor-contents" ).addClass( WIKI_ALIGN_CONTENT_VAL + alignmentValue );
        }
        Element toc = doc.select( ".toc" ).first( );
        if ( toc != null )
        {
            Element tableOfContent = createTableOfContent( doc, wikiPageUrl, pageTitle );
            doc.select( ".toc" ).remove( );

            Element flexDiv = new Element( "div" );
            flexDiv.addClass( "wiki-nav-content-wrapper" );
            flexDiv.appendChild( tableOfContent );
            Element contentDiv = new Element( "div" );
            contentDiv.append( parser.body( ).outerHtml( ) );
            flexDiv.appendChild( contentDiv );
            doc = flexDiv;

            return SpecialChar.reverseRender( doc.outerHtml( ) );
        }
        else
        {
            Element contentDiv = new Element( "div" );
            contentDiv.append( parser.body( ).outerHtml( ) );
            return SpecialChar.reverseRender( contentDiv.outerHtml( ) );
        }
    }

    public static Element createTableOfContent( Element doc, String wikiPageUrl, String pageTitle )
    {
        Element tableOfContent = new Element( "ul" );
        tableOfContent.addClass( "nav" );
        tableOfContent.addClass( "flex-column" );
        tableOfContent.addClass( "wiki-topic-nav" );
        Element titleElement = new Element( "a" );
        titleElement.addClass( "nav-link" );
        titleElement.attr( "href", wikiPageUrl );
        titleElement.attr( STYLE, "font-weight: bold; font-size: 1.5rem;" );
        titleElement.text( pageTitle );
        tableOfContent.appendChild( titleElement );
        List<Element> headers = doc.select( "h1, h2, h3" );
        for ( int i = 0; i < headers.size( ); i++ )
        {
            Element header = headers.get( i );
            String headerText = header.text( );
            String headerLevel = header.tagName( );
            Element linkElement = new Element( "a" );
            linkElement.attr( "href", wikiPageUrl + "#" + header.id( ) );
            linkElement.addClass( "nav-link" );
            linkElement.text( headerText );
            Element navItem = new Element( "li" );
            if ( headerLevel.equals( "h1" ) )
            {
                navItem.addClass( "nav-item" );
                linkElement.attr( STYLE, "font-weight: bold;" );
                if ( i + 1 < headers.size( ) )
                {
                    if ( headers.get( i + 1 ).tagName( ).equals( "h1" ) )
                    {
                        tableOfContent.appendChild( linkElement );
                    }
                    else
                        if ( headers.get( i + 1 ).tagName( ).equals( "h2" ) || headers.get( i + 1 ).tagName( ).equals( "h3" ) )
                        {
                            Element divContainer = new Element( "div" );
                            divContainer.attr( STYLE, "display: flex; flex-direction: row; spacing: 5px;" );
                            divContainer.appendChild( linkElement );
                            navItem.appendChild( divContainer );
                            tableOfContent.appendChild( navItem );
                            Element subLinkContainer = new Element( "ul" );
                            subLinkContainer.addClass( SUB_LINK_CONTAINER );
                            tableOfContent.appendChild( subLinkContainer );
                        }
                        else
                        {
                            tableOfContent.appendChild( linkElement );
                        }
                }
            }
            if ( headerLevel.equals( "h2" ) || headerLevel.equals( "h3" ) )
            {
                List<Element> subLinkContainers = tableOfContent.getElementsByClass( SUB_LINK_CONTAINER );
                if ( subLinkContainers.isEmpty( ) )
                {
                    Element subLinkContainer = new Element( "ul" );
                    subLinkContainer.addClass( SUB_LINK_CONTAINER );
                    subLinkContainer.appendChild( linkElement );
                    tableOfContent.appendChild( subLinkContainer );
                }
                else
                {
                    Element subLinkContainer = subLinkContainers.get( subLinkContainers.size( ) - 1 );
                    subLinkContainer.appendChild( linkElement );
                }
            }
        }
        return tableOfContent;
    }

}