View Javadoc
1   /*
2    * Copyright 2007-2009 Yaroslav Stavnichiy, yarosla@gmail.com
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Latest version of this software can be obtained from:
17   *
18   *     http://t4-wiki-parser.googlecode.com
19   *
20   * If you make use of this code, I'd appreciate hearing about it.
21   * Comments, suggestions, and bug reports welcome: yarosla@gmail.com
22   */
23  package ys.wikiparser;
24  
25  import static ys.wikiparser.Utils.*;
26  import com.vladsch.flexmark.parser.Parser;
27  import com.vladsch.flexmark.html.HtmlRenderer;
28  import com.vladsch.flexmark.util.data.MutableDataSet;
29  import com.vladsch.flexmark.util.misc.Extension;
30  import java.net.*;
31  
32  import java.util.HashSet;
33  import java.util.List;
34  import java.util.regex.Matcher;
35  import java.util.regex.Pattern;
36  import java.util.ArrayList;
37  /**
38   * WikiParser.renderXHTML() is the main method of this class. It takes wiki-text and returns XHTML.
39   *
40   * WikiParser's behavior can be customized by overriding appendXxx() methods, which should make integration of this class into any wiki/blog/forum software easy
41   * and painless.
42   *
43   * @author Yaroslav Stavnichiy (yarosla@gmail.com)
44   *
45   */
46  public class WikiParser
47  {
48      private static final int MAX_LIST_LEVELS = 100;
49      private static final String [ ] ESCAPED_INLINE_SEQUENCES = {
50              "{{{", "{{", "}}}", "**", "//", "__", "##", "\\\\", "[[", "<<<", "~", "--", "|"
51      };
52      private static final String LIST_CHARS = "*-#>:!";
53      private static final String [ ] LIST_OPEN = {
54              "<ul><li>", "<ul><li>", "<ol><li>", "<blockquote>", "<div class='wiki_indent'>", "<div class='wiki_center'>"
55      };
56      private static final String [ ] LIST_CLOSE = {
57              "</li></ul>\n", "</li></ul>\n", "</li></ol>\n", "</blockquote>\n", "</div>\n", "</div>\n"
58      };
59      private static final String FORMAT_CHARS = "*/_#";
60      private static final String [ ] FORMAT_DELIM = {
61              "**", "//", "__", "##"
62      };
63      private static final String [ ] FORMAT_TAG_OPEN = {
64              "<strong>", "<em>", "<span class=\"wiki_underline\">", "<tt>"
65      };
66      private static final String [ ] FORMAT_TAG_CLOSE = {
67              "</strong>", "</em>", "</span>", "</tt>"
68      };
69      public static String CUSTOM_INPUTS_TO_REEMPLACE = "//CustomInputToReEmplace//";
70      private int wikiLength;
71      private char [ ] wikiChars;
72      protected StringBuilder sb = new StringBuilder( );
73      protected StringBuilder toc = new StringBuilder( );
74      protected int tocLevel = 0;
75      private HashSet<String> tocAnchorIds = new HashSet<String>( );
76      private String wikiText;
77      private int pos = 0;
78      private int listLevel = -1;
79      private char [ ] listLevels = new char [ MAX_LIST_LEVELS + 1]; // max number of levels allowed
80      private boolean blockquoteBR = false;
81      private boolean inTable = false;
82      private int mediawikiTableLevel = 0;
83      protected int HEADING_LEVEL_SHIFT = 1; // make =h2, ==h3, ...
84      protected String HEADING_ID_PREFIX = null;
85      private String _strTableClass = "";
86      private String _strParentTableClass = "";
87      private String _strTocClass = "toc";
88      public String markdown = "";
/**
 * Creates a parser without parsing anything.
 * <p>
 * Intended for subclasses only: they are expected to call {@code parse()} themselves to complete construction.
 */
protected WikiParser( )
{
    // intentionally empty - see the Javadoc above
}
94  
/**
 * Creates a parser and immediately parses the given wiki text.
 *
 * @param wikiText
 *            the wiki text handed to {@code parse()}
 */
protected WikiParser( String wikiText )
{
    this.parse( wikiText );
}
99  
100     protected void setTableClass( String strClass )
101     {
102         _strTableClass = strClass;
103     }
104 
105 
106     protected void setParentTableClass( String strParentClass )
107     {
108         _strParentTableClass = strParentClass;
109     }
110 
111 
112     protected void setTocClass( String strClass )
113     {
114         _strTocClass = strClass;
115     }
116 
117     public static String renderXHTML( String wikiText )
118     {
119         return new WikiParser( wikiText ).toString( );
120     }
121 
122     protected void parse( String wikiText )
123     {
124         wikiText = preprocessWikiText( wikiText );
125 
126         this.wikiText = wikiText;
127         wikiLength = this.wikiText.length( );
128         wikiChars = new char [ wikiLength];
129         this.wikiText.getChars( 0, wikiLength, wikiChars, 0 );
130 
131         while ( parseBlock( ) )
132             ;
133 
134         closeListsAndTables( );
135 
136         while ( mediawikiTableLevel-- > 0 )
137             sb.append( "</td></tr></table></div>\n" );
138 
139         completeTOC( );
140     }
141     protected void parseMD( String wikiText )
142     {
143         MutableDataSet options = new MutableDataSet( );
144 
145 
146         List<Extension> extensions =
147                 java.util.Arrays.asList(
148                         com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension.create(),
149                     /* Input and Output Example of escaped.character.EscapedCharacterExtension.create(),
150 
151                     ```markdown
152                     This is a text with escaped character: \*star\*
153                     ```
154                     **Output:**
155                     ```html
156                     <p>This is a text with escaped character: *star*</p>
157                     ```
158                      */
159                         com.vladsch.flexmark.ext.media.tags.MediaTagsExtension.create( ),
160                       /* Input and Output Example of com.vladsch.flexmark.ext.media.tags.MediaTagsExtension.create( ),
161                         ```markdown
162                         Here is a video link: ![Video](http://example.com/video.mp4)
163                         ```
164                         **Output:**
165                         ```html
166                         <p>Here is a video link: <video src="http://example.com/video.mp4" controls></video></p>
167                         ```
168                          */
169 
170                         // marche pas
171                     //    com.vladsch.flexmark.ext.xwiki.macros.MacroExtension.create( ),
172                         /* Input and Output Example of com.vladsch.flexmark.ext.xwiki.macros.MacroExtension.create( ),
173                         ```markdown
174                         This is a macro in XWiki syntax: {{info}}This is an info macro in XWiki syntax.{{/info}}
175                                 ```
176                                 **Output:**
177                                 ```html
178                                 <div class="info">This  is an info macro in XWiki syntax.</div>
179                                 ```
180                          */
181 
182                         // marche pas
183                      //   com.vladsch.flexmark.ext.enumerated.reference.EnumeratedReferenceExtension.create( ),
184                         /* Input and Output Example of com.vladsch.flexmark.ext.enumerated.reference.EnumeratedReferenceExtension
185                         ```markdown
186                         This is a reference: [(1)]
187                          [(1)]: This is the enumerated reference.
188                         *
189                         **Output:**
190                         ```html
191                         <p>This is a reference: <a href="#enumerated-reference-1" id="enumerated-reference-link-1">(1)</a></p>
192                         <p id="enumerated-reference-1">This is the enumerated reference.</p>
193                          */
194 
195 
196                         com.vladsch.flexmark.ext.tables.TablesExtension.create( ),
197                         /* Input and Output Example of com.vladsch.flexmark.ext.tables.TablesExtension
198                         **Input (Markdown with table):**
199                         ```markdown
200                         | Header 1 | Header 2 |
201                         |----------|----------|
202                         | Cell 1   | Cell 2   |
203                         | Cell 3   | Cell 4   |
204                         ```
205                         **Output (HTML):**
206 
207                         ```html
208                         <table>
209                         <thead>
210                         <tr>
211                         <th>Header 1</th>
212                         <th>Header 2</th>
213                         </tr>
214                         </thead>
215                         <tbody>
216                         <tr>
217                         <td>Cell 1</td>
218                         <td>Cell 2</td>
219                         </tr>
220                         <tr>
221                         <td>Cell 3</td>
222                         <td>Cell 4</td>
223                         </tr>
224                         </tbody>
225                         </table>
226                         ```
227                          */
228                         com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension.create( ),
229                         /* Input and Output Example of com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension
230                         **Input (Markdown with strikethrough):**
231                         * **Input (Markdown with strikethrough):**
232 
233                         ```markdown
234                         This is a ~~strikethrough~~ text.
235                         ```
236 
237                         **Output (HTML):**
238 
239                         ```html
240                         <p>This is a <del>strikethrough</del> text.</p>
241                         ```
242                          */
243 
244 
245                         com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension.create( ),
246                         /* Input and Output Example of com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension
247                           **Input (Markdown with task list):**
248                             ```markdown
249                             - [ ] Task 1
250                             - [x] Task 2
251                             - [ ] Task 3
252                             ```
253 
254                             **Output (HTML):**
255 
256                             ```html
257                             <ul>
258                             <li class="task-list-item"><input disabled="" type="checkbox"> Task 1</li>
259                             <li class="task-list-item"><input checked="" disabled="" type="checkbox"> Task 2</li>
260                             <li class="task-list-item"><input disabled="" type="checkbox"> Task 3</li>
261                             </ul>
262                             ```
263                          */
264 
265 // images a ajouter aux liens suivant /img/heart.png
266 //                        com.vladsch.flexmark.ext.emoji.EmojiExtension.create( ),
267                         /* Input and Output Example of com.vladsch.flexmark.ext.emoji.EmojiExtension
268                         **Input (Markdown with emoji):**
269                         ```markdown
270                         * :smile:
271                         * :heart:
272                         * :+1:
273                         * :-1:
274                         * :100:
275                         *
276                          ** Output (HTML):**
277                          *
278                          ```html
279                                               <p><img src="/img/smile.png" alt="emoji people:smile" height="20" width="20" align="absmiddle">
280                         <img src="/img/heart.png" alt="emoji people:heart" height="20" width="20" align="absmiddle">
281                         <img src="/img/plus1.png" alt="emoji people:+1" height="20" width="20" align="absmiddle">
282                         <img src="/img/-1.png" alt="emoji people:-1" height="20" width="20" align="absmiddle">
283                         <img src="/img/100.png" alt="emoji symbols:100" height="20" width="20" align="absmiddle"></p>
284                          */
285 
286 
287                       // fonction pas
288                         // com.vladsch.flexmark.ext.toc.TocExtension.create( ),
289                         /* Input and Output Example of com.vladsch.flexmark.ext.toc.TocExtension
290                           com.vladsch.flexmark.ext.toc.TocExtension.create()
291 
292                         # Heading 1
293 
294                         ## Subheading 1.1
295 
296                         ### Subheading 1.1.1
297 
298                         ## Subheading 1.2
299 
300                         # Heading 2
301 
302                         ## Subheading 2.1
303 
304                         <div class="toc">
305                           <ul>
306                             <li><a href="#heading-1">Heading 1</a>
307                               <ul>
308                                 <li><a href="#subheading-1-1">Subheading 1.1</a>
309                                   <ul>
310                                     <li><a href="#subheading-1-1-1">Subheading 1.1.1</a></li>
311                                   </ul>
312                                 </li>
313                                 <li><a href="#subheading-1-2">Subheading 1.2</a></li>
314                               </ul>
315                             </li>
316                             <li><a href="#heading-2">Heading 2</a>
317                               <ul>
318                                 <li><a href="#subheading-2-1">Subheading 2.1</a></li>
319                               </ul>
320                             </li>
321                           </ul>
322                         </div>
323                         <h1 id="heading-1">Heading 1</h1>
324                         <h2 id="subheading-1-1">Subheading 1.1</h2>
325                         <h3 id="subheading-1-1-1">Subheading 1.1.1</h3>
326                         <h2 id="subheading-1-2">Subheading 1.2</h2>
327                         <h1 id="heading-2">Heading 2</h1>
328                         <h2 id="subheading-2-1">Subheading 2.1</h2>
329                          */
330 
331 
332                         com.vladsch.flexmark.ext.typographic.TypographicExtension.create( ),
333                         /* Input and Output Example of com.vladsch.flexmark.ext.typographic.TypographicExtension
334                         **Input (Markdown with typographic characters):**
335                             ```markdown
336                             "Hello, World!"
337 
338                             'Hello, World!'
339 
340                             --Hello, World!--
341 
342                             ...Hello, World...
343                             ```
344 
345                             **Output (HTML):**
346 
347                             ```html
348                             <p>“Hello, World!”</p>
349 
350                             <p>‘Hello, World!’</p>
351 
352                             <p>–Hello, World!–</p>
353 
354                             <p>…Hello, World…</p>
355                             ```
356                          */
357                         com.vladsch.flexmark.ext.youtube.embedded.YouTubeLinkExtension.create( ),
358                         /* Input and Output Example of com.vladsch.flexmark.ext.youtube.embedded.YouTubeLinkExtension
359                         ```markdown
360                             Check out this cool video: @[](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
361                             ```
362                             **Output:**
363 
364                             The output will be an HTML string with the YouTube link converted into an embedded YouTube video.
365 
366                             ```html
367                             <p>Check out this cool video: <iframe width="560" height="315" src="https://www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>
368                             ```
369                          */
370 
371                        com.vladsch.flexmark.ext.macros.MacrosExtension.create( ),
372                         /* Input and Output Example of com.vladsch.flexmark.ext.macros.MacrosExtension
373                         https://github.com/vsch/flexmark-java/wiki/Macros-Extension
374                         ```markdown
375                          >>>blockMacro
376                             1. item 1
377                             1. item 2
378                             <<<
379 
380                             <<<blockMacro>>>
381                             ```
382                             **Output:**
383 
384                             ```html
385                         <p>Paragraph with a
386                         <ol>
387                           <li>item 1</li>
388                           <li>item 2</li>
389                         </ol>
390                         inserted.</p>                            ```
391                          */
392                         com.vladsch.flexmark.ext.definition.DefinitionExtension.create( ),
393                          /* Input and Output Example of com.vladsch.flexmark.ext.definition.DefinitionExtension
394                         ```markdown
395                             Term 1
396                             :   Definition 1
397 
398                             Term 2
399                             :   Definition 2
400                             ```
401                             **Output:**
402 
403                             The output will be an HTML string with the definition list converted into a definition list.
404 
405                             ```html
406                             <dl>
407                             <dt>Term 1</dt>
408                             <dd>Definition 1</dd>
409                             <dt>Term 2</dt>
410                             <dd>Definition 2</dd>
411                             </dl>
412                             ```
413                          */
414 
415                          com.vladsch.flexmark.ext.autolink.AutolinkExtension.create( )
416                        /* Input and Output Example of com.vladsch.flexmark.ext.autolink.AutolinkExtension
417                         ```markdown
418                             This is a link: <http://www.google.com>
419                             ```
420                             **Output:**
421 
422                             The output will be an HTML string with the link converted into an HTML anchor tag.
423 
424                             ```html
425                             <p>This is a link: <a href="http://www.google.com">http://www.google.com</a></p>
426                             ```
427                          */
428 
429    //                 com.vladsch.flexmark.ext.anchorlink.AnchorLinkExtension.create( )
430                         /* Input and Output Example of com.vladsch.flexmark.ext.anchorlink.AnchorLinkExtension
431 
432                         ```markdown
433                             ## Table of Contents
434                             [TOC]
435                             ```
436                             **Output:**
437 
438                             The output will be an HTML string with the table of contents converted into a list of links to the headings in the document.
439 
440                             ```html
441                             <h2 id="table-of-contents">Table of Contents</h2>
442                             <div class="toc">
443                             <ul>
444                             <li><a href="#heading-1">Heading 1</a></li>
445                             <li><a href="#heading-2">Heading 2</a></li>
446                             </ul>
447                             </div>
448                             <h1 id="heading-1">Heading 1</h1>
449                             <h2 id="heading-2">Heading 2</h2>
450                             ```
451                          */
452 
453                         // fonctionne pas
454                     //    com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension.create( ),
455                         /* Input and Output Example of com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension
456                         ```markdown
457                             *[HTML]: Hyper Text Markup Language
458                             The abbreviation for HTML is *[HTML].
459                             ```
460                             **Output:**
461 
462                             The output will be an HTML string with the abbreviation converted
463 
464                                 ```html
465                                 <p>The abbreviation for <abbr title="Hyper Text Markup Language">HTML</abbr> is <abbr title="Hyper Text Markup Language">HTML</abbr>.</p>
466                                 ```
467                          */
468 
469 
470            //             com.vladsch.flexmark.ext.admonition.AdmonitionExtension.create( ),
471                        /* Input and Output Example of com.vladsch.flexmark.ext.admonition.AdmonitionExtension
472                         ```markdown
473                             !!! note
474                             This is a note admonition.
475                             ```
476                             **Output:**
477 
478                             The output will be an HTML string with the admonition converted into a styled block.
479 
480                             ```html
481                                <div class="adm-block adm-note">
482                                 <div class="adm-heading">
483                                 <svg class="adm-icon"><use xlink:href="#adm-note"></use></svg><span>Note</span>
484                                 </div>
485                                 <div class="adm-body">
486                                 <p>This is a note admonition.
487                                 This Wiki aims to be a very simple collaborative tool fully integrated to your Lutece site</p>
488                                 </div>
489                                 </div>
490 
491                             ```
492                         */
493 
494                         // fonctionne pas
495                     //    com.vladsch.flexmark.ext.aside.AsideExtension.create( ),
496                         /* Input and Output Example of com.vladsch.flexmark.ext.aside.AsideExtension
497                         ```markdown
498                             ::: aside
499                             This is an aside block.
500                             :::
501                             ```
502                             **Output:**
503 
504                             The output will be an HTML string with the aside block converted into a styled block.
505 
506                             ```html
507                             <div class="aside">
508                             <p>This is an aside block.</p>
509                             </div>
510                             ```
511                          */
512 
513                         // pour ajouter des class, id et autres attributs aux balises html
514               //          com.vladsch.flexmark.ext.attributes.AttributesExtension.create( ),
515                         /* Input and Output Example of com.vladsch.flexmark.ext.attributes.AttributesExtension
516                         https://github.com/vsch/flexmark-java/wiki/Attributes-Extension
517                         ```markdown
518                             This is a paragraph with a custom attribute {#my-id}
519                             ```info {#not-id} not {title="Title" caption="Cap"} {caption="Caption"}
520                             ```
521                             ```
522                             **Output:**
523 
524                             The output will be an HTML string with the custom attribute converted.
525 
526                             ```html
527                             <p id="my-id">This is a paragraph with a custom attribute</p>
528                             <pre title="Title" caption="Caption"><code class="language-info"></code></pre>
529                             ```
530                          */
531 
532 
533 
534 
535                         // github flavoured markdown extentions
536                 //        com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension.create( ),
537                 //        com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension.create( ),
538                         //        com.vladsch.flexmark.ext.gfm.issues.GfmIssuesExtension.create( ),
539 
540 
541                  // Gitlab markdown extention fonctionne pas
542                    //    com.vladsch.flexmark.ext.gitlab.GitLabExtension.create()
543                   /*
544                   **Input (GitLab-flavored Markdown):**
545 
546                     ```markdown
547                     # Heading
548 
549                     Hello @username, this is a test.
550 
551                     - [ ] Task 1
552                     - [x] Task 2
553 
554                     See issue #123 for more details.
555 
556                     ```
557 
558                     **Output (HTML):**
559 
560                     ```html
561                     <h1>Heading</h1>
562 
563                     <p>Hello <a href="/username">@username</a>, this is a test.</p>
564 
565                     <ul>
566                       <li><input type="checkbox" disabled> Task 1</li>
567                       <li><input type="checkbox" disabled checked> Task 2</li>
568                     </ul>
569 
570                     <p>See issue <a href="/issues/123">#123</a> for more details.</p>
571 
572 
573                     ```
574                    */
575 
576                         // Set the EXTANCHORLINKS extension to prevent HTML conversion
577                    //     com.vladsch.flexmark.ext.anchorlink.AnchorLinkExtension.create( )
578                 );
579         options.set(Parser.EXTENSIONS, extensions);
580         options.set(HtmlRenderer.INDENT_SIZE, 2);
581         options.set(HtmlRenderer.PERCENT_ENCODE_URLS, true);
582         options.set(HtmlRenderer.GENERATE_HEADER_ID, true);
583         options.set(HtmlRenderer.RENDER_HEADER_ID, true);
584         options.set(HtmlRenderer.ESCAPE_HTML, true);
585         options.set(HtmlRenderer.ESCAPE_INLINE_HTML, false);
586         
587         String _strTableClass = "table";
588 
589         // Configure tables with custom classes
590         if (_strTableClass != null && !_strTableClass.isEmpty()) {
591             options.set(com.vladsch.flexmark.ext.tables.TablesExtension.CLASS_NAME, _strTableClass);
592         }
593         
594         Parser parser = Parser.builder(options).build();
595         HtmlRenderer renderer = HtmlRenderer.builder(options).build();
596         markdown = wikiText;
597         List<String> customInputsHtml = extractCustomInputs();
598 
599         com.vladsch.flexmark.util.ast.Node document = parser.parse(markdown);
600         String html = renderer.render(document);
601         String htmlWithCustomInputs = remplaceCustomInputs( html, customInputsHtml );
602         sb = new StringBuilder( htmlWithCustomInputs);
603 
604     }
605     /**
606      * Preprocess wiki text before parsing
607      * @param wikiText
608      * @return
609      */
610     public String remplaceCustomInputs( String html, List<String> customInputsHtml )
611     {
612         for ( int i = 0; i < customInputsHtml.size( ); i++ )
613         {
614             html = html.replace( CUSTOM_INPUTS_TO_REEMPLACE +"_"+ i, customInputsHtml.get( i ) );
615         }
616         return html;
617     }
618     /**
619      * get custom inputs from the wiki text
620      * @param wikiText
621      * @return
622      */
623     public List<String> extractCustomInputs ()
624     {
625         // Check what is writen at first between two $$ markers to find custom inputs names
626         List <String> customInputsHtml = new ArrayList <>( );
627         String regex = "\\$\\$(.*?)\\$\\$";
628         Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
629         Matcher matcher = pattern.matcher(markdown);
630         int iteration = 0;
631         while (matcher.find()) {
632             // get customInputName if there is one (only letters and numbers)
633             String customInput = matcher.group(1).trim();
634               String customInputName = "";
635             for ( int i = 0; i < customInput.length( ); i++ )
636             {
637 
638                 char c = customInput.charAt( i );
639                 String regexText = "[a-zA-Z0-9]";
640                 if (Character.toString(c).matches(regexText))
641                 {
642                     customInputName += c;
643                 }
644                 else {
645                     break;
646                 }
647             }
648             // if the custom input is not empty extract params and fill the template thought MarkdownCustomInputs.renderCustomInHtml ( customInput, customInputName );
649             if(customInputName != null && !customInputName.isEmpty())
650             {
651                 String html = MarkdownCustomInputs.renderCustomInHtml( customInput, customInputName );
652 
653                 if(html != null && !html.isEmpty())
654                 {
655                     customInputsHtml.add(html);
656                     // Reemplace the custom input by a marker, to reemplace it with the html after the markdown is converted to html
657                     int startIntpot = markdown.indexOf("$$");
658                     int endPostion = markdown.indexOf("$$", startIntpot + 2);
659                     markdown = markdown.substring(0, startIntpot) + markdown.substring(endPostion + 2);
660                     markdown = markdown.substring(0, startIntpot) + CUSTOM_INPUTS_TO_REEMPLACE +"_"+ iteration + markdown.substring(startIntpot);
661                     iteration++;
662                 }
663             }
664         }
665       return customInputsHtml;
666     }
667 
668     public String toString( )
669     {
670         return sb.toString( );
671     }
672 
673     private void closeListsAndTables( )
674     {
675         // close unclosed lists
676         while ( listLevel >= 0 )
677         {
678             sb.append( LIST_CLOSE [LIST_CHARS.indexOf( listLevels [listLevel--] )] );
679         }
680 
681         if ( inTable )
682         {
683             sb.append( "</table></div>\n" );
684             inTable = false;
685         }
686     }
687 
688     private boolean parseBlock( )
689     {
690         for ( ; ( pos < wikiLength ) && ( wikiChars [pos] <= ' ' ) && ( wikiChars [pos] != '\n' ); pos++ )
691             ; // skip whitespace
692 
693         if ( pos >= wikiLength )
694         {
695             return false;
696         }
697 
698         char c = wikiChars [pos];
699 
700         if ( c == '\n' )
701         { // blank line => end of list/table; no other meaning
702             closeListsAndTables( );
703             pos++;
704 
705             return true;
706         }
707 
708         if ( c == '|' )
709         { // table
710 
711             if ( mediawikiTableLevel > 0 )
712             {
713                 int pp = pos + 1;
714 
715                 if ( pp < wikiLength )
716                 {
717                     boolean newRow = false;
718                     boolean endTable = false;
719 
720                     if ( wikiChars [pp] == '-' )
721                     { // mediawiki-table new row
722                         newRow = true;
723                         pp++;
724                     }
725                     else
726                         if ( wikiChars [pp] == '}' )
727                         { // mediawiki-table end table
728                             endTable = true;
729                             pp++;
730                         }
731 
732                     for ( ; ( pp < wikiLength ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
733                         ; // skip spaces
734 
735                     if ( ( pp == wikiLength ) || ( wikiChars [pp] == '\n' ) )
736                     { // nothing else on the line => it's mediawiki-table markup
737                         closeListsAndTables( ); // close lists if any
738                         sb.append( newRow ? "</td></tr>\n<tr><td>" : ( endTable ? "</td></tr></table></div>\n" : "</td>\n<td>" ) );
739 
740                         if ( endTable )
741                         {
742                             mediawikiTableLevel--;
743                         }
744 
745                         pos = pp + 1;
746 
747                         return pp < wikiLength;
748                     }
749                 }
750             }
751 
752             if ( !inTable )
753             {
754                 closeListsAndTables( ); // close lists if any
755                 sb.append( "<div class=\"").append( _strParentTableClass ).append( "\" >" ).append("<table class=\"" ).append( _strTableClass ).append( "\" >" );
756                 inTable = true;
757             }
758 
759             pos = parseTableRow( pos + 1 );
760 
761             return true;
762         }
763         else
764         {
765             if ( inTable )
766             {
767                 sb.append( "</table></div>\n" );
768                 inTable = false;
769             }
770         }
771 
772         if ( ( listLevel >= 0 ) || ( LIST_CHARS.indexOf( c ) >= 0 ) )
773         { // lists
774 
775             int lc;
776 
777             // count list level
778             for ( lc = 0; ( lc <= listLevel ) && ( ( pos + lc ) < wikiLength ) && ( wikiChars [pos + lc] == listLevels [lc] ); lc++ )
779                 ;
780 
781             if ( lc <= listLevel )
782             { // end list block(s)
783 
784                 do
785                 {
786                     sb.append( LIST_CLOSE [LIST_CHARS.indexOf( listLevels [listLevel--] )] );
787                 }
788                 while ( lc <= listLevel );
789 
790                 // list(s) closed => retry from the same position
791                 blockquoteBR = true;
792 
793                 return true;
794             }
795             else
796             {
797                 if ( ( pos + lc ) >= wikiLength )
798                 {
799                     return false;
800                 }
801 
802                 char cc = wikiChars [pos + lc];
803                 int listType = LIST_CHARS.indexOf( cc );
804 
805                 if ( ( listType >= 0 ) && ( ( pos + lc + 1 ) < wikiLength ) && ( wikiChars [pos + lc + 1] != cc ) && ( listLevel < MAX_LIST_LEVELS ) )
806                 { // new list block
807                     sb.append( LIST_OPEN [listType] );
808                     listLevels [++listLevel] = cc;
809                     blockquoteBR = true;
810                     pos = parseListItem( pos + lc + 1 );
811 
812                     return true;
813                 }
814                 else
815                     if ( listLevel >= 0 )
816                     { // list item - same level
817 
818                         if ( ( listLevels [listLevel] == '>' ) || ( listLevels [listLevel] == ':' ) )
819                         {
820                             sb.append( '\n' );
821                         }
822                         else
823                             if ( listLevels [listLevel] == '!' )
824                             {
825                                 sb.append( "</div>\n<div class='wiki_center'>" );
826                             }
827                             else
828                             {
829                                 sb.append( "</li>\n<li>" );
830                             }
831 
832                         pos = parseListItem( pos + lc );
833 
834                         return true;
835                     }
836             }
837         }
838 
839         if ( c == '=' )
840         { // heading
841 
842             int hc;
843 
844             // count heading level
845             for ( hc = 1; ( hc < 6 ) && ( ( pos + hc ) < wikiLength ) && ( wikiChars [pos + hc] == '=' ); hc++ )
846                 ;
847 
848             if ( ( pos + hc ) >= wikiLength )
849             {
850                 return false;
851             }
852 
853             int p;
854 
855             for ( p = pos + hc; ( p < wikiLength ) && ( ( wikiChars [p] == ' ' ) || ( wikiChars [p] == '\t' ) ); p++ )
856                 ; // skip spaces
857 
858             String tagName = "h" + ( hc + HEADING_LEVEL_SHIFT );
859             sb.append( "<" + tagName + " id=''>" ); // real id to be inserted after parsing this item
860 
861             int hStart = sb.length( );
862             pos = parseItem( p, wikiText.substring( pos, pos + hc ), ContextType.HEADER );
863 
864             String hText = sb.substring( hStart, sb.length( ) );
865             sb.append( "</" + tagName + ">\n" );
866 
867             String anchorId = generateTOCAnchorId( hc, hText );
868             sb.insert( hStart - 2, anchorId );
869             appendTOCItem( hc, anchorId, hText );
870 
871             return true;
872         }
873         else
874             if ( c == '{' )
875             { // nowiki-block?
876 
877                 if ( ( ( pos + 2 ) < wikiLength ) && ( wikiChars [pos + 1] == '{' ) && ( wikiChars [pos + 2] == '{' ) )
878                 {
879                     int startNowiki = pos + 3;
880                     int endNowiki = findEndOfNowiki( startNowiki );
881                     int endPos = endNowiki + 3;
882 
883                     if ( wikiText.lastIndexOf( '\n', endNowiki ) >= startNowiki )
884                     { // block <pre>
885 
886                         if ( wikiChars [startNowiki] == '\n' )
887                         {
888                             startNowiki++; // skip the very first '\n'
889                         }
890 
891                         if ( wikiChars [endNowiki - 1] == '\n' )
892                         {
893                             endNowiki--; // omit the very last '\n'
894                         }
895 
896                         // sb.append( "<pre>" );
897                         appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
898                         // sb.append( "</pre>\n" );
899                         pos = endPos;
900 
901                         return true;
902                     }
903 
904                     // else inline <nowiki> - proceed to regular paragraph handling
905                 }
906                 else
907                     if ( ( ( pos + 1 ) < wikiLength ) && ( wikiChars [pos + 1] == '|' ) )
908                     { // mediawiki-table?
909 
910                         int pp;
911 
912                         for ( pp = pos + 2; ( pp < wikiLength ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
913                             ; // skip spaces
914 
915                         if ( ( pp == wikiLength ) || ( wikiChars [pp] == '\n' ) )
916                         { // yes, it's start of a table
917                             sb.append( "<div class=\"").append( _strParentTableClass ).append( "\" >" ).append("<table class=\"" ).append( _strTableClass ).append( "\"><tr><td>" );
918                             mediawikiTableLevel++;
919                             pos = pp + 1;
920 
921                             return pp < wikiLength;
922                         }
923                     }
924             }
925             else
926                 if ( ( c == '-' ) && wikiText.startsWith( "----", pos ) )
927                 {
928                     int p;
929 
930                     for ( p = pos + 4; ( p < wikiLength ) && ( ( wikiChars [p] == ' ' ) || ( wikiChars [p] == '\t' ) ); p++ )
931                         ; // skip spaces
932 
933                     if ( ( p == wikiLength ) || ( wikiChars [p] == '\n' ) )
934                     {
935                         sb.append( "\n<hr/>\n" );
936                         pos = p;
937 
938                         return true;
939                     }
940                 }
941                 else
942                     if ( c == '~' )
943                     { // block-level escaping: '*' '-' '#' '>' ':' '!' '|' '='
944 
945                         if ( ( pos + 1 ) < wikiLength )
946                         {
947                             char nc = wikiChars [pos + 1];
948 
949                             if ( ( nc == '>' ) || ( nc == ':' ) || ( nc == '-' ) || ( nc == '|' ) || ( nc == '=' ) || ( nc == '!' ) )
950                             { // can't be inline markup
951                                 pos++; // skip '~' and proceed to regular paragraph handling
952                                 c = nc;
953                             }
954                             else
955                                 if ( ( nc == '*' ) || ( nc == '#' ) )
956                                 { // might be inline markup so need to double check
957 
958                                     char nnc = ( ( pos + 2 ) < wikiLength ) ? wikiChars [pos + 2] : 0;
959 
960                                     if ( nnc != nc )
961                                     {
962                                         pos++; // skip '~' and proceed to regular paragraph handling
963                                         c = nc;
964                                     }
965 
966                                     // otherwise escaping will be done at line level
967                                 }
968                                 else
969                                     if ( nc == '{' )
970                                     { // might be inline {{{ markup so need to double check
971 
972                                         char nnc = ( ( pos + 2 ) < wikiLength ) ? wikiChars [pos + 2] : 0;
973 
974                                         if ( nnc == '|' )
975                                         { // mediawiki-table?
976                                             pos++; // skip '~' and proceed to regular paragraph handling
977                                             c = nc;
978                                         }
979 
980                                         // otherwise escaping will be done at line level
981                                     }
982                         }
983                     }
984 
985         sb.append( "<p>" );
986         pos = parseItem( pos, null, ContextType.PARAGRAPH );
987         sb.append( "</p>\n" );
988 
989         return true;
990     }
991 
992     /**
993      * Finds first closing '}}}' for nowiki block or span. Skips escaped sequences: '~}}}'.
994      *
995      * @param startBlock
996      *            points to first char after '{{{'
997      * @return position of first '}' in closing '}}}'
998      */
999     private int findEndOfNowiki( int startBlock )
1000     {
1001         // NOTE: this method could step back one char from startBlock position
1002         int endBlock = startBlock - 3;
1003 
1004         do
1005         {
1006             endBlock = wikiText.indexOf( "}}}", endBlock + 3 );
1007 
1008             if ( endBlock < 0 )
1009             {
1010                 return wikiLength; // no matching '}}}' found
1011             }
1012 
1013             while ( ( ( endBlock + 3 ) < wikiLength ) && ( wikiChars [endBlock + 3] == '}' ) )
1014                 endBlock++; // shift to end of sequence of more than 3x'}' (eg. '}}}}}')
1015         }
1016         while ( wikiChars [endBlock - 1] == '~' );
1017 
1018         return endBlock;
1019     }
1020 
1021     /**
1022      * Greedy version of findEndOfNowiki(). It finds the last possible closing '}}}' before next opening '{{{'. Also uses escapes '~{{{' and '~}}}'.
1023      *
1024      * @param startBlock
1025      *            points to first char after '{{{'
1026      * @return position of first '}' in closing '}}}'
1027      */
1028     @SuppressWarnings( "unused" )
1029     private int findEndOfNowikiGreedy( int startBlock )
1030     {
1031         // NOTE: this method could step back one char from startBlock position
1032         int nextBlock = startBlock - 3;
1033 
1034         do
1035         {
1036             do
1037             {
1038                 nextBlock = wikiText.indexOf( "{{{", nextBlock + 3 );
1039             }
1040             while ( ( nextBlock > 0 ) && ( wikiChars [nextBlock - 1] == '~' ) );
1041 
1042             if ( nextBlock < 0 )
1043             {
1044                 nextBlock = wikiLength;
1045             }
1046 
1047             int endBlock = wikiText.lastIndexOf( "}}}", nextBlock );
1048 
1049             if ( ( endBlock >= startBlock ) && ( wikiChars [endBlock - 1] != '~' ) )
1050             {
1051                 return endBlock;
1052             }
1053         }
1054         while ( nextBlock < wikiLength );
1055 
1056         return wikiLength;
1057     }
1058 
1059     /**
1060      * @param start
1061      *            points to first char after pipe '|'
1062      * @return
1063      */
1064     private int parseTableRow( int start )
1065     {
1066         if ( start >= wikiLength )
1067         {
1068             return wikiLength;
1069         }
1070 
1071         sb.append( "<tr>" );
1072 
1073         boolean endOfRow = false;
1074 
1075         do
1076         {
1077             int colspan = 0;
1078 
1079             while ( ( ( start + colspan ) < wikiLength ) && ( wikiChars [start + colspan] == '|' ) )
1080                 colspan++;
1081 
1082             start += colspan;
1083             colspan++;
1084 
1085             boolean th = ( start < wikiLength ) && ( wikiChars [start] == '=' );
1086             start += ( th ? 1 : 0 );
1087 
1088             while ( ( start < wikiLength ) && ( wikiChars [start] <= ' ' ) && ( wikiChars [start] != '\n' ) )
1089                 start++; // trim whitespace from the start
1090 
1091             if ( ( start >= wikiLength ) || ( wikiChars [start] == '\n' ) )
1092             { // skip last empty column
1093                 start++; // eat '\n'
1094 
1095                 break;
1096             }
1097 
1098             sb.append( th ? "<th" : "<td" );
1099 
1100             if ( colspan > 1 )
1101             {
1102                 sb.append( " colspan=\"" + colspan + "\"" );
1103             }
1104 
1105             sb.append( '>' );
1106 
1107             try
1108             {
1109                 parseItemThrow( start, null, ContextType.TABLE_CELL );
1110             }
1111             catch( EndOfSubContextException e )
1112             { // end of cell
1113                 start = e.position;
1114 
1115                 if ( start >= wikiLength )
1116                 {
1117                     endOfRow = true;
1118                 }
1119                 else
1120                     if ( wikiChars [start] == '\n' )
1121                     {
1122                         start++; // eat '\n'
1123                         endOfRow = true;
1124                     }
1125             }
1126             catch( EndOfContextException e )
1127             {
1128                 start = e.position;
1129                 endOfRow = true;
1130             }
1131 
1132             sb.append( th ? "</th>" : "</td>" );
1133         }
1134         while ( !endOfRow /* && start<wikiLength && wikiChars[start]!='\n' */);
1135 
1136         sb.append( "</tr>\n" );
1137 
1138         return start;
1139     }
1140 
1141     /**
1142      * Same as parseItem(); blank line adds &lt;br/&gt;&lt;br/&gt;
1143      *
1144      * @param start
1145      */
1146     private int parseListItem( int start )
1147     {
1148         while ( ( start < wikiLength ) && ( wikiChars [start] <= ' ' ) && ( wikiChars [start] != '\n' ) )
1149             start++; // skip spaces
1150 
1151         int end = parseItem( start, null, ContextType.LIST_ITEM );
1152 
1153         if ( ( ( listLevels [listLevel] == '>' ) || ( listLevels [listLevel] == ':' ) ) && ( wikiText.substring( start, end ).trim( ).length( ) == 0 ) )
1154         { // empty line within blockquote/div
1155 
1156             if ( !blockquoteBR )
1157             {
1158                 sb.append( "<br/><br/>" );
1159                 blockquoteBR = true;
1160             }
1161         }
1162         else
1163         {
1164             blockquoteBR = false;
1165         }
1166 
1167         return end;
1168     }
1169 
1170     /**
1171      * @param p
1172      *            points to first slash in suspected URI (scheme://etc)
1173      * @param start
1174      *            points to beginning of parsed item
1175      * @param end
1176      *            points to end of parsed item
1177      *
1178      * @return array of two integer offsets [begin_uri, end_uri] if matched, null otherwise
1179      */
1180     private int [ ] checkURI( int p, int start, int end )
1181     {
1182         if ( ( p > start ) && ( wikiChars [p - 1] == ':' ) )
1183         { // "://" found
1184 
1185             int pb = p - 1;
1186 
1187             while ( ( pb > start ) && isLatinLetterOrDigit( wikiChars [pb - 1] ) )
1188                 pb--;
1189 
1190             int pe = p + 2;
1191 
1192             while ( ( pe < end ) && isUrlChar( wikiChars [pe] ) )
1193                 pe++;
1194 
1195             URI uri = null;
1196 
1197             do
1198             {
1199                 while ( ( pe > ( p + 2 ) ) && ( ",.;:?!%)".indexOf( wikiChars [pe - 1] ) >= 0 ) )
1200                     pe--; // don't want these chars at the end of URI
1201 
1202                 try
1203                 { // verify URL syntax
1204                     uri = new URI( wikiText.substring( pb, pe ) );
1205                 }
1206                 catch( URISyntaxException e )
1207                 {
1208                     pe--; // try chopping from the end
1209                 }
1210             }
1211             while ( ( uri == null ) && ( pe > ( p + 2 ) ) );
1212 
1213             if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1214             {
1215                 int [ ] offs = {
1216                         pb, pe
1217                 };
1218 
1219                 return offs;
1220             }
1221         }
1222 
1223         return null;
1224     }
1225 
1226     private int parseItem( int start, String delimiter, ContextType context )
1227     {
1228         try
1229         {
1230             return parseItemThrow( start, delimiter, context );
1231         }
1232         catch( EndOfContextException e )
1233         {
1234             return e.position;
1235         }
1236     }
1237 
1238     private int parseItemThrow( int start, String delimiter, ContextType context ) throws EndOfContextException
1239     {
1240         StringBuilder tb = new StringBuilder( );
1241 
1242         boolean specialCaseDelimiterHandling = "//".equals( delimiter );
1243         int p = start;
1244         int end = wikiLength;
1245 
1246         try
1247         {
1248             nextChar: while ( true )
1249             {
1250                 if ( p >= end )
1251                 {
1252                     throw new EndOfContextException( end ); // break;
1253                 }
1254 
1255                 if ( ( delimiter != null ) && wikiText.startsWith( delimiter, p ) )
1256                 {
1257                     if ( !specialCaseDelimiterHandling || ( checkURI( p, start, end ) == null ) )
1258                     {
1259                         p += delimiter.length( );
1260 
1261                         return p;
1262                     }
1263                 }
1264 
1265                 char c = wikiChars [p];
1266                 boolean atLineStart = false;
1267 
1268                 // context-defined break test
1269                 if ( c == '\n' )
1270                 {
1271                     if ( ( context == ContextType.HEADER ) || ( context == ContextType.TABLE_CELL ) )
1272                     {
1273                         p++;
1274                         throw new EndOfContextException( p );
1275                     }
1276 
1277                     if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '\n' ) )
1278                     { // blank line delimits everything
1279                         p++; // eat one '\n' and leave another one unparsed so parseBlock() can close all lists
1280                         throw new EndOfContextException( p );
1281                     }
1282 
1283                     for ( p++; ( p < end ) && ( wikiChars [p] <= ' ' ) && ( wikiChars [p] != '\n' ); p++ )
1284                         ; // skip whitespace
1285 
1286                     if ( p >= end )
1287                     {
1288                         throw new EndOfContextException( p ); // end of text reached
1289                     }
1290 
1291                     c = wikiChars [p];
1292                     atLineStart = true;
1293 
1294                     if ( ( c == '-' ) && wikiText.startsWith( "----", p ) )
1295                     { // check for ---- <hr>
1296 
1297                         int pp;
1298 
1299                         for ( pp = p + 4; ( pp < end ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
1300                             ; // skip spaces
1301 
1302                         if ( ( pp == end ) || ( wikiChars [pp] == '\n' ) )
1303                         {
1304                             throw new EndOfContextException( p ); // yes, it's <hr>
1305                         }
1306                     }
1307 
1308                     if ( LIST_CHARS.indexOf( c ) >= 0 )
1309                     { // start of list item?
1310 
1311                         if ( FORMAT_CHARS.indexOf( c ) < 0 )
1312                         {
1313                             throw new EndOfContextException( p );
1314                         }
1315 
1316                         // here we have a list char, which also happen to be a format char
1317                         if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] != c ) )
1318                         {
1319                             throw new EndOfContextException( p ); // format chars go in pairs
1320                         }
1321 
1322                         if ( /* context==ContextType.LIST_ITEM */
1323                         ( listLevel >= 0 ) && ( c == listLevels [0] ) )
1324                         {
1325                             // c matches current list's first level, so it must be new list item
1326                             throw new EndOfContextException( p );
1327                         }
1328 
1329                         // otherwise it must be just formatting sequence => no break of context
1330                     }
1331                     else
1332                         if ( c == '=' )
1333                         { // header
1334                             throw new EndOfContextException( p );
1335                         }
1336                         else
1337                             if ( c == '|' )
1338                             { // table or mediawiki-table
1339                                 throw new EndOfContextException( p );
1340                             }
1341                             else
1342                                 if ( c == '{' )
1343                                 { // mediawiki-table?
1344 
1345                                     if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '|' ) )
1346                                     {
1347                                         int pp;
1348 
1349                                         for ( pp = p + 2; ( pp < end ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
1350                                             ; // skip spaces
1351 
1352                                         if ( ( pp == end ) || ( wikiChars [pp] == '\n' ) )
1353                                         {
1354                                             throw new EndOfContextException( p ); // yes, it's start of a table
1355                                         }
1356                                     }
1357                                 }
1358 
1359                     // if none matched add '\n' to text buffer
1360                     tb.append( '\n' );
1361 
1362                     // p and c already shifted past the '\n' and whitespace after, so go on
1363                 }
1364                 else
1365                     if ( c == '|' )
1366                     {
1367                         if ( context == ContextType.TABLE_CELL )
1368                         {
1369                             p++;
1370                             throw new EndOfSubContextException( p );
1371                         }
1372                     }
1373 
1374                 int formatType;
1375 
1376                 if ( c == '{' )
1377                 {
1378                     if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '{' ) )
1379                     {
1380                         if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 2] == '{' ) )
1381                         { // inline or block <nowiki>
1382                             appendText( tb.toString( ) );
1383                             tb.delete( 0, tb.length( ) ); // flush text buffer
1384 
1385                             int startNowiki = p + 3;
1386                             int endNowiki = findEndOfNowiki( startNowiki );
1387                             p = endNowiki + 3;
1388 
1389                             if ( wikiText.lastIndexOf( '\n', endNowiki ) >= startNowiki )
1390                             { // block <pre>
1391 
1392                                 if ( wikiChars [startNowiki] == '\n' )
1393                                 {
1394                                     startNowiki++; // skip the very first '\n'
1395                                 }
1396 
1397                                 if ( wikiChars [endNowiki - 1] == '\n' )
1398                                 {
1399                                     endNowiki--; // omit the very last '\n'
1400                                 }
1401 
1402                                 if ( context == ContextType.PARAGRAPH )
1403                                 {
1404                                     sb.append( "</p>" ); // break the paragraph because XHTML does not allow <pre> children of <p>
1405                                 }
1406 
1407                                 // sb.append( "<pre>" );
1408                                 appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
1409                                 // sb.append( "</pre>\n" );
1410 
1411                                 if ( context == ContextType.PARAGRAPH )
1412                                 {
1413                                     sb.append( "<p>" ); // continue the paragraph
1414                                                         // if (context==ContextType.NOWIKI_BLOCK) return p; // in this context return immediately after nowiki
1415                                 }
1416                             }
1417                             else
1418                             { // inline <nowiki>
1419                                 appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
1420                             }
1421 
1422                             continue;
1423                         }
1424                         else
1425                             if ( ( p + 2 ) < end )
1426                             { // {{image}}
1427 
1428                                 int endImg = wikiText.indexOf( "}}", p + 2 );
1429 
1430                                 if ( ( endImg >= 0 ) && ( endImg < end ) )
1431                                 {
1432                                     appendText( tb.toString( ) );
1433                                     tb.delete( 0, tb.length( ) ); // flush text buffer
1434                                     appendImage( wikiText.substring( p + 2, endImg ) );
1435                                     p = endImg + 2;
1436 
1437                                     continue;
1438                                 }
1439                             }
1440                     }
1441                 }
1442                 else
1443                     if ( c == '[' )
1444                     {
1445                         if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '[' ) )
1446                         { // [[link]]
1447 
1448                             int endLink = wikiText.indexOf( "]]", p + 2 );
1449 
1450                             if ( ( endLink >= 0 ) && ( endLink < end ) )
1451                             {
1452                                 appendText( tb.toString( ) );
1453                                 tb.delete( 0, tb.length( ) ); // flush text buffer
1454                                 appendLink( wikiText.substring( p + 2, endLink ) );
1455                                 p = endLink + 2;
1456 
1457                                 continue;
1458                             }
1459                         }
1460                     }
1461                     else
1462                         if ( c == '\\' )
1463                         {
1464                             if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '\\' ) )
1465                             { // \\ = <br/>
1466                                 appendText( tb.toString( ) );
1467                                 tb.delete( 0, tb.length( ) ); // flush text buffer
1468                                 sb.append( "<br/>" );
1469                                 p += 2;
1470 
1471                                 continue;
1472                             }
1473                         }
1474                         else
1475                             if ( c == '<' )
1476                             {
1477                                 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '<' ) )
1478                                 {
1479                                     if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 2] == '<' ) )
1480                                     { // <<<macro>>>
1481 
1482                                         int endMacro = wikiText.indexOf( ">>>", p + 3 );
1483 
1484                                         if ( ( endMacro >= 0 ) && ( endMacro < end ) )
1485                                         {
1486                                             appendText( tb.toString( ) );
1487                                             tb.delete( 0, tb.length( ) ); // flush text buffer
1488                                             appendMacro( wikiText.substring( p + 3, endMacro ) );
1489                                             p = endMacro + 3;
1490 
1491                                             continue;
1492                                         }
1493                                     }
1494                                 }
1495                             }
1496                             else
1497                                 if ( ( formatType = FORMAT_CHARS.indexOf( c ) ) >= 0 )
1498                                 {
1499                                     if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == c ) )
1500                                     {
1501                                         appendText( tb.toString( ) );
1502                                         tb.delete( 0, tb.length( ) ); // flush text buffer
1503 
1504                                         if ( c == '/' )
1505                                         { // special case for "//" - check if it is part of URL (scheme://etc)
1506 
1507                                             int [ ] uriOffs = checkURI( p, start, end );
1508 
1509                                             if ( uriOffs != null )
1510                                             {
1511                                                 int pb = uriOffs [0];
1512                                                 int pe = uriOffs [1];
1513 
1514                                                 if ( ( pb > start ) && ( wikiChars [pb - 1] == '~' ) )
1515                                                 {
1516                                                     sb.delete( sb.length( ) - ( p - pb + 1 ), sb.length( ) ); // roll back URL + tilde
1517                                                     sb.append( escapeHTML( wikiText.substring( pb, pe ) ) );
1518                                                 }
1519                                                 else
1520                                                 {
1521                                                     sb.delete( sb.length( ) - ( p - pb ), sb.length( ) ); // roll back URL
1522                                                     appendLink( wikiText.substring( pb, pe ) );
1523                                                 }
1524 
1525                                                 p = pe;
1526 
1527                                                 continue;
1528                                             }
1529                                         }
1530 
1531                                         sb.append( FORMAT_TAG_OPEN [formatType] );
1532 
1533                                         try
1534                                         {
1535                                             p = parseItemThrow( p + 2, FORMAT_DELIM [formatType], context );
1536                                         }
1537                                         finally
1538                                         {
1539                                             sb.append( FORMAT_TAG_CLOSE [formatType] );
1540                                         }
1541 
1542                                         continue;
1543                                     }
1544                                 }
1545                                 else
1546                                     if ( c == '~' )
1547                                     { // escape
1548                                       // most start line escapes are dealt with in parseBlock()
1549 
1550                                         if ( atLineStart )
1551                                         {
1552                                             // same as block-level escaping: '*' '-' '#' '>' ':' '|' '='
1553                                             if ( ( p + 1 ) < end )
1554                                             {
1555                                                 char nc = wikiChars [p + 1];
1556 
1557                                                 if ( ( nc == '>' ) || ( nc == ':' ) || ( nc == '-' ) || ( nc == '|' ) || ( nc == '=' ) || ( nc == '!' ) )
1558                                                 { // can't be inline markup
1559                                                     tb.append( nc );
1560                                                     p += 2; // skip '~' and nc
1561 
1562                                                     continue nextChar;
1563                                                 }
1564                                                 else
1565                                                     if ( ( nc == '*' ) || ( nc == '#' ) )
1566                                                     { // might be inline markup so need to double check
1567 
1568                                                         char nnc = ( ( p + 2 ) < end ) ? wikiChars [p + 2] : 0;
1569 
1570                                                         if ( nnc != nc )
1571                                                         {
1572                                                             tb.append( nc );
1573                                                             p += 2; // skip '~' and nc
1574 
1575                                                             continue nextChar;
1576                                                         }
1577 
1578                                                         // otherwise escaping will be done at line level
1579                                                     }
1580                                                     else
1581                                                         if ( nc == '{' )
1582                                                         { // might be inline {{{ markup so need to double check
1583 
1584                                                             char nnc = ( ( p + 2 ) < end ) ? wikiChars [p + 2] : 0;
1585 
1586                                                             if ( nnc == '|' )
1587                                                             { // mediawiki-table?
1588                                                                 tb.append( nc );
1589                                                                 tb.append( nnc );
1590                                                                 p += 3; // skip '~', nc and nnc
1591 
1592                                                                 continue nextChar;
1593                                                             }
1594 
1595                                                             // otherwise escaping will be done as usual at line level
1596                                                         }
1597                                             }
1598                                         }
1599 
1600                                         for ( String e : ESCAPED_INLINE_SEQUENCES )
1601                                         {
1602                                             if ( wikiText.startsWith( e, p + 1 ) )
1603                                             {
1604                                                 tb.append( e );
1605                                                 p += ( 1 + e.length( ) );
1606 
1607                                                 continue nextChar;
1608                                             }
1609                                         }
1610                                     }
1611                                     else
1612                                         if ( c == '-' )
1613                                         { // ' -- ' => &ndash;
1614 
1615                                             if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 1] == '-' ) && ( wikiChars [p + 2] == ' ' ) && ( p > start )
1616                                                     && ( wikiChars [p - 1] == ' ' ) )
1617                                             {
1618                                                 // appendText(tb.toString()); tb.delete(0, tb.length()); // flush text buffer
1619                                                 // sb.append("&ndash; ");
1620                                                 tb.append( "&ndash; " ); // &ndash; = "\u2013 "
1621                                                 p += 3;
1622 
1623                                                 continue;
1624                                             }
1625                                         }
1626 
1627                 tb.append( c );
1628                 p++;
1629             }
1630         }
1631         finally
1632         {
1633             appendText( tb.toString( ) );
1634             tb.delete( 0, tb.length( ) ); // flush text buffer
1635         }
1636     }
1637 
1638     protected void appendMacro( String text )
1639     {
1640         if ( "TOC".equals( text ) )
1641         {
1642             sb.append( "!!!TOC!!!" ); // put TOC placeholder for replacing it later with real TOC
1643         }
1644         else
1645         {
1646             sb.append( "&lt;&lt;&lt;Macro:" );
1647             sb.append( escapeHTML( unescapeHTML( text ) ) );
1648             sb.append( "&gt;&gt;&gt;" );
1649         }
1650     }
1651 
1652     protected void appendLink( String text )
1653     {
1654         String [ ] link = split( text, '|' );
1655         URI uri = null;
1656 
1657         try
1658         { // validate URI
1659             uri = new URI( link [0].trim( ) );
1660         }
1661         catch( URISyntaxException e )
1662         {
1663         }
1664 
1665         if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1666         {
1667             sb.append( "<a href=\"" + escapeHTML( uri.toString( ) ) + "\" rel=\"nofollow\">" );
1668             sb.append( escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) ) );
1669             sb.append( "</a>" );
1670         }
1671         else
1672         {
1673             sb.append( "<a href=\"#\" title=\"Internal link\">" );
1674             sb.append( escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) ) );
1675             sb.append( "</a>" );
1676         }
1677     }
1678 
1679     protected void appendImage( String text )
1680     {
1681         String [ ] link = split( text, '|' );
1682         URI uri = null;
1683 
1684         try
1685         { // validate URI
1686             uri = new URI( link [0].trim( ) );
1687         }
1688         catch( URISyntaxException e )
1689         {
1690         }
1691 
1692         if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1693         {
1694             String alt = escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) );
1695             sb.append( "<img src=\"" + escapeHTML( uri.toString( ) ) + "\" alt=\"" + alt + "\" title=\"" + alt + "\" />" );
1696         }
1697         else
1698         {
1699             sb.append( "&lt;&lt;&lt;Internal image(?): " );
1700             sb.append( escapeHTML( unescapeHTML( text ) ) );
1701             sb.append( "&gt;&gt;&gt;" );
1702         }
1703     }
1704 
1705     protected void appendText( String text )
1706     {
1707         sb.append( escapeHTML( unescapeHTML( text ) ) );
1708     }
1709 
1710     protected String generateTOCAnchorId( int hLevel, String text )
1711     {
1712         int i = 0;
1713         String id = ( ( HEADING_ID_PREFIX != null ) ? HEADING_ID_PREFIX : ( "H" + hLevel + "_" ) )
1714                 + translit( text.replaceAll( "<.+?>", "" ) ).trim( ).replaceAll( "\\s+", "_" ).replaceAll( "[^a-zA-Z0-9_-]", "" );
1715 
1716         while ( tocAnchorIds.contains( id ) )
1717         { // avoid duplicates
1718             i++;
1719             id = text + "_" + i;
1720         }
1721 
1722         tocAnchorIds.add( id );
1723 
1724         return id;
1725     }
1726 
1727     protected void appendTOCItem( int level, String anchorId, String text )
1728     {
1729         if ( level > tocLevel )
1730         {
1731             while ( level > tocLevel )
1732             {
1733                 toc.append( "<ul><li>" );
1734                 tocLevel++;
1735             }
1736         }
1737         else
1738         {
1739             while ( level < tocLevel )
1740             {
1741                 toc.append( "</li></ul>" );
1742                 tocLevel--;
1743             }
1744 
1745             toc.append( "</li>\n<li>" );
1746         }
1747 
1748         toc.append( "<a href='#page_url#" + anchorId + "'>" + text + "</a>" );
1749     }
1750 
1751     protected void completeTOC( )
1752     {
1753         while ( 0 < tocLevel )
1754         {
1755             toc.append( "</li></ul>" );
1756             tocLevel--;
1757         }
1758 
1759         int idx;
1760         String tocDiv = "<div class=\"" + _strTocClass + "\">" + toc.toString( ) + "</div>";
1761 
1762         while ( ( idx = sb.indexOf( "!!!TOC!!!" ) ) >= 0 )
1763         {
1764             sb.replace( idx, idx + 9, tocDiv );
1765         }
1766     }
1767 
1768     protected void appendNowiki( String text )
1769     {
1770         sb.append( escapeHTML( replaceString( replaceString( text, "~{{{", "{{{" ), "~}}}", "}}}" ) ) );
1771     }
1772 
1773     private static class EndOfContextException extends Exception
1774     {
1775         private static final long serialVersionUID = 1L;
1776         int position;
1777 
1778         public EndOfContextException( int position )
1779         {
1780             super( );
1781             this.position = position;
1782         }
1783     }
1784 
1785     private static class EndOfSubContextException extends EndOfContextException
1786     {
1787         private static final long serialVersionUID = 1L;
1788 
1789         public EndOfSubContextException( int position )
1790         {
1791             super( position );
1792         }
1793     }
1794 
1795     private static enum ContextType
1796     {
1797         PARAGRAPH, LIST_ITEM, TABLE_CELL, HEADER, NOWIKI_BLOCK;
1798     }
1799 }