1 /*
2 * Copyright 2007-2009 Yaroslav Stavnichiy, yarosla@gmail.com
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Latest version of this software can be obtained from:
17 *
18 * http://t4-wiki-parser.googlecode.com
19 *
20 * If you make use of this code, I'd appreciate hearing about it.
21 * Comments, suggestions, and bug reports welcome: yarosla@gmail.com
22 */
23 package ys.wikiparser;
24
25 import static ys.wikiparser.Utils.*;
26 import com.vladsch.flexmark.parser.Parser;
27 import com.vladsch.flexmark.html.HtmlRenderer;
28 import com.vladsch.flexmark.util.data.MutableDataSet;
29 import com.vladsch.flexmark.util.misc.Extension;
30 import java.net.*;
31
32 import java.util.HashSet;
33 import java.util.List;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36 import java.util.ArrayList;
37 /**
38 * WikiParser.renderXHTML() is the main method of this class. It takes wiki-text and returns XHTML.
39 *
40 * WikiParser's behavior can be customized by overriding appendXxx() methods, which should make integration of this class into any wiki/blog/forum software easy
41 * and painless.
42 *
43 * @author Yaroslav Stavnichiy (yarosla@gmail.com)
44 *
45 */
46 public class WikiParser
47 {
48 private static final int MAX_LIST_LEVELS = 100;
49 private static final String [ ] ESCAPED_INLINE_SEQUENCES = {
50 "{{{", "{{", "}}}", "**", "//", "__", "##", "\\\\", "[[", "<<<", "~", "--", "|"
51 };
52 private static final String LIST_CHARS = "*-#>:!";
53 private static final String [ ] LIST_OPEN = {
54 "<ul><li>", "<ul><li>", "<ol><li>", "<blockquote>", "<div class='wiki_indent'>", "<div class='wiki_center'>"
55 };
56 private static final String [ ] LIST_CLOSE = {
57 "</li></ul>\n", "</li></ul>\n", "</li></ol>\n", "</blockquote>\n", "</div>\n", "</div>\n"
58 };
59 private static final String FORMAT_CHARS = "*/_#";
60 private static final String [ ] FORMAT_DELIM = {
61 "**", "//", "__", "##"
62 };
63 private static final String [ ] FORMAT_TAG_OPEN = {
64 "<strong>", "<em>", "<span class=\"wiki_underline\">", "<tt>"
65 };
66 private static final String [ ] FORMAT_TAG_CLOSE = {
67 "</strong>", "</em>", "</span>", "</tt>"
68 };
69 public static String CUSTOM_INPUTS_TO_REEMPLACE = "//CustomInputToReEmplace//";
70 private int wikiLength;
71 private char [ ] wikiChars;
72 protected StringBuilder sb = new StringBuilder( );
73 protected StringBuilder toc = new StringBuilder( );
74 protected int tocLevel = 0;
75 private HashSet<String> tocAnchorIds = new HashSet<String>( );
76 private String wikiText;
77 private int pos = 0;
78 private int listLevel = -1;
79 private char [ ] listLevels = new char [ MAX_LIST_LEVELS + 1]; // max number of levels allowed
80 private boolean blockquoteBR = false;
81 private boolean inTable = false;
82 private int mediawikiTableLevel = 0;
83 protected int HEADING_LEVEL_SHIFT = 1; // make =h2, ==h3, ...
84 protected String HEADING_ID_PREFIX = null;
85 private String _strTableClass = "";
86 private String _strParentTableClass = "";
87 private String _strTocClass = "toc";
88 public String markdown = "";
/**
 * Creates a parser without parsing anything. Reserved for subclasses, which are expected to call
 * {@code parse()} themselves in order to complete construction.
 */
protected WikiParser( )
{
    super( );
}
94
/**
 * Creates a parser and immediately parses the given wiki text.
 *
 * @param wikiText
 *            the wiki source handed to {@code parse()}
 */
protected WikiParser( String wikiText )
{
    this.parse( wikiText );
}
99
100 protected void setTableClass( String strClass )
101 {
102 _strTableClass = strClass;
103 }
104
105
106 protected void setParentTableClass( String strParentClass )
107 {
108 _strParentTableClass = strParentClass;
109 }
110
111
112 protected void setTocClass( String strClass )
113 {
114 _strTocClass = strClass;
115 }
116
117 public static String renderXHTML( String wikiText )
118 {
119 return new WikiParser( wikiText ).toString( );
120 }
121
122 protected void parse( String wikiText )
123 {
124 wikiText = preprocessWikiText( wikiText );
125
126 this.wikiText = wikiText;
127 wikiLength = this.wikiText.length( );
128 wikiChars = new char [ wikiLength];
129 this.wikiText.getChars( 0, wikiLength, wikiChars, 0 );
130
131 while ( parseBlock( ) )
132 ;
133
134 closeListsAndTables( );
135
136 while ( mediawikiTableLevel-- > 0 )
137 sb.append( "</td></tr></table></div>\n" );
138
139 completeTOC( );
140 }
141 protected void parseMD( String wikiText )
142 {
143 MutableDataSet options = new MutableDataSet( );
144
145
146 List<Extension> extensions =
147 java.util.Arrays.asList(
148 com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension.create(),
149 /* Input and Output Example of escaped.character.EscapedCharacterExtension.create(),
150
151 ```markdown
152 This is a text with escaped character: \*star\*
153 ```
154 **Output:**
155 ```html
156 <p>This is a text with escaped character: *star*</p>
157 ```
158 */
159 com.vladsch.flexmark.ext.media.tags.MediaTagsExtension.create( ),
160 /* Input and Output Example of com.vladsch.flexmark.ext.media.tags.MediaTagsExtension.create( ),
161 ```markdown
162 Here is a video link: 
163 ```
164 **Output:**
165 ```html
166 <p>Here is a video link: <video src="http://example.com/video.mp4" controls></video></p>
167 ```
168 */
169
170 // marche pas
171 // com.vladsch.flexmark.ext.xwiki.macros.MacroExtension.create( ),
172 /* Input and Output Example of com.vladsch.flexmark.ext.xwiki.macros.MacroExtension.create( ),
173 ```markdown
174 This is a macro in XWiki syntax: {{info}}This is an info macro in XWiki syntax.{{/info}}
175 ```
176 **Output:**
177 ```html
178 <div class="info">This is an info macro in XWiki syntax.</div>
179 ```
180 */
181
182 // marche pas
183 // com.vladsch.flexmark.ext.enumerated.reference.EnumeratedReferenceExtension.create( ),
184 /* Input and Output Example of com.vladsch.flexmark.ext.enumerated.reference.EnumeratedReferenceExtension
185 ```markdown
186 This is a reference: [(1)]
187 [(1)]: This is the enumerated reference.
188 *
189 **Output:**
190 ```html
191 <p>This is a reference: <a href="#enumerated-reference-1" id="enumerated-reference-link-1">(1)</a></p>
192 <p id="enumerated-reference-1">This is the enumerated reference.</p>
193 */
194
195
196 com.vladsch.flexmark.ext.tables.TablesExtension.create( ),
197 /* Input and Output Example of com.vladsch.flexmark.ext.tables.TablesExtension
198 **Input (Markdown with table):**
199 ```markdown
200 | Header 1 | Header 2 |
201 |----------|----------|
202 | Cell 1 | Cell 2 |
203 | Cell 3 | Cell 4 |
204 ```
205 **Output (HTML):**
206
207 ```html
208 <table>
209 <thead>
210 <tr>
211 <th>Header 1</th>
212 <th>Header 2</th>
213 </tr>
214 </thead>
215 <tbody>
216 <tr>
217 <td>Cell 1</td>
218 <td>Cell 2</td>
219 </tr>
220 <tr>
221 <td>Cell 3</td>
222 <td>Cell 4</td>
223 </tr>
224 </tbody>
225 </table>
226 ```
227 */
228 com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension.create( ),
229 /* Input and Output Example of com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension
230 **Input (Markdown with strikethrough):**
231 * **Input (Markdown with strikethrough):**
232
233 ```markdown
234 This is a ~~strikethrough~~ text.
235 ```
236
237 **Output (HTML):**
238
239 ```html
240 <p>This is a <del>strikethrough</del> text.</p>
241 ```
242 */
243
244
245 com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension.create( ),
246 /* Input and Output Example of com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension
247 **Input (Markdown with task list):**
248 ```markdown
249 - [ ] Task 1
250 - [x] Task 2
251 - [ ] Task 3
252 ```
253
254 **Output (HTML):**
255
256 ```html
257 <ul>
258 <li class="task-list-item"><input disabled="" type="checkbox"> Task 1</li>
259 <li class="task-list-item"><input checked="" disabled="" type="checkbox"> Task 2</li>
260 <li class="task-list-item"><input disabled="" type="checkbox"> Task 3</li>
261 </ul>
262 ```
263 */
264
265 // images a ajouter aux liens suivant /img/heart.png
266 // com.vladsch.flexmark.ext.emoji.EmojiExtension.create( ),
267 /* Input and Output Example of com.vladsch.flexmark.ext.emoji.EmojiExtension
268 **Input (Markdown with emoji):**
269 ```markdown
270 * :smile:
271 * :heart:
272 * :+1:
273 * :-1:
274 * :100:
275 *
276 ** Output (HTML):**
277 *
278 ```html
279 <p><img src="/img/smile.png" alt="emoji people:smile" height="20" width="20" align="absmiddle">
280 <img src="/img/heart.png" alt="emoji people:heart" height="20" width="20" align="absmiddle">
281 <img src="/img/plus1.png" alt="emoji people:+1" height="20" width="20" align="absmiddle">
282 <img src="/img/-1.png" alt="emoji people:-1" height="20" width="20" align="absmiddle">
283 <img src="/img/100.png" alt="emoji symbols:100" height="20" width="20" align="absmiddle"></p>
284 */
285
286
287 // fonction pas
288 // com.vladsch.flexmark.ext.toc.TocExtension.create( ),
289 /* Input and Output Example of com.vladsch.flexmark.ext.toc.TocExtension
290 com.vladsch.flexmark.ext.toc.TocExtension.create()
291
292 # Heading 1
293
294 ## Subheading 1.1
295
296 ### Subheading 1.1.1
297
298 ## Subheading 1.2
299
300 # Heading 2
301
302 ## Subheading 2.1
303
304 <div class="toc">
305 <ul>
306 <li><a href="#heading-1">Heading 1</a>
307 <ul>
308 <li><a href="#subheading-1-1">Subheading 1.1</a>
309 <ul>
310 <li><a href="#subheading-1-1-1">Subheading 1.1.1</a></li>
311 </ul>
312 </li>
313 <li><a href="#subheading-1-2">Subheading 1.2</a></li>
314 </ul>
315 </li>
316 <li><a href="#heading-2">Heading 2</a>
317 <ul>
318 <li><a href="#subheading-2-1">Subheading 2.1</a></li>
319 </ul>
320 </li>
321 </ul>
322 </div>
323 <h1 id="heading-1">Heading 1</h1>
324 <h2 id="subheading-1-1">Subheading 1.1</h2>
325 <h3 id="subheading-1-1-1">Subheading 1.1.1</h3>
326 <h2 id="subheading-1-2">Subheading 1.2</h2>
327 <h1 id="heading-2">Heading 2</h1>
328 <h2 id="subheading-2-1">Subheading 2.1</h2>
329 */
330
331
332 com.vladsch.flexmark.ext.typographic.TypographicExtension.create( ),
333 /* Input and Output Example of com.vladsch.flexmark.ext.typographic.TypographicExtension
334 **Input (Markdown with typographic characters):**
335 ```markdown
336 "Hello, World!"
337
338 'Hello, World!'
339
340 --Hello, World!--
341
342 ...Hello, World...
343 ```
344
345 **Output (HTML):**
346
347 ```html
348 <p>“Hello, World!”</p>
349
350 <p>‘Hello, World!’</p>
351
352 <p>–Hello, World!–</p>
353
354 <p>…Hello, World…</p>
355 ```
356 */
357 com.vladsch.flexmark.ext.youtube.embedded.YouTubeLinkExtension.create( ),
358 /* Input and Output Example of com.vladsch.flexmark.ext.youtube.embedded.YouTubeLinkExtension
359 ```markdown
360 Check out this cool video: @[](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
361 ```
362 **Output:**
363
364 The output will be an HTML string with the YouTube link converted into an embedded YouTube video.
365
366 ```html
367 <p>Check out this cool video: <iframe width="560" height="315" src="https://www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>
368 ```
369 */
370
371 com.vladsch.flexmark.ext.macros.MacrosExtension.create( ),
372 /* Input and Output Example of com.vladsch.flexmark.ext.macros.MacrosExtension
373 https://github.com/vsch/flexmark-java/wiki/Macros-Extension
374 ```markdown
375 >>>blockMacro
376 1. item 1
377 1. item 2
378 <<<
379
380 <<<blockMacro>>>
381 ```
382 **Output:**
383
384 ```html
385 <p>Paragraph with a
386 <ol>
387 <li>item 1</li>
388 <li>item 2</li>
389 </ol>
390 inserted.</p> ```
391 */
392 com.vladsch.flexmark.ext.definition.DefinitionExtension.create( ),
393 /* Input and Output Example of com.vladsch.flexmark.ext.definition.DefinitionExtension
394 ```markdown
395 Term 1
396 : Definition 1
397
398 Term 2
399 : Definition 2
400 ```
401 **Output:**
402
403 The output will be an HTML string with the definition list converted into a definition list.
404
405 ```html
406 <dl>
407 <dt>Term 1</dt>
408 <dd>Definition 1</dd>
409 <dt>Term 2</dt>
410 <dd>Definition 2</dd>
411 </dl>
412 ```
413 */
414
415 com.vladsch.flexmark.ext.autolink.AutolinkExtension.create( )
416 /* Input and Output Example of com.vladsch.flexmark.ext.autolink.AutolinkExtension
417 ```markdown
418 This is a link: <http://www.google.com>
419 ```
420 **Output:**
421
422 The output will be an HTML string with the link converted into an HTML anchor tag.
423
424 ```html
425 <p>This is a link: <a href="http://www.google.com">http://www.google.com</a></p>
426 ```
427 */
428
429 // com.vladsch.flexmark.ext.anchorlink.AnchorLinkExtension.create( )
430 /* Input and Output Example of com.vladsch.flexmark.ext.anchorlink.AnchorLinkExtension
431
432 ```markdown
433 ## Table of Contents
434 [TOC]
435 ```
436 **Output:**
437
438 The output will be an HTML string with the table of contents converted into a list of links to the headings in the document.
439
440 ```html
441 <h2 id="table-of-contents">Table of Contents</h2>
442 <div class="toc">
443 <ul>
444 <li><a href="#heading-1">Heading 1</a></li>
445 <li><a href="#heading-2">Heading 2</a></li>
446 </ul>
447 </div>
448 <h1 id="heading-1">Heading 1</h1>
449 <h2 id="heading-2">Heading 2</h2>
450 ```
451 */
452
453 // fonctionne pas
454 // com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension.create( ),
455 /* Input and Output Example of com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension
456 ```markdown
457 *[HTML]: Hyper Text Markup Language
458 The abbreviation for HTML is *[HTML].
459 ```
460 **Output:**
461
462 The output will be an HTML string with the abbreviation converted
463
464 ```html
465 <p>The abbreviation for <abbr title="Hyper Text Markup Language">HTML</abbr> is <abbr title="Hyper Text Markup Language">HTML</abbr>.</p>
466 ```
467 */
468
469
470 // com.vladsch.flexmark.ext.admonition.AdmonitionExtension.create( ),
471 /* Input and Output Example of com.vladsch.flexmark.ext.admonition.AdmonitionExtension
472 ```markdown
473 !!! note
474 This is a note admonition.
475 ```
476 **Output:**
477
478 The output will be an HTML string with the admonition converted into a styled block.
479
480 ```html
481 <div class="adm-block adm-note">
482 <div class="adm-heading">
483 <svg class="adm-icon"><use xlink:href="#adm-note"></use></svg><span>Note</span>
484 </div>
485 <div class="adm-body">
486 <p>This is a note admonition.
487 This Wiki aims to be a very simple collaborative tool fully integrated to your Lutece site</p>
488 </div>
489 </div>
490
491 ```
492 */
493
494 // fonctionne pas
495 // com.vladsch.flexmark.ext.aside.AsideExtension.create( ),
496 /* Input and Output Example of com.vladsch.flexmark.ext.aside.AsideExtension
497 ```markdown
498 ::: aside
499 This is an aside block.
500 :::
501 ```
502 **Output:**
503
504 The output will be an HTML string with the aside block converted into a styled block.
505
506 ```html
507 <div class="aside">
508 <p>This is an aside block.</p>
509 </div>
510 ```
511 */
512
513 // pour ajouter des class, id et autres attributs aux balises html
514 // com.vladsch.flexmark.ext.attributes.AttributesExtension.create( ),
515 /* Input and Output Example of com.vladsch.flexmark.ext.attributes.AttributesExtension
516 https://github.com/vsch/flexmark-java/wiki/Attributes-Extension
517 ```markdown
518 This is a paragraph with a custom attribute {#my-id}
519 ```info {#not-id} not {title="Title" caption="Cap"} {caption="Caption"}
520 ```
521 ```
522 **Output:**
523
524 The output will be an HTML string with the custom attribute converted.
525
526 ```html
527 <p id="my-id">This is a paragraph with a custom attribute</p>
528 <pre title="Title" caption="Caption"><code class="language-info"></code></pre>
529 ```
530 */
531
532
533
534
535 // github flavoured markdown extentions
536 // com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension.create( ),
537 // com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension.create( ),
538 // com.vladsch.flexmark.ext.gfm.issues.GfmIssuesExtension.create( ),
539
540
541 // Gitlab markdown extention fonctionne pas
542 // com.vladsch.flexmark.ext.gitlab.GitLabExtension.create()
543 /*
544 **Input (GitLab-flavored Markdown):**
545
546 ```markdown
547 # Heading
548
549 Hello @username, this is a test.
550
551 - [ ] Task 1
552 - [x] Task 2
553
554 See issue #123 for more details.
555
556 ```
557
558 **Output (HTML):**
559
560 ```html
561 <h1>Heading</h1>
562
563 <p>Hello <a href="/username">@username</a>, this is a test.</p>
564
565 <ul>
566 <li><input type="checkbox" disabled> Task 1</li>
567 <li><input type="checkbox" disabled checked> Task 2</li>
568 </ul>
569
570 <p>See issue <a href="/issues/123">#123</a> for more details.</p>
571
572
573 ```
574 */
575
576 // Set the EXTANCHORLINKS extension to prevent HTML conversion
577 // com.vladsch.flexmark.ext.anchorlink.AnchorLinkExtension.create( )
578 );
579 options.set(Parser.EXTENSIONS, extensions);
580 options.set(HtmlRenderer.INDENT_SIZE, 2);
581 options.set(HtmlRenderer.PERCENT_ENCODE_URLS, true);
582 options.set(HtmlRenderer.GENERATE_HEADER_ID, true);
583 options.set(HtmlRenderer.RENDER_HEADER_ID, true);
584 options.set(HtmlRenderer.ESCAPE_HTML, true);
585 options.set(HtmlRenderer.ESCAPE_INLINE_HTML, false);
586
587 String _strTableClass = "table";
588
589 // Configure tables with custom classes
590 if (_strTableClass != null && !_strTableClass.isEmpty()) {
591 options.set(com.vladsch.flexmark.ext.tables.TablesExtension.CLASS_NAME, _strTableClass);
592 }
593
594 Parser parser = Parser.builder(options).build();
595 HtmlRenderer renderer = HtmlRenderer.builder(options).build();
596 markdown = wikiText;
597 List<String> customInputsHtml = extractCustomInputs();
598
599 com.vladsch.flexmark.util.ast.Node document = parser.parse(markdown);
600 String html = renderer.render(document);
601 String htmlWithCustomInputs = remplaceCustomInputs( html, customInputsHtml );
602 sb = new StringBuilder( htmlWithCustomInputs);
603
604 }
605 /**
606 * Preprocess wiki text before parsing
607 * @param wikiText
608 * @return
609 */
610 public String remplaceCustomInputs( String html, List<String> customInputsHtml )
611 {
612 for ( int i = 0; i < customInputsHtml.size( ); i++ )
613 {
614 html = html.replace( CUSTOM_INPUTS_TO_REEMPLACE +"_"+ i, customInputsHtml.get( i ) );
615 }
616 return html;
617 }
618 /**
619 * get custom inputs from the wiki text
620 * @param wikiText
621 * @return
622 */
623 public List<String> extractCustomInputs ()
624 {
625 // Check what is writen at first between two $$ markers to find custom inputs names
626 List <String> customInputsHtml = new ArrayList <>( );
627 String regex = "\\$\\$(.*?)\\$\\$";
628 Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
629 Matcher matcher = pattern.matcher(markdown);
630 int iteration = 0;
631 while (matcher.find()) {
632 // get customInputName if there is one (only letters and numbers)
633 String customInput = matcher.group(1).trim();
634 String customInputName = "";
635 for ( int i = 0; i < customInput.length( ); i++ )
636 {
637
638 char c = customInput.charAt( i );
639 String regexText = "[a-zA-Z0-9]";
640 if (Character.toString(c).matches(regexText))
641 {
642 customInputName += c;
643 }
644 else {
645 break;
646 }
647 }
648 // if the custom input is not empty extract params and fill the template thought MarkdownCustomInputs.renderCustomInHtml ( customInput, customInputName );
649 if(customInputName != null && !customInputName.isEmpty())
650 {
651 String html = MarkdownCustomInputs.renderCustomInHtml( customInput, customInputName );
652
653 if(html != null && !html.isEmpty())
654 {
655 customInputsHtml.add(html);
656 // Reemplace the custom input by a marker, to reemplace it with the html after the markdown is converted to html
657 int startIntpot = markdown.indexOf("$$");
658 int endPostion = markdown.indexOf("$$", startIntpot + 2);
659 markdown = markdown.substring(0, startIntpot) + markdown.substring(endPostion + 2);
660 markdown = markdown.substring(0, startIntpot) + CUSTOM_INPUTS_TO_REEMPLACE +"_"+ iteration + markdown.substring(startIntpot);
661 iteration++;
662 }
663 }
664 }
665 return customInputsHtml;
666 }
667
668 public String toString( )
669 {
670 return sb.toString( );
671 }
672
673 private void closeListsAndTables( )
674 {
675 // close unclosed lists
676 while ( listLevel >= 0 )
677 {
678 sb.append( LIST_CLOSE [LIST_CHARS.indexOf( listLevels [listLevel--] )] );
679 }
680
681 if ( inTable )
682 {
683 sb.append( "</table></div>\n" );
684 inTable = false;
685 }
686 }
687
688 private boolean parseBlock( )
689 {
690 for ( ; ( pos < wikiLength ) && ( wikiChars [pos] <= ' ' ) && ( wikiChars [pos] != '\n' ); pos++ )
691 ; // skip whitespace
692
693 if ( pos >= wikiLength )
694 {
695 return false;
696 }
697
698 char c = wikiChars [pos];
699
700 if ( c == '\n' )
701 { // blank line => end of list/table; no other meaning
702 closeListsAndTables( );
703 pos++;
704
705 return true;
706 }
707
708 if ( c == '|' )
709 { // table
710
711 if ( mediawikiTableLevel > 0 )
712 {
713 int pp = pos + 1;
714
715 if ( pp < wikiLength )
716 {
717 boolean newRow = false;
718 boolean endTable = false;
719
720 if ( wikiChars [pp] == '-' )
721 { // mediawiki-table new row
722 newRow = true;
723 pp++;
724 }
725 else
726 if ( wikiChars [pp] == '}' )
727 { // mediawiki-table end table
728 endTable = true;
729 pp++;
730 }
731
732 for ( ; ( pp < wikiLength ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
733 ; // skip spaces
734
735 if ( ( pp == wikiLength ) || ( wikiChars [pp] == '\n' ) )
736 { // nothing else on the line => it's mediawiki-table markup
737 closeListsAndTables( ); // close lists if any
738 sb.append( newRow ? "</td></tr>\n<tr><td>" : ( endTable ? "</td></tr></table></div>\n" : "</td>\n<td>" ) );
739
740 if ( endTable )
741 {
742 mediawikiTableLevel--;
743 }
744
745 pos = pp + 1;
746
747 return pp < wikiLength;
748 }
749 }
750 }
751
752 if ( !inTable )
753 {
754 closeListsAndTables( ); // close lists if any
755 sb.append( "<div class=\"").append( _strParentTableClass ).append( "\" >" ).append("<table class=\"" ).append( _strTableClass ).append( "\" >" );
756 inTable = true;
757 }
758
759 pos = parseTableRow( pos + 1 );
760
761 return true;
762 }
763 else
764 {
765 if ( inTable )
766 {
767 sb.append( "</table></div>\n" );
768 inTable = false;
769 }
770 }
771
772 if ( ( listLevel >= 0 ) || ( LIST_CHARS.indexOf( c ) >= 0 ) )
773 { // lists
774
775 int lc;
776
777 // count list level
778 for ( lc = 0; ( lc <= listLevel ) && ( ( pos + lc ) < wikiLength ) && ( wikiChars [pos + lc] == listLevels [lc] ); lc++ )
779 ;
780
781 if ( lc <= listLevel )
782 { // end list block(s)
783
784 do
785 {
786 sb.append( LIST_CLOSE [LIST_CHARS.indexOf( listLevels [listLevel--] )] );
787 }
788 while ( lc <= listLevel );
789
790 // list(s) closed => retry from the same position
791 blockquoteBR = true;
792
793 return true;
794 }
795 else
796 {
797 if ( ( pos + lc ) >= wikiLength )
798 {
799 return false;
800 }
801
802 char cc = wikiChars [pos + lc];
803 int listType = LIST_CHARS.indexOf( cc );
804
805 if ( ( listType >= 0 ) && ( ( pos + lc + 1 ) < wikiLength ) && ( wikiChars [pos + lc + 1] != cc ) && ( listLevel < MAX_LIST_LEVELS ) )
806 { // new list block
807 sb.append( LIST_OPEN [listType] );
808 listLevels [++listLevel] = cc;
809 blockquoteBR = true;
810 pos = parseListItem( pos + lc + 1 );
811
812 return true;
813 }
814 else
815 if ( listLevel >= 0 )
816 { // list item - same level
817
818 if ( ( listLevels [listLevel] == '>' ) || ( listLevels [listLevel] == ':' ) )
819 {
820 sb.append( '\n' );
821 }
822 else
823 if ( listLevels [listLevel] == '!' )
824 {
825 sb.append( "</div>\n<div class='wiki_center'>" );
826 }
827 else
828 {
829 sb.append( "</li>\n<li>" );
830 }
831
832 pos = parseListItem( pos + lc );
833
834 return true;
835 }
836 }
837 }
838
839 if ( c == '=' )
840 { // heading
841
842 int hc;
843
844 // count heading level
845 for ( hc = 1; ( hc < 6 ) && ( ( pos + hc ) < wikiLength ) && ( wikiChars [pos + hc] == '=' ); hc++ )
846 ;
847
848 if ( ( pos + hc ) >= wikiLength )
849 {
850 return false;
851 }
852
853 int p;
854
855 for ( p = pos + hc; ( p < wikiLength ) && ( ( wikiChars [p] == ' ' ) || ( wikiChars [p] == '\t' ) ); p++ )
856 ; // skip spaces
857
858 String tagName = "h" + ( hc + HEADING_LEVEL_SHIFT );
859 sb.append( "<" + tagName + " id=''>" ); // real id to be inserted after parsing this item
860
861 int hStart = sb.length( );
862 pos = parseItem( p, wikiText.substring( pos, pos + hc ), ContextType.HEADER );
863
864 String hText = sb.substring( hStart, sb.length( ) );
865 sb.append( "</" + tagName + ">\n" );
866
867 String anchorId = generateTOCAnchorId( hc, hText );
868 sb.insert( hStart - 2, anchorId );
869 appendTOCItem( hc, anchorId, hText );
870
871 return true;
872 }
873 else
874 if ( c == '{' )
875 { // nowiki-block?
876
877 if ( ( ( pos + 2 ) < wikiLength ) && ( wikiChars [pos + 1] == '{' ) && ( wikiChars [pos + 2] == '{' ) )
878 {
879 int startNowiki = pos + 3;
880 int endNowiki = findEndOfNowiki( startNowiki );
881 int endPos = endNowiki + 3;
882
883 if ( wikiText.lastIndexOf( '\n', endNowiki ) >= startNowiki )
884 { // block <pre>
885
886 if ( wikiChars [startNowiki] == '\n' )
887 {
888 startNowiki++; // skip the very first '\n'
889 }
890
891 if ( wikiChars [endNowiki - 1] == '\n' )
892 {
893 endNowiki--; // omit the very last '\n'
894 }
895
896 // sb.append( "<pre>" );
897 appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
898 // sb.append( "</pre>\n" );
899 pos = endPos;
900
901 return true;
902 }
903
904 // else inline <nowiki> - proceed to regular paragraph handling
905 }
906 else
907 if ( ( ( pos + 1 ) < wikiLength ) && ( wikiChars [pos + 1] == '|' ) )
908 { // mediawiki-table?
909
910 int pp;
911
912 for ( pp = pos + 2; ( pp < wikiLength ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
913 ; // skip spaces
914
915 if ( ( pp == wikiLength ) || ( wikiChars [pp] == '\n' ) )
916 { // yes, it's start of a table
917 sb.append( "<div class=\"").append( _strParentTableClass ).append( "\" >" ).append("<table class=\"" ).append( _strTableClass ).append( "\"><tr><td>" );
918 mediawikiTableLevel++;
919 pos = pp + 1;
920
921 return pp < wikiLength;
922 }
923 }
924 }
925 else
926 if ( ( c == '-' ) && wikiText.startsWith( "----", pos ) )
927 {
928 int p;
929
930 for ( p = pos + 4; ( p < wikiLength ) && ( ( wikiChars [p] == ' ' ) || ( wikiChars [p] == '\t' ) ); p++ )
931 ; // skip spaces
932
933 if ( ( p == wikiLength ) || ( wikiChars [p] == '\n' ) )
934 {
935 sb.append( "\n<hr/>\n" );
936 pos = p;
937
938 return true;
939 }
940 }
941 else
942 if ( c == '~' )
943 { // block-level escaping: '*' '-' '#' '>' ':' '!' '|' '='
944
945 if ( ( pos + 1 ) < wikiLength )
946 {
947 char nc = wikiChars [pos + 1];
948
949 if ( ( nc == '>' ) || ( nc == ':' ) || ( nc == '-' ) || ( nc == '|' ) || ( nc == '=' ) || ( nc == '!' ) )
950 { // can't be inline markup
951 pos++; // skip '~' and proceed to regular paragraph handling
952 c = nc;
953 }
954 else
955 if ( ( nc == '*' ) || ( nc == '#' ) )
956 { // might be inline markup so need to double check
957
958 char nnc = ( ( pos + 2 ) < wikiLength ) ? wikiChars [pos + 2] : 0;
959
960 if ( nnc != nc )
961 {
962 pos++; // skip '~' and proceed to regular paragraph handling
963 c = nc;
964 }
965
966 // otherwise escaping will be done at line level
967 }
968 else
969 if ( nc == '{' )
970 { // might be inline {{{ markup so need to double check
971
972 char nnc = ( ( pos + 2 ) < wikiLength ) ? wikiChars [pos + 2] : 0;
973
974 if ( nnc == '|' )
975 { // mediawiki-table?
976 pos++; // skip '~' and proceed to regular paragraph handling
977 c = nc;
978 }
979
980 // otherwise escaping will be done at line level
981 }
982 }
983 }
984
985 sb.append( "<p>" );
986 pos = parseItem( pos, null, ContextType.PARAGRAPH );
987 sb.append( "</p>\n" );
988
989 return true;
990 }
991
992 /**
993 * Finds first closing '}}}' for nowiki block or span. Skips escaped sequences: '~}}}'.
994 *
995 * @param startBlock
996 * points to first char after '{{{'
997 * @return position of first '}' in closing '}}}'
998 */
999 private int findEndOfNowiki( int startBlock )
1000 {
1001 // NOTE: this method could step back one char from startBlock position
1002 int endBlock = startBlock - 3;
1003
1004 do
1005 {
1006 endBlock = wikiText.indexOf( "}}}", endBlock + 3 );
1007
1008 if ( endBlock < 0 )
1009 {
1010 return wikiLength; // no matching '}}}' found
1011 }
1012
1013 while ( ( ( endBlock + 3 ) < wikiLength ) && ( wikiChars [endBlock + 3] == '}' ) )
1014 endBlock++; // shift to end of sequence of more than 3x'}' (eg. '}}}}}')
1015 }
1016 while ( wikiChars [endBlock - 1] == '~' );
1017
1018 return endBlock;
1019 }
1020
1021 /**
1022 * Greedy version of findEndOfNowiki(). It finds the last possible closing '}}}' before next opening '{{{'. Also uses escapes '~{{{' and '~}}}'.
1023 *
1024 * @param startBlock
1025 * points to first char after '{{{'
1026 * @return position of first '}' in closing '}}}'
1027 */
1028 @SuppressWarnings( "unused" )
1029 private int findEndOfNowikiGreedy( int startBlock )
1030 {
1031 // NOTE: this method could step back one char from startBlock position
1032 int nextBlock = startBlock - 3;
1033
1034 do
1035 {
1036 do
1037 {
1038 nextBlock = wikiText.indexOf( "{{{", nextBlock + 3 );
1039 }
1040 while ( ( nextBlock > 0 ) && ( wikiChars [nextBlock - 1] == '~' ) );
1041
1042 if ( nextBlock < 0 )
1043 {
1044 nextBlock = wikiLength;
1045 }
1046
1047 int endBlock = wikiText.lastIndexOf( "}}}", nextBlock );
1048
1049 if ( ( endBlock >= startBlock ) && ( wikiChars [endBlock - 1] != '~' ) )
1050 {
1051 return endBlock;
1052 }
1053 }
1054 while ( nextBlock < wikiLength );
1055
1056 return wikiLength;
1057 }
1058
1059 /**
1060 * @param start
1061 * points to first char after pipe '|'
1062 * @return
1063 */
1064 private int parseTableRow( int start )
1065 {
1066 if ( start >= wikiLength )
1067 {
1068 return wikiLength;
1069 }
1070
1071 sb.append( "<tr>" );
1072
1073 boolean endOfRow = false;
1074
1075 do
1076 {
1077 int colspan = 0;
1078
1079 while ( ( ( start + colspan ) < wikiLength ) && ( wikiChars [start + colspan] == '|' ) )
1080 colspan++;
1081
1082 start += colspan;
1083 colspan++;
1084
1085 boolean th = ( start < wikiLength ) && ( wikiChars [start] == '=' );
1086 start += ( th ? 1 : 0 );
1087
1088 while ( ( start < wikiLength ) && ( wikiChars [start] <= ' ' ) && ( wikiChars [start] != '\n' ) )
1089 start++; // trim whitespace from the start
1090
1091 if ( ( start >= wikiLength ) || ( wikiChars [start] == '\n' ) )
1092 { // skip last empty column
1093 start++; // eat '\n'
1094
1095 break;
1096 }
1097
1098 sb.append( th ? "<th" : "<td" );
1099
1100 if ( colspan > 1 )
1101 {
1102 sb.append( " colspan=\"" + colspan + "\"" );
1103 }
1104
1105 sb.append( '>' );
1106
1107 try
1108 {
1109 parseItemThrow( start, null, ContextType.TABLE_CELL );
1110 }
1111 catch( EndOfSubContextException e )
1112 { // end of cell
1113 start = e.position;
1114
1115 if ( start >= wikiLength )
1116 {
1117 endOfRow = true;
1118 }
1119 else
1120 if ( wikiChars [start] == '\n' )
1121 {
1122 start++; // eat '\n'
1123 endOfRow = true;
1124 }
1125 }
1126 catch( EndOfContextException e )
1127 {
1128 start = e.position;
1129 endOfRow = true;
1130 }
1131
1132 sb.append( th ? "</th>" : "</td>" );
1133 }
1134 while ( !endOfRow /* && start<wikiLength && wikiChars[start]!='\n' */);
1135
1136 sb.append( "</tr>\n" );
1137
1138 return start;
1139 }
1140
1141 /**
1142 * Same as parseItem(); blank line adds <br/><br/>
1143 *
1144 * @param start
1145 */
1146 private int parseListItem( int start )
1147 {
1148 while ( ( start < wikiLength ) && ( wikiChars [start] <= ' ' ) && ( wikiChars [start] != '\n' ) )
1149 start++; // skip spaces
1150
1151 int end = parseItem( start, null, ContextType.LIST_ITEM );
1152
1153 if ( ( ( listLevels [listLevel] == '>' ) || ( listLevels [listLevel] == ':' ) ) && ( wikiText.substring( start, end ).trim( ).length( ) == 0 ) )
1154 { // empty line within blockquote/div
1155
1156 if ( !blockquoteBR )
1157 {
1158 sb.append( "<br/><br/>" );
1159 blockquoteBR = true;
1160 }
1161 }
1162 else
1163 {
1164 blockquoteBR = false;
1165 }
1166
1167 return end;
1168 }
1169
1170 /**
1171 * @param p
1172 * points to first slash in suspected URI (scheme://etc)
1173 * @param start
1174 * points to beginning of parsed item
1175 * @param end
1176 * points to end of parsed item
1177 *
1178 * @return array of two integer offsets [begin_uri, end_uri] if matched, null otherwise
1179 */
1180 private int [ ] checkURI( int p, int start, int end )
1181 {
1182 if ( ( p > start ) && ( wikiChars [p - 1] == ':' ) )
1183 { // "://" found
1184
1185 int pb = p - 1;
1186
1187 while ( ( pb > start ) && isLatinLetterOrDigit( wikiChars [pb - 1] ) )
1188 pb--;
1189
1190 int pe = p + 2;
1191
1192 while ( ( pe < end ) && isUrlChar( wikiChars [pe] ) )
1193 pe++;
1194
1195 URI uri = null;
1196
1197 do
1198 {
1199 while ( ( pe > ( p + 2 ) ) && ( ",.;:?!%)".indexOf( wikiChars [pe - 1] ) >= 0 ) )
1200 pe--; // don't want these chars at the end of URI
1201
1202 try
1203 { // verify URL syntax
1204 uri = new URI( wikiText.substring( pb, pe ) );
1205 }
1206 catch( URISyntaxException e )
1207 {
1208 pe--; // try chopping from the end
1209 }
1210 }
1211 while ( ( uri == null ) && ( pe > ( p + 2 ) ) );
1212
1213 if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1214 {
1215 int [ ] offs = {
1216 pb, pe
1217 };
1218
1219 return offs;
1220 }
1221 }
1222
1223 return null;
1224 }
1225
1226 private int parseItem( int start, String delimiter, ContextType context )
1227 {
1228 try
1229 {
1230 return parseItemThrow( start, delimiter, context );
1231 }
1232 catch( EndOfContextException e )
1233 {
1234 return e.position;
1235 }
1236 }
1237
/**
 * Parses inline wiki markup beginning at start and appends the generated output to sb.
 *
 * Plain characters are collected in a local text buffer that is flushed through appendText() before any markup output is written and, via the finally
 * block, whenever the method is left. The method returns only when the given delimiter is found; in every other case it leaves by throwing an
 * EndOfContextException (or EndOfSubContextException for a pipe inside a table cell) that carries the position where parsing stopped.
 *
 * @param start
 *            position of the first character to parse
 * @param delimiter
 *            closing sequence that ends this item, or null if there is none
 * @param context
 *            context the item is parsed in; it decides whether a newline or a pipe ends the item
 * @return position right after the closing delimiter
 * @throws EndOfContextException
 *             when the end of the text, a context-ending newline or the start of a block construct is reached
 */
private int parseItemThrow( int start, String delimiter, ContextType context ) throws EndOfContextException
{
    // buffer for plain text that has not been written to sb yet
    StringBuilder tb = new StringBuilder( );

    // "//" may also be part of a URI (scheme://etc), so a match of this delimiter needs an extra check
    boolean specialCaseDelimiterHandling = "//".equals( delimiter );
    int p = start;
    int end = wikiLength;

    try
    {
        nextChar: while ( true )
        {
            if ( p >= end )
            {
                throw new EndOfContextException( end ); // break;
            }

            if ( ( delimiter != null ) && wikiText.startsWith( delimiter, p ) )
            {
                // a "//" that belongs to a URI does not close the item
                if ( !specialCaseDelimiterHandling || ( checkURI( p, start, end ) == null ) )
                {
                    p += delimiter.length( );

                    return p;
                }
            }

            char c = wikiChars [p];
            boolean atLineStart = false;

            // context-defined break test
            if ( c == '\n' )
            {
                // headers and table cells never span more than one line
                if ( ( context == ContextType.HEADER ) || ( context == ContextType.TABLE_CELL ) )
                {
                    p++;
                    throw new EndOfContextException( p );
                }

                if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '\n' ) )
                { // blank line delimits everything
                    p++; // eat one '\n' and leave another one unparsed so parseBlock() can close all lists
                    throw new EndOfContextException( p );
                }

                for ( p++; ( p < end ) && ( wikiChars [p] <= ' ' ) && ( wikiChars [p] != '\n' ); p++ )
                    ; // skip whitespace

                if ( p >= end )
                {
                    throw new EndOfContextException( p ); // end of text reached
                }

                // from here on c is the first non-whitespace character of the new line
                c = wikiChars [p];
                atLineStart = true;

                if ( ( c == '-' ) && wikiText.startsWith( "----", p ) )
                { // check for ---- <hr>

                    int pp;

                    for ( pp = p + 4; ( pp < end ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
                        ; // skip spaces

                    if ( ( pp == end ) || ( wikiChars [pp] == '\n' ) )
                    {
                        throw new EndOfContextException( p ); // yes, it's <hr>
                    }
                }

                if ( LIST_CHARS.indexOf( c ) >= 0 )
                { // start of list item?

                    if ( FORMAT_CHARS.indexOf( c ) < 0 )
                    {
                        throw new EndOfContextException( p );
                    }

                    // here we have a list char, which also happen to be a format char
                    if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] != c ) )
                    {
                        throw new EndOfContextException( p ); // format chars go in pairs
                    }

                    if ( /* context==ContextType.LIST_ITEM */
                    ( listLevel >= 0 ) && ( c == listLevels [0] ) )
                    {
                        // c matches current list's first level, so it must be new list item
                        throw new EndOfContextException( p );
                    }

                    // otherwise it must be just formatting sequence => no break of context
                }
                else
                    if ( c == '=' )
                    { // header
                        throw new EndOfContextException( p );
                    }
                    else
                        if ( c == '|' )
                        { // table or mediawiki-table
                            throw new EndOfContextException( p );
                        }
                        else
                            if ( c == '{' )
                            { // mediawiki-table?

                                if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '|' ) )
                                {
                                    int pp;

                                    for ( pp = p + 2; ( pp < end ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
                                        ; // skip spaces

                                    if ( ( pp == end ) || ( wikiChars [pp] == '\n' ) )
                                    {
                                        throw new EndOfContextException( p ); // yes, it's start of a table
                                    }
                                }
                            }

                // if none matched add '\n' to text buffer
                tb.append( '\n' );

                // p and c already shifted past the '\n' and whitespace after, so go on
            }
            else
                if ( c == '|' )
                {
                    // a pipe closes the current table cell; the reported position is the character after it
                    if ( context == ContextType.TABLE_CELL )
                    {
                        p++;
                        throw new EndOfSubContextException( p );
                    }
                }

            int formatType;

            if ( c == '{' )
            {
                if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '{' ) )
                {
                    if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 2] == '{' ) )
                    { // inline or block <nowiki>
                        appendText( tb.toString( ) );
                        tb.delete( 0, tb.length( ) ); // flush text buffer

                        int startNowiki = p + 3;
                        int endNowiki = findEndOfNowiki( startNowiki );
                        p = endNowiki + 3;

                        // a line break inside the nowiki content makes it a block
                        if ( wikiText.lastIndexOf( '\n', endNowiki ) >= startNowiki )
                        { // block <pre>

                            if ( wikiChars [startNowiki] == '\n' )
                            {
                                startNowiki++; // skip the very first '\n'
                            }

                            if ( wikiChars [endNowiki - 1] == '\n' )
                            {
                                endNowiki--; // omit the very last '\n'
                            }

                            if ( context == ContextType.PARAGRAPH )
                            {
                                sb.append( "</p>" ); // break the paragraph because XHTML does not allow <pre> children of <p>
                            }

                            // sb.append( "<pre>" );
                            appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
                            // sb.append( "</pre>\n" );

                            if ( context == ContextType.PARAGRAPH )
                            {
                                sb.append( "<p>" ); // continue the paragraph
                                // if (context==ContextType.NOWIKI_BLOCK) return p; // in this context return immediately after nowiki
                            }
                        }
                        else
                        { // inline <nowiki>
                            appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
                        }

                        continue;
                    }
                    else
                        if ( ( p + 2 ) < end )
                        { // {{image}}

                            int endImg = wikiText.indexOf( "}}", p + 2 );

                            // without a closing "}}" the braces are kept as plain text
                            if ( ( endImg >= 0 ) && ( endImg < end ) )
                            {
                                appendText( tb.toString( ) );
                                tb.delete( 0, tb.length( ) ); // flush text buffer
                                appendImage( wikiText.substring( p + 2, endImg ) );
                                p = endImg + 2;

                                continue;
                            }
                        }
                }
            }
            else
                if ( c == '[' )
                {
                    if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '[' ) )
                    { // [[link]]

                        int endLink = wikiText.indexOf( "]]", p + 2 );

                        // without a closing "]]" the brackets are kept as plain text
                        if ( ( endLink >= 0 ) && ( endLink < end ) )
                        {
                            appendText( tb.toString( ) );
                            tb.delete( 0, tb.length( ) ); // flush text buffer
                            appendLink( wikiText.substring( p + 2, endLink ) );
                            p = endLink + 2;

                            continue;
                        }
                    }
                }
                else
                    if ( c == '\\' )
                    {
                        if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '\\' ) )
                        { // \\ = <br/>
                            appendText( tb.toString( ) );
                            tb.delete( 0, tb.length( ) ); // flush text buffer
                            sb.append( "<br/>" );
                            p += 2;

                            continue;
                        }
                    }
                    else
                        if ( c == '<' )
                        {
                            if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '<' ) )
                            {
                                if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 2] == '<' ) )
                                { // <<<macro>>>

                                    int endMacro = wikiText.indexOf( ">>>", p + 3 );

                                    if ( ( endMacro >= 0 ) && ( endMacro < end ) )
                                    {
                                        appendText( tb.toString( ) );
                                        tb.delete( 0, tb.length( ) ); // flush text buffer
                                        appendMacro( wikiText.substring( p + 3, endMacro ) );
                                        p = endMacro + 3;

                                        continue;
                                    }
                                }
                            }
                        }
                        else
                            if ( ( formatType = FORMAT_CHARS.indexOf( c ) ) >= 0 )
                            {
                                // formatting is only triggered by a doubled format char
                                if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == c ) )
                                {
                                    appendText( tb.toString( ) );
                                    tb.delete( 0, tb.length( ) ); // flush text buffer

                                    if ( c == '/' )
                                    { // special case for "//" - check if it is part of URL (scheme://etc)

                                        int [ ] uriOffs = checkURI( p, start, end );

                                        if ( uriOffs != null )
                                        {
                                            int pb = uriOffs [0];
                                            int pe = uriOffs [1];

                                            // the scheme (and a preceding '~') has just been flushed to sb as text, so it is removed again
                                            // before the URL is written as a whole
                                            if ( ( pb > start ) && ( wikiChars [pb - 1] == '~' ) )
                                            {
                                                // escaped URL: written as plain text instead of a link
                                                sb.delete( sb.length( ) - ( p - pb + 1 ), sb.length( ) ); // roll back URL + tilde
                                                sb.append( escapeHTML( wikiText.substring( pb, pe ) ) );
                                            }
                                            else
                                            {
                                                sb.delete( sb.length( ) - ( p - pb ), sb.length( ) ); // roll back URL
                                                appendLink( wikiText.substring( pb, pe ) );
                                            }

                                            p = pe;

                                            continue;
                                        }
                                    }

                                    sb.append( FORMAT_TAG_OPEN [formatType] );

                                    try
                                    {
                                        // parse the formatted span recursively up to its closing delimiter
                                        p = parseItemThrow( p + 2, FORMAT_DELIM [formatType], context );
                                    }
                                    finally
                                    {
                                        // the tag is closed even when the nested call leaves with an exception
                                        sb.append( FORMAT_TAG_CLOSE [formatType] );
                                    }

                                    continue;
                                }
                            }
                            else
                                if ( c == '~' )
                                { // escape
                                  // most start line escapes are dealt with in parseBlock()

                                    if ( atLineStart )
                                    {
                                        // same as block-level escaping: '*' '-' '#' '>' ':' '|' '='
                                        if ( ( p + 1 ) < end )
                                        {
                                            char nc = wikiChars [p + 1];

                                            if ( ( nc == '>' ) || ( nc == ':' ) || ( nc == '-' ) || ( nc == '|' ) || ( nc == '=' ) || ( nc == '!' ) )
                                            { // can't be inline markup
                                                tb.append( nc );
                                                p += 2; // skip '~' and nc

                                                continue nextChar;
                                            }
                                            else
                                                if ( ( nc == '*' ) || ( nc == '#' ) )
                                                { // might be inline markup so need to double check

                                                    char nnc = ( ( p + 2 ) < end ) ? wikiChars [p + 2] : 0;

                                                    if ( nnc != nc )
                                                    {
                                                        tb.append( nc );
                                                        p += 2; // skip '~' and nc

                                                        continue nextChar;
                                                    }

                                                    // otherwise escaping will be done at line level
                                                }
                                                else
                                                    if ( nc == '{' )
                                                    { // might be inline {{{ markup so need to double check

                                                        char nnc = ( ( p + 2 ) < end ) ? wikiChars [p + 2] : 0;

                                                        if ( nnc == '|' )
                                                        { // mediawiki-table?
                                                            tb.append( nc );
                                                            tb.append( nnc );
                                                            p += 3; // skip '~', nc and nnc

                                                            continue nextChar;
                                                        }

                                                        // otherwise escaping will be done as usual at line level
                                                    }
                                        }
                                    }

                                    // a '~' in front of a known inline sequence outputs that sequence literally;
                                    // if nothing matches, the '~' itself is kept as text below
                                    for ( String e : ESCAPED_INLINE_SEQUENCES )
                                    {
                                        if ( wikiText.startsWith( e, p + 1 ) )
                                        {
                                            tb.append( e );
                                            p += ( 1 + e.length( ) );

                                            continue nextChar;
                                        }
                                    }
                                }
                                else
                                    if ( c == '-' )
                                    { // ' -- ' => en dash

                                        // only a "--" with a space on both sides is replaced
                                        if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 1] == '-' ) && ( wikiChars [p + 2] == ' ' ) && ( p > start )
                                                && ( wikiChars [p - 1] == ' ' ) )
                                        {
                                            // appendText(tb.toString()); tb.delete(0, tb.length()); // flush text buffer
                                            // sb.append("– ");
                                            tb.append( "– " ); // en dash followed by a space ("\u2013 ")
                                            p += 3;

                                            continue;
                                        }
                                    }

            // no markup recognised at this position: keep the character as plain text
            tb.append( c );
            p++;
        }
    }
    finally
    {
        // whatever way the loop is left, pending plain text must reach the output
        appendText( tb.toString( ) );
        tb.delete( 0, tb.length( ) ); // flush text buffer
    }
}
1637
1638 protected void appendMacro( String text )
1639 {
1640 if ( "TOC".equals( text ) )
1641 {
1642 sb.append( "!!!TOC!!!" ); // put TOC placeholder for replacing it later with real TOC
1643 }
1644 else
1645 {
1646 sb.append( "<<<Macro:" );
1647 sb.append( escapeHTML( unescapeHTML( text ) ) );
1648 sb.append( ">>>" );
1649 }
1650 }
1651
1652 protected void appendLink( String text )
1653 {
1654 String [ ] link = split( text, '|' );
1655 URI uri = null;
1656
1657 try
1658 { // validate URI
1659 uri = new URI( link [0].trim( ) );
1660 }
1661 catch( URISyntaxException e )
1662 {
1663 }
1664
1665 if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1666 {
1667 sb.append( "<a href=\"" + escapeHTML( uri.toString( ) ) + "\" rel=\"nofollow\">" );
1668 sb.append( escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) ) );
1669 sb.append( "</a>" );
1670 }
1671 else
1672 {
1673 sb.append( "<a href=\"#\" title=\"Internal link\">" );
1674 sb.append( escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) ) );
1675 sb.append( "</a>" );
1676 }
1677 }
1678
1679 protected void appendImage( String text )
1680 {
1681 String [ ] link = split( text, '|' );
1682 URI uri = null;
1683
1684 try
1685 { // validate URI
1686 uri = new URI( link [0].trim( ) );
1687 }
1688 catch( URISyntaxException e )
1689 {
1690 }
1691
1692 if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1693 {
1694 String alt = escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) );
1695 sb.append( "<img src=\"" + escapeHTML( uri.toString( ) ) + "\" alt=\"" + alt + "\" title=\"" + alt + "\" />" );
1696 }
1697 else
1698 {
1699 sb.append( "<<<Internal image(?): " );
1700 sb.append( escapeHTML( unescapeHTML( text ) ) );
1701 sb.append( ">>>" );
1702 }
1703 }
1704
1705 protected void appendText( String text )
1706 {
1707 sb.append( escapeHTML( unescapeHTML( text ) ) );
1708 }
1709
1710 protected String generateTOCAnchorId( int hLevel, String text )
1711 {
1712 int i = 0;
1713 String id = ( ( HEADING_ID_PREFIX != null ) ? HEADING_ID_PREFIX : ( "H" + hLevel + "_" ) )
1714 + translit( text.replaceAll( "<.+?>", "" ) ).trim( ).replaceAll( "\\s+", "_" ).replaceAll( "[^a-zA-Z0-9_-]", "" );
1715
1716 while ( tocAnchorIds.contains( id ) )
1717 { // avoid duplicates
1718 i++;
1719 id = text + "_" + i;
1720 }
1721
1722 tocAnchorIds.add( id );
1723
1724 return id;
1725 }
1726
1727 protected void appendTOCItem( int level, String anchorId, String text )
1728 {
1729 if ( level > tocLevel )
1730 {
1731 while ( level > tocLevel )
1732 {
1733 toc.append( "<ul><li>" );
1734 tocLevel++;
1735 }
1736 }
1737 else
1738 {
1739 while ( level < tocLevel )
1740 {
1741 toc.append( "</li></ul>" );
1742 tocLevel--;
1743 }
1744
1745 toc.append( "</li>\n<li>" );
1746 }
1747
1748 toc.append( "<a href='#page_url#" + anchorId + "'>" + text + "</a>" );
1749 }
1750
1751 protected void completeTOC( )
1752 {
1753 while ( 0 < tocLevel )
1754 {
1755 toc.append( "</li></ul>" );
1756 tocLevel--;
1757 }
1758
1759 int idx;
1760 String tocDiv = "<div class=\"" + _strTocClass + "\">" + toc.toString( ) + "</div>";
1761
1762 while ( ( idx = sb.indexOf( "!!!TOC!!!" ) ) >= 0 )
1763 {
1764 sb.replace( idx, idx + 9, tocDiv );
1765 }
1766 }
1767
1768 protected void appendNowiki( String text )
1769 {
1770 sb.append( escapeHTML( replaceString( replaceString( text, "~{{{", "{{{" ), "~}}}", "}}}" ) ) );
1771 }
1772
1773 private static class EndOfContextException extends Exception
1774 {
1775 private static final long serialVersionUID = 1L;
1776 int position;
1777
1778 public EndOfContextException( int position )
1779 {
1780 super( );
1781 this.position = position;
1782 }
1783 }
1784
1785 private static class EndOfSubContextException extends EndOfContextException
1786 {
1787 private static final long serialVersionUID = 1L;
1788
1789 public EndOfSubContextException( int position )
1790 {
1791 super( position );
1792 }
1793 }
1794
1795 private static enum ContextType
1796 {
1797 PARAGRAPH, LIST_ITEM, TABLE_CELL, HEADER, NOWIKI_BLOCK;
1798 }
1799 }