1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package ys.wikiparser;
24
25 import static ys.wikiparser.Utils.*;
26 import com.vladsch.flexmark.parser.Parser;
27 import com.vladsch.flexmark.html.HtmlRenderer;
28 import com.vladsch.flexmark.util.data.MutableDataSet;
29 import com.vladsch.flexmark.util.misc.Extension;
30 import java.net.*;
31
32 import java.util.HashSet;
33 import java.util.List;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36 import java.util.ArrayList;
37
38
39
40
41
42
43
44
45
46 public class WikiParser
47 {
// Maximum index usable in listLevels; the array is allocated with MAX_LIST_LEVELS + 1 slots.
48 private static final int MAX_LIST_LEVELS = 100;
// Inline sequences that are emitted literally when preceded by '~' (consulted in parseItemThrow).
49 private static final String [ ] ESCAPED_INLINE_SEQUENCES = {
50 "{{{", "{{", "}}}", "**", "//", "__", "##", "\\\\", "[[", "<<<", "~", "--", "|"
51 };
// Line-start markers of list-like blocks; a marker's index in this string selects the
// matching entry of LIST_OPEN and LIST_CLOSE.
52 private static final String LIST_CHARS = "*-#>:!";
53 private static final String [ ] LIST_OPEN = {
54 "<ul><li>", "<ul><li>", "<ol><li>", "<blockquote>", "<div class='wiki_indent'>", "<div class='wiki_center'>"
55 };
56 private static final String [ ] LIST_CLOSE = {
57 "</li></ul>\n", "</li></ul>\n", "</li></ol>\n", "</blockquote>\n", "</div>\n", "</div>\n"
58 };
// Characters that open inline formatting when doubled; a character's index in this string
// selects the matching entry of FORMAT_DELIM, FORMAT_TAG_OPEN and FORMAT_TAG_CLOSE.
59 private static final String FORMAT_CHARS = "*/_#";
60 private static final String [ ] FORMAT_DELIM = {
61 "**", "//", "__", "##"
62 };
63 private static final String [ ] FORMAT_TAG_OPEN = {
64 "<strong>", "<em>", "<span class=\"wiki_underline\">", "<tt>"
65 };
66 private static final String [ ] FORMAT_TAG_CLOSE = {
67 "</strong>", "</em>", "</span>", "</tt>"
68 };
// Placeholder prefix: extractCustomInputs writes "<prefix>_<n>" into the Markdown text and
// remplaceCustomInputs swaps those placeholders for the rendered HTML afterwards.
69 public static String CUSTOM_INPUTS_TO_REEMPLACE = "//CustomInputToReEmplace//";
// Length and character copy of wikiText, set up by parse( ).
70 private int wikiLength;
71 private char [ ] wikiChars;
// Output buffer; toString( ) returns its content.
72 protected StringBuilder sb = new StringBuilder( );
// Table-of-contents markup accumulated by appendTOCItem, and its current nesting depth.
73 protected StringBuilder toc = new StringBuilder( );
74 protected int tocLevel = 0;
// Anchor ids already handed out by generateTOCAnchorId, used to keep ids unique.
75 private HashSet<String> tocAnchorIds = new HashSet<String>( );
// Preprocessed wiki source being parsed and the current read position in it.
76 private String wikiText;
77 private int pos = 0;
// Index of the innermost open list level (-1 when no list is open) and the marker of each open level.
78 private int listLevel = -1;
79 private char [ ] listLevels = new char [ MAX_LIST_LEVELS + 1];
// Used by parseListItem so that consecutive empty '>' / ':' lines emit "<br/><br/>" only once.
80 private boolean blockquoteBR = false;
// True while a simple ('|'-prefixed rows) table is open.
81 private boolean inTable = false;
// Number of currently open "{|" ... "|}" tables.
82 private int mediawikiTableLevel = 0;
// Added to the number of '=' signs to obtain the <hN> level of a heading.
83 protected int HEADING_LEVEL_SHIFT = 1;
// When non-null, replaces the default "H<level>_" prefix of generated heading ids.
84 protected String HEADING_ID_PREFIX = null;
// CSS classes written on the <table> element, on its wrapping <div>, and on the TOC <div>.
85 private String _strTableClass = "";
86 private String _strParentTableClass = "";
87 private String _strTocClass = "toc";
// Markdown source handled by parseMD; extractCustomInputs rewrites it in place.
88 public String markdown = "";
/**
 * Creates a parser without any input. Nothing is parsed until
 * {@link #parse(String)} or {@link #parseMD(String)} is invoked.
 */
protected WikiParser( )
{
    // Intentionally empty: all state comes from the field initializers.
}
94
/**
 * Creates a parser and immediately converts the given wiki text; the result is
 * available through {@link #toString()}.
 *
 * @param wikiText
 *            the wiki source to convert
 */
protected WikiParser( String wikiText )
{
    this.parse( wikiText );
}
99
100 protected void setTableClass( String strClass )
101 {
102 _strTableClass = strClass;
103 }
104
105
106 protected void setParentTableClass( String strParentClass )
107 {
108 _strParentTableClass = strParentClass;
109 }
110
111
112 protected void setTocClass( String strClass )
113 {
114 _strTocClass = strClass;
115 }
116
117 public static String renderXHTML( String wikiText )
118 {
119 return new WikiParser( wikiText ).toString( );
120 }
121
122 protected void parse( String wikiText )
123 {
124 wikiText = preprocessWikiText( wikiText );
125
126 this.wikiText = wikiText;
127 wikiLength = this.wikiText.length( );
128 wikiChars = new char [ wikiLength];
129 this.wikiText.getChars( 0, wikiLength, wikiChars, 0 );
130
131 while ( parseBlock( ) )
132 ;
133
134 closeListsAndTables( );
135
136 while ( mediawikiTableLevel-- > 0 )
137 sb.append( "</td></tr></table></div>\n" );
138
139 completeTOC( );
140 }
141 protected void parseMD( String wikiText )
142 {
143 MutableDataSet options = new MutableDataSet( );
144
145
146 List<Extension> extensions =
147 java.util.Arrays.asList(
148 com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension.create(),
149
150
151
152
153
154
155
156
157
158
159 com.vladsch.flexmark.ext.media.tags.MediaTagsExtension.create( ),
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196 com.vladsch.flexmark.ext.tables.TablesExtension.create( ),
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228 com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension.create( ),
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245 com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension.create( ),
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332 com.vladsch.flexmark.ext.typographic.TypographicExtension.create( ),
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357 com.vladsch.flexmark.ext.youtube.embedded.YouTubeLinkExtension.create( ),
358
359
360
361
362
363
364
365
366
367
368
369
370
371 com.vladsch.flexmark.ext.macros.MacrosExtension.create( ),
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392 com.vladsch.flexmark.ext.definition.DefinitionExtension.create( ),
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415 com.vladsch.flexmark.ext.autolink.AutolinkExtension.create( )
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578 );
579 options.set(Parser.EXTENSIONS, extensions);
580 options.set(HtmlRenderer.INDENT_SIZE, 2);
581 options.set(HtmlRenderer.PERCENT_ENCODE_URLS, true);
582 options.set(HtmlRenderer.GENERATE_HEADER_ID, true);
583 options.set(HtmlRenderer.RENDER_HEADER_ID, true);
584 options.set(HtmlRenderer.ESCAPE_HTML, true);
585 options.set(HtmlRenderer.ESCAPE_INLINE_HTML, false);
586
587 String _strTableClass = "table";
588
589
590 if (_strTableClass != null && !_strTableClass.isEmpty()) {
591 options.set(com.vladsch.flexmark.ext.tables.TablesExtension.CLASS_NAME, _strTableClass);
592 }
593
594 Parser parser = Parser.builder(options).build();
595 HtmlRenderer renderer = HtmlRenderer.builder(options).build();
596 markdown = wikiText;
597 List<String> customInputsHtml = extractCustomInputs();
598
599 com.vladsch.flexmark.util.ast.Node document = parser.parse(markdown);
600 String html = renderer.render(document);
601 String htmlWithCustomInputs = remplaceCustomInputs( html, customInputsHtml );
602 sb = new StringBuilder( htmlWithCustomInputs);
603
604 }
605
606
607
608
609
610 public String remplaceCustomInputs( String html, List<String> customInputsHtml )
611 {
612 for ( int i = 0; i < customInputsHtml.size( ); i++ )
613 {
614 html = html.replace( CUSTOM_INPUTS_TO_REEMPLACE +"_"+ i, customInputsHtml.get( i ) );
615 }
616 return html;
617 }
618
619
620
621
622
623 public List<String> extractCustomInputs ()
624 {
625
626 List <String> customInputsHtml = new ArrayList <>( );
627 String regex = "\\$\\$(.*?)\\$\\$";
628 Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);
629 Matcher matcher = pattern.matcher(markdown);
630 int iteration = 0;
631 while (matcher.find()) {
632
633 String customInput = matcher.group(1).trim();
634 String customInputName = "";
635 for ( int i = 0; i < customInput.length( ); i++ )
636 {
637
638 char c = customInput.charAt( i );
639 String regexText = "[a-zA-Z0-9]";
640 if (Character.toString(c).matches(regexText))
641 {
642 customInputName += c;
643 }
644 else {
645 break;
646 }
647 }
648
649 if(customInputName != null && !customInputName.isEmpty())
650 {
651 String html = MarkdownCustomInputs.renderCustomInHtml( customInput, customInputName );
652
653 if(html != null && !html.isEmpty())
654 {
655 customInputsHtml.add(html);
656
657 int startIntpot = markdown.indexOf("$$");
658 int endPostion = markdown.indexOf("$$", startIntpot + 2);
659 markdown = markdown.substring(0, startIntpot) + markdown.substring(endPostion + 2);
660 markdown = markdown.substring(0, startIntpot) + CUSTOM_INPUTS_TO_REEMPLACE +"_"+ iteration + markdown.substring(startIntpot);
661 iteration++;
662 }
663 }
664 }
665 return customInputsHtml;
666 }
667
668 public String toString( )
669 {
670 return sb.toString( );
671 }
672
673 private void closeListsAndTables( )
674 {
675
676 while ( listLevel >= 0 )
677 {
678 sb.append( LIST_CLOSE [LIST_CHARS.indexOf( listLevels [listLevel--] )] );
679 }
680
681 if ( inTable )
682 {
683 sb.append( "</table></div>\n" );
684 inTable = false;
685 }
686 }
687
// Parses one block-level construct (blank line, table row, list item, heading, nowiki block,
// horizontal rule or paragraph) starting at pos, appends its markup to sb and advances pos.
// Returns false when the input is exhausted (see the individual end-of-input cases below),
// true otherwise; parse( ) calls it in a loop until it returns false.
688 private boolean parseBlock( )
689 {
// Skip leading characters <= ' ' (blanks, control characters) but stop at a newline.
690 for ( ; ( pos < wikiLength ) && ( wikiChars [pos] <= ' ' ) && ( wikiChars [pos] != '\n' ); pos++ )
691 ;
692
693 if ( pos >= wikiLength )
694 {
695 return false;
696 }
697
698 char c = wikiChars [pos];
699
// An empty line terminates any open list or simple table.
700 if ( c == '\n' )
701 {
702 closeListsAndTables( );
703 pos++;
704
705 return true;
706 }
707
708 if ( c == '|' )
709 {
710 // Inside a "{|" table, a line made only of "|", "|-" or "|}" (plus trailing blanks) is a structural marker.
711 if ( mediawikiTableLevel > 0 )
712 {
713 int pp = pos + 1;
714
715 if ( pp < wikiLength )
716 {
717 boolean newRow = false;
718 boolean endTable = false;
719
720 if ( wikiChars [pp] == '-' )
721 {
722 newRow = true;
723 pp++;
724 }
725 else
726 if ( wikiChars [pp] == '}' )
727 {
728 endTable = true;
729 pp++;
730 }
731
732 for ( ; ( pp < wikiLength ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
733 ;
734
// Only a marker if nothing but blanks follows on the line: "|-" starts a new row,
// "|}" closes the table, a lone "|" starts a new cell.
735 if ( ( pp == wikiLength ) || ( wikiChars [pp] == '\n' ) )
736 {
737 closeListsAndTables( );
738 sb.append( newRow ? "</td></tr>\n<tr><td>" : ( endTable ? "</td></tr></table></div>\n" : "</td>\n<td>" ) );
739
740 if ( endTable )
741 {
742 mediawikiTableLevel--;
743 }
744
745 pos = pp + 1;
746
// false when the marker was the last thing in the input.
747 return pp < wikiLength;
748 }
749 }
750 }
751
// Otherwise the line is a row of a simple table; open the table on its first row.
752 if ( !inTable )
753 {
754 closeListsAndTables( );
755 sb.append( "<div class=\"").append( _strParentTableClass ).append( "\" >" ).append("<table class=\"" ).append( _strTableClass ).append( "\" >" );
756 inTable = true;
757 }
758
759 pos = parseTableRow( pos + 1 );
760
761 return true;
762 }
763 else
764 {
// Any line not starting with '|' ends an open simple table.
765 if ( inTable )
766 {
767 sb.append( "</table></div>\n" );
768 inTable = false;
769 }
770 }
771
// List handling: entered when a list is already open or the line starts with a list marker.
772 if ( ( listLevel >= 0 ) || ( LIST_CHARS.indexOf( c ) >= 0 ) )
773 {
774
775 int lc;
776 // lc counts how many leading characters of this line match the markers of the open list levels.
777
778 for ( lc = 0; ( lc <= listLevel ) && ( ( pos + lc ) < wikiLength ) && ( wikiChars [pos + lc] == listLevels [lc] ); lc++ )
779 ;
780
781 if ( lc <= listLevel )
782 {
783 // The line matches fewer levels than are open: close the deeper ones.
784 do
785 {
786 sb.append( LIST_CLOSE [LIST_CHARS.indexOf( listLevels [listLevel--] )] );
787 }
788 while ( lc <= listLevel );
789
790 // pos is left unchanged, so the same line is examined again on the next call.
791 blockquoteBR = true;
792
793 return true;
794 }
795 else
796 {
// The markers of all open levels run up to the end of the input: nothing more to parse.
797 if ( ( pos + lc ) >= wikiLength )
798 {
799 return false;
800 }
801
802 char cc = wikiChars [pos + lc];
803 int listType = LIST_CHARS.indexOf( cc );
804
// A further list marker that is not doubled (so "**" / "##" are not taken as markers here)
// opens one more nesting level, as long as the depth limit allows it.
805 if ( ( listType >= 0 ) && ( ( pos + lc + 1 ) < wikiLength ) && ( wikiChars [pos + lc + 1] != cc ) && ( listLevel < MAX_LIST_LEVELS ) )
806 {
807 sb.append( LIST_OPEN [listType] );
808 listLevels [++listLevel] = cc;
809 blockquoteBR = true;
810 pos = parseListItem( pos + lc + 1 );
811
812 return true;
813 }
814 else
815 if ( listLevel >= 0 )
816 {
817 // Same depth as the innermost open level: start the next item of that level.
818 if ( ( listLevels [listLevel] == '>' ) || ( listLevels [listLevel] == ':' ) )
819 {
820 sb.append( '\n' );
821 }
822 else
823 if ( listLevels [listLevel] == '!' )
824 {
825 sb.append( "</div>\n<div class='wiki_center'>" );
826 }
827 else
828 {
829 sb.append( "</li>\n<li>" );
830 }
831
832 pos = parseListItem( pos + lc );
833
834 return true;
835 }
836 }
837 }
838
839 if ( c == '=' )
840 {
841 // Heading: hc is the number of leading '=' signs, capped at 6.
842 int hc;
843
844
845 for ( hc = 1; ( hc < 6 ) && ( ( pos + hc ) < wikiLength ) && ( wikiChars [pos + hc] == '=' ); hc++ )
846 ;
847
// A run of '=' reaching the end of the input ends parsing without producing a heading.
848 if ( ( pos + hc ) >= wikiLength )
849 {
850 return false;
851 }
852
853 int p;
854
855 for ( p = pos + hc; ( p < wikiLength ) && ( ( wikiChars [p] == ' ' ) || ( wikiChars [p] == '\t' ) ); p++ )
856 ;
857
858 String tagName = "h" + ( hc + HEADING_LEVEL_SHIFT );
// The id is written empty first; it is filled in below once the heading text is known.
859 sb.append( "<" + tagName + " id=''>" );
860
861 int hStart = sb.length( );
// The same run of '=' is passed as the delimiter that closes the heading text.
862 pos = parseItem( p, wikiText.substring( pos, pos + hc ), ContextType.HEADER );
863
864 String hText = sb.substring( hStart, sb.length( ) );
865 sb.append( "</" + tagName + ">\n" );
866
867 String anchorId = generateTOCAnchorId( hc, hText );
// hStart - 2 is the position between the two quotes of id=''.
868 sb.insert( hStart - 2, anchorId );
869 appendTOCItem( hc, anchorId, hText );
870
871 return true;
872 }
873 else
874 if ( c == '{' )
875 {
876 // "{{{" at the start of a block: nowiki section.
877 if ( ( ( pos + 2 ) < wikiLength ) && ( wikiChars [pos + 1] == '{' ) && ( wikiChars [pos + 2] == '{' ) )
878 {
879 int startNowiki = pos + 3;
880 int endNowiki = findEndOfNowiki( startNowiki );
881 int endPos = endNowiki + 3;
882
// Handled here only when the section contains a newline; otherwise control falls
// through to the paragraph code at the end of this method.
883 if ( wikiText.lastIndexOf( '\n', endNowiki ) >= startNowiki )
884 {
885 // Drop one newline directly after "{{{" and one directly before "}}}".
886 if ( wikiChars [startNowiki] == '\n' )
887 {
888 startNowiki++;
889 }
890
891 if ( wikiChars [endNowiki - 1] == '\n' )
892 {
893 endNowiki--;
894 }
895
896
897 appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
898
899 pos = endPos;
900
901 return true;
902 }
903
904
905 }
906 else
907 if ( ( ( pos + 1 ) < wikiLength ) && ( wikiChars [pos + 1] == '|' ) )
908 {
909 // "{|" followed only by blanks on the line opens a "{|" ... "|}" table.
910 int pp;
911
912 for ( pp = pos + 2; ( pp < wikiLength ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
913 ;
914
915 if ( ( pp == wikiLength ) || ( wikiChars [pp] == '\n' ) )
916 {
917 sb.append( "<div class=\"").append( _strParentTableClass ).append( "\" >" ).append("<table class=\"" ).append( _strTableClass ).append( "\"><tr><td>" );
918 mediawikiTableLevel++;
919 pos = pp + 1;
920
921 return pp < wikiLength;
922 }
923 }
924 }
925 else
// "----" followed only by blanks on the line is a horizontal rule.
926 if ( ( c == '-' ) && wikiText.startsWith( "----", pos ) )
927 {
928 int p;
929
930 for ( p = pos + 4; ( p < wikiLength ) && ( ( wikiChars [p] == ' ' ) || ( wikiChars [p] == '\t' ) ); p++ )
931 ;
932
933 if ( ( p == wikiLength ) || ( wikiChars [p] == '\n' ) )
934 {
935 sb.append( "\n<hr/>\n" );
936 pos = p;
937
938 return true;
939 }
940 }
941 else
942 if ( c == '~' )
943 {
944 // '~' in front of a block marker escapes it: the tilde is skipped so that the marker becomes paragraph text.
945 if ( ( pos + 1 ) < wikiLength )
946 {
947 char nc = wikiChars [pos + 1];
948
949 if ( ( nc == '>' ) || ( nc == ':' ) || ( nc == '-' ) || ( nc == '|' ) || ( nc == '=' ) || ( nc == '!' ) )
950 {
951 pos++;
952 c = nc;
953 }
954 else
955 if ( ( nc == '*' ) || ( nc == '#' ) )
956 {
957 // Only a single '*' / '#' is a list marker; a doubled one is left for inline handling.
958 char nnc = ( ( pos + 2 ) < wikiLength ) ? wikiChars [pos + 2] : 0;
959
960 if ( nnc != nc )
961 {
962 pos++;
963 c = nc;
964 }
965
966
967 }
968 else
969 if ( nc == '{' )
970 {
971 // "~{|": escaped table opener.
972 char nnc = ( ( pos + 2 ) < wikiLength ) ? wikiChars [pos + 2] : 0;
973
974 if ( nnc == '|' )
975 {
976 pos++;
977 c = nc;
978 }
979
980
981 }
982 }
983 }
984
// Everything not handled above is a paragraph.
985 sb.append( "<p>" );
986 pos = parseItem( pos, null, ContextType.PARAGRAPH );
987 sb.append( "</p>\n" );
988
989 return true;
990 }
991
992
993
994
995
996
997
998
999 private int findEndOfNowiki( int startBlock )
1000 {
1001
1002 int endBlock = startBlock - 3;
1003
1004 do
1005 {
1006 endBlock = wikiText.indexOf( "}}}", endBlock + 3 );
1007
1008 if ( endBlock < 0 )
1009 {
1010 return wikiLength;
1011 }
1012
1013 while ( ( ( endBlock + 3 ) < wikiLength ) && ( wikiChars [endBlock + 3] == '}' ) )
1014 endBlock++;
1015 }
1016 while ( wikiChars [endBlock - 1] == '~' );
1017
1018 return endBlock;
1019 }
1020
1021
1022
1023
1024
1025
1026
1027
1028 @SuppressWarnings( "unused" )
1029 private int findEndOfNowikiGreedy( int startBlock )
1030 {
1031
1032 int nextBlock = startBlock - 3;
1033
1034 do
1035 {
1036 do
1037 {
1038 nextBlock = wikiText.indexOf( "{{{", nextBlock + 3 );
1039 }
1040 while ( ( nextBlock > 0 ) && ( wikiChars [nextBlock - 1] == '~' ) );
1041
1042 if ( nextBlock < 0 )
1043 {
1044 nextBlock = wikiLength;
1045 }
1046
1047 int endBlock = wikiText.lastIndexOf( "}}}", nextBlock );
1048
1049 if ( ( endBlock >= startBlock ) && ( wikiChars [endBlock - 1] != '~' ) )
1050 {
1051 return endBlock;
1052 }
1053 }
1054 while ( nextBlock < wikiLength );
1055
1056 return wikiLength;
1057 }
1058
1059
1060
1061
1062
1063
1064 private int parseTableRow( int start )
1065 {
1066 if ( start >= wikiLength )
1067 {
1068 return wikiLength;
1069 }
1070
1071 sb.append( "<tr>" );
1072
1073 boolean endOfRow = false;
1074
1075 do
1076 {
1077 int colspan = 0;
1078
1079 while ( ( ( start + colspan ) < wikiLength ) && ( wikiChars [start + colspan] == '|' ) )
1080 colspan++;
1081
1082 start += colspan;
1083 colspan++;
1084
1085 boolean th = ( start < wikiLength ) && ( wikiChars [start] == '=' );
1086 start += ( th ? 1 : 0 );
1087
1088 while ( ( start < wikiLength ) && ( wikiChars [start] <= ' ' ) && ( wikiChars [start] != '\n' ) )
1089 start++;
1090
1091 if ( ( start >= wikiLength ) || ( wikiChars [start] == '\n' ) )
1092 {
1093 start++;
1094
1095 break;
1096 }
1097
1098 sb.append( th ? "<th" : "<td" );
1099
1100 if ( colspan > 1 )
1101 {
1102 sb.append( " colspan=\"" + colspan + "\"" );
1103 }
1104
1105 sb.append( '>' );
1106
1107 try
1108 {
1109 parseItemThrow( start, null, ContextType.TABLE_CELL );
1110 }
1111 catch( EndOfSubContextException e )
1112 {
1113 start = e.position;
1114
1115 if ( start >= wikiLength )
1116 {
1117 endOfRow = true;
1118 }
1119 else
1120 if ( wikiChars [start] == '\n' )
1121 {
1122 start++;
1123 endOfRow = true;
1124 }
1125 }
1126 catch( EndOfContextException e )
1127 {
1128 start = e.position;
1129 endOfRow = true;
1130 }
1131
1132 sb.append( th ? "</th>" : "</td>" );
1133 }
1134 while ( !endOfRow );
1135
1136 sb.append( "</tr>\n" );
1137
1138 return start;
1139 }
1140
1141
1142
1143
1144
1145
1146 private int parseListItem( int start )
1147 {
1148 while ( ( start < wikiLength ) && ( wikiChars [start] <= ' ' ) && ( wikiChars [start] != '\n' ) )
1149 start++;
1150
1151 int end = parseItem( start, null, ContextType.LIST_ITEM );
1152
1153 if ( ( ( listLevels [listLevel] == '>' ) || ( listLevels [listLevel] == ':' ) ) && ( wikiText.substring( start, end ).trim( ).length( ) == 0 ) )
1154 {
1155
1156 if ( !blockquoteBR )
1157 {
1158 sb.append( "<br/><br/>" );
1159 blockquoteBR = true;
1160 }
1161 }
1162 else
1163 {
1164 blockquoteBR = false;
1165 }
1166
1167 return end;
1168 }
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180 private int [ ] checkURI( int p, int start, int end )
1181 {
1182 if ( ( p > start ) && ( wikiChars [p - 1] == ':' ) )
1183 {
1184
1185 int pb = p - 1;
1186
1187 while ( ( pb > start ) && isLatinLetterOrDigit( wikiChars [pb - 1] ) )
1188 pb--;
1189
1190 int pe = p + 2;
1191
1192 while ( ( pe < end ) && isUrlChar( wikiChars [pe] ) )
1193 pe++;
1194
1195 URI uri = null;
1196
1197 do
1198 {
1199 while ( ( pe > ( p + 2 ) ) && ( ",.;:?!%)".indexOf( wikiChars [pe - 1] ) >= 0 ) )
1200 pe--;
1201
1202 try
1203 {
1204 uri = new URI( wikiText.substring( pb, pe ) );
1205 }
1206 catch( URISyntaxException e )
1207 {
1208 pe--;
1209 }
1210 }
1211 while ( ( uri == null ) && ( pe > ( p + 2 ) ) );
1212
1213 if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1214 {
1215 int [ ] offs = {
1216 pb, pe
1217 };
1218
1219 return offs;
1220 }
1221 }
1222
1223 return null;
1224 }
1225
1226 private int parseItem( int start, String delimiter, ContextType context )
1227 {
1228 try
1229 {
1230 return parseItemThrow( start, delimiter, context );
1231 }
1232 catch( EndOfContextException e )
1233 {
1234 return e.position;
1235 }
1236 }
1237
// Parses inline content starting at 'start' and appends its markup to sb. Plain text is
// collected in the local buffer tb and flushed through appendText before any markup is
// written and, via the finally block, whenever this method is left.
//
// When 'delimiter' is non-null and found, the position just after it is returned. In every
// other case the method is left by throwing EndOfContextException, whose 'position' is where
// the caller continues: end of input, end of line for HEADER / TABLE_CELL, an empty line, or a
// following line that starts a block construct. In TABLE_CELL context a '|' ends the cell with
// EndOfSubContextException instead.
1238 private int parseItemThrow( int start, String delimiter, ContextType context ) throws EndOfContextException
1239 {
1240 StringBuilder tb = new StringBuilder( );
1241
// "//" needs extra care as a delimiter: it must not close the item when it is part of a URI.
1242 boolean specialCaseDelimiterHandling = "//".equals( delimiter );
1243 int p = start;
1244 int end = wikiLength;
1245
1246 try
1247 {
1248 nextChar: while ( true )
1249 {
1250 if ( p >= end )
1251 {
1252 throw new EndOfContextException( end );
1253 }
1254
1255 if ( ( delimiter != null ) && wikiText.startsWith( delimiter, p ) )
1256 {
1257 if ( !specialCaseDelimiterHandling || ( checkURI( p, start, end ) == null ) )
1258 {
1259 p += delimiter.length( );
1260
1261 return p;
1262 }
1263 }
1264
1265 char c = wikiChars [p];
1266 boolean atLineStart = false;
1267
1268 // Newline: decide whether the item continues on the next line.
1269 if ( c == '\n' )
1270 {
// Headings and table cells never span lines.
1271 if ( ( context == ContextType.HEADER ) || ( context == ContextType.TABLE_CELL ) )
1272 {
1273 p++;
1274 throw new EndOfContextException( p );
1275 }
1276
// Two newlines in a row (an empty line) end the item.
1277 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '\n' ) )
1278 {
1279 p++;
1280 throw new EndOfContextException( p );
1281 }
1282
// Skip the newline and the leading blanks of the next line.
1283 for ( p++; ( p < end ) && ( wikiChars [p] <= ' ' ) && ( wikiChars [p] != '\n' ); p++ )
1284 ;
1285
1286 if ( p >= end )
1287 {
1288 throw new EndOfContextException( p );
1289 }
1290
1291 c = wikiChars [p];
1292 atLineStart = true;
1293
// The checks below end the item when the new line starts a block construct; the position
// thrown is the start of that construct.
1294 if ( ( c == '-' ) && wikiText.startsWith( "----", p ) )
1295 {
1296 // "----" followed only by blanks: horizontal rule.
1297 int pp;
1298
1299 for ( pp = p + 4; ( pp < end ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
1300 ;
1301
1302 if ( ( pp == end ) || ( wikiChars [pp] == '\n' ) )
1303 {
1304 throw new EndOfContextException( p );
1305 }
1306 }
1307
1308 if ( LIST_CHARS.indexOf( c ) >= 0 )
1309 {
1310 // A list marker that is not also a formatting character always starts a list.
1311 if ( FORMAT_CHARS.indexOf( c ) < 0 )
1312 {
1313 throw new EndOfContextException( p );
1314 }
1315
1316 // '*' or '#' followed by a different character is a list marker, not formatting.
1317 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] != c ) )
1318 {
1319 throw new EndOfContextException( p );
1320 }
1321
1322 if (
1323 ( listLevel >= 0 ) && ( c == listLevels [0] ) )
1324 {
1325 // Doubled marker while a list whose outermost level uses the same marker is open: treated as a list line.
1326 throw new EndOfContextException( p );
1327 }
1328
1329
1330 }
1331 else
1332 if ( c == '=' )
1333 {
1334 throw new EndOfContextException( p );
1335 }
1336 else
1337 if ( c == '|' )
1338 {
1339 throw new EndOfContextException( p );
1340 }
1341 else
1342 if ( c == '{' )
1343 {
1344 // "{|" followed only by blanks: table opener.
1345 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '|' ) )
1346 {
1347 int pp;
1348
1349 for ( pp = p + 2; ( pp < end ) && ( ( wikiChars [pp] == ' ' ) || ( wikiChars [pp] == '\t' ) ); pp++ )
1350 ;
1351
1352 if ( ( pp == end ) || ( wikiChars [pp] == '\n' ) )
1353 {
1354 throw new EndOfContextException( p );
1355 }
1356 }
1357 }
1358
1359 // The item continues: keep the line break as a newline in the text.
1360 tb.append( '\n' );
1361
1362
1363 }
1364 else
1365 if ( c == '|' )
1366 {
// '|' separates cells inside a table row.
1367 if ( context == ContextType.TABLE_CELL )
1368 {
1369 p++;
1370 throw new EndOfSubContextException( p );
1371 }
1372 }
1373
1374 int formatType;
1375
1376 if ( c == '{' )
1377 {
1378 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '{' ) )
1379 {
// "{{{": nowiki section.
1380 if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 2] == '{' ) )
1381 {
1382 appendText( tb.toString( ) );
1383 tb.delete( 0, tb.length( ) );
1384
1385 int startNowiki = p + 3;
1386 int endNowiki = findEndOfNowiki( startNowiki );
1387 p = endNowiki + 3;
1388
// A section containing a newline is handled as a block: one leading and one trailing
// newline are dropped and, inside a paragraph, the <p> is closed before and reopened after.
1389 if ( wikiText.lastIndexOf( '\n', endNowiki ) >= startNowiki )
1390 {
1391
1392 if ( wikiChars [startNowiki] == '\n' )
1393 {
1394 startNowiki++;
1395 }
1396
1397 if ( wikiChars [endNowiki - 1] == '\n' )
1398 {
1399 endNowiki--;
1400 }
1401
1402 if ( context == ContextType.PARAGRAPH )
1403 {
1404 sb.append( "</p>" );
1405 }
1406
1407
1408 appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
1409
1410
1411 if ( context == ContextType.PARAGRAPH )
1412 {
1413 sb.append( "<p>" );
1414
1415 }
1416 }
1417 else
1418 {
// Single-line section: emitted in place.
1419 appendNowiki( wikiText.substring( startNowiki, endNowiki ) );
1420 }
1421
1422 continue;
1423 }
1424 else
1425 if ( ( p + 2 ) < end )
1426 {
1427 // "{{ ... }}": image.
1428 int endImg = wikiText.indexOf( "}}", p + 2 );
1429
1430 if ( ( endImg >= 0 ) && ( endImg < end ) )
1431 {
1432 appendText( tb.toString( ) );
1433 tb.delete( 0, tb.length( ) );
1434 appendImage( wikiText.substring( p + 2, endImg ) );
1435 p = endImg + 2;
1436
1437 continue;
1438 }
1439 }
1440 }
1441 }
1442 else
1443 if ( c == '[' )
1444 {
1445 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '[' ) )
1446 {
1447 // "[[ ... ]]": link.
1448 int endLink = wikiText.indexOf( "]]", p + 2 );
1449
1450 if ( ( endLink >= 0 ) && ( endLink < end ) )
1451 {
1452 appendText( tb.toString( ) );
1453 tb.delete( 0, tb.length( ) );
1454 appendLink( wikiText.substring( p + 2, endLink ) );
1455 p = endLink + 2;
1456
1457 continue;
1458 }
1459 }
1460 }
1461 else
1462 if ( c == '\\' )
1463 {
// Two backslashes: forced line break.
1464 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '\\' ) )
1465 {
1466 appendText( tb.toString( ) );
1467 tb.delete( 0, tb.length( ) );
1468 sb.append( "<br/>" );
1469 p += 2;
1470
1471 continue;
1472 }
1473 }
1474 else
1475 if ( c == '<' )
1476 {
1477 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == '<' ) )
1478 {
1479 if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 2] == '<' ) )
1480 {
1481 // "<<< ... >>>": macro.
1482 int endMacro = wikiText.indexOf( ">>>", p + 3 );
1483
1484 if ( ( endMacro >= 0 ) && ( endMacro < end ) )
1485 {
1486 appendText( tb.toString( ) );
1487 tb.delete( 0, tb.length( ) );
1488 appendMacro( wikiText.substring( p + 3, endMacro ) );
1489 p = endMacro + 3;
1490
1491 continue;
1492 }
1493 }
1494 }
1495 }
1496 else
// Doubled formatting character ("**", "//", "__", "##").
1497 if ( ( formatType = FORMAT_CHARS.indexOf( c ) ) >= 0 )
1498 {
1499 if ( ( ( p + 1 ) < end ) && ( wikiChars [p + 1] == c ) )
1500 {
1501 appendText( tb.toString( ) );
1502 tb.delete( 0, tb.length( ) );
1503
1504 if ( c == '/' )
1505 {
1506 // "//" may belong to a URI ("scheme://...") instead of opening emphasis.
1507 int [ ] uriOffs = checkURI( p, start, end );
1508
1509 if ( uriOffs != null )
1510 {
1511 int pb = uriOffs [0];
1512 int pe = uriOffs [1];
1513
// The scheme in front of "//" was flushed to sb just above and is removed again here.
// NOTE(review): the deletion length is computed from input offsets, i.e. it assumes the
// flushed scheme text has the same length in sb as in the input - confirm for appendText.
1514 if ( ( pb > start ) && ( wikiChars [pb - 1] == '~' ) )
1515 {
// "~scheme://...": escaped URI, written as plain text without the tilde.
1516 sb.delete( sb.length( ) - ( p - pb + 1 ), sb.length( ) );
1517 sb.append( escapeHTML( wikiText.substring( pb, pe ) ) );
1518 }
1519 else
1520 {
1521 sb.delete( sb.length( ) - ( p - pb ), sb.length( ) );
1522 appendLink( wikiText.substring( pb, pe ) );
1523 }
1524
1525 p = pe;
1526
1527 continue;
1528 }
1529 }
1530
1531 sb.append( FORMAT_TAG_OPEN [formatType] );
1532
// Parse the formatted span recursively; the finally block writes the closing tag even when
// the context ends (exception) before the closing delimiter is found.
1533 try
1534 {
1535 p = parseItemThrow( p + 2, FORMAT_DELIM [formatType], context );
1536 }
1537 finally
1538 {
1539 sb.append( FORMAT_TAG_CLOSE [formatType] );
1540 }
1541
1542 continue;
1543 }
1544 }
1545 else
1546 if ( c == '~' )
1547 {
1548 // '~' is the escape character.
1549
1550 if ( atLineStart )
1551 {
1552 // At the start of a continuation line it can escape a block marker, which is then kept as text.
1553 if ( ( p + 1 ) < end )
1554 {
1555 char nc = wikiChars [p + 1];
1556
1557 if ( ( nc == '>' ) || ( nc == ':' ) || ( nc == '-' ) || ( nc == '|' ) || ( nc == '=' ) || ( nc == '!' ) )
1558 {
1559 tb.append( nc );
1560 p += 2;
1561
1562 continue nextChar;
1563 }
1564 else
1565 if ( ( nc == '*' ) || ( nc == '#' ) )
1566 {
1567 // Only a single '*' / '#' is a block marker.
1568 char nnc = ( ( p + 2 ) < end ) ? wikiChars [p + 2] : 0;
1569
1570 if ( nnc != nc )
1571 {
1572 tb.append( nc );
1573 p += 2;
1574
1575 continue nextChar;
1576 }
1577
1578
1579 }
1580 else
1581 if ( nc == '{' )
1582 {
1583 // "~{|": escaped table opener.
1584 char nnc = ( ( p + 2 ) < end ) ? wikiChars [p + 2] : 0;
1585
1586 if ( nnc == '|' )
1587 {
1588 tb.append( nc );
1589 tb.append( nnc );
1590 p += 3;
1591
1592 continue nextChar;
1593 }
1594
1595
1596 }
1597 }
1598 }
1599
// Anywhere: '~' followed by one of ESCAPED_INLINE_SEQUENCES emits that sequence literally.
// When nothing matches, the '~' itself is appended as text below.
1600 for ( String e : ESCAPED_INLINE_SEQUENCES )
1601 {
1602 if ( wikiText.startsWith( e, p + 1 ) )
1603 {
1604 tb.append( e );
1605 p += ( 1 + e.length( ) );
1606
1607 continue nextChar;
1608 }
1609 }
1610 }
1611 else
1612 if ( c == '-' )
1613 {
1614 // " -- " (with a blank on both sides) becomes an en dash followed by a blank.
1615 if ( ( ( p + 2 ) < end ) && ( wikiChars [p + 1] == '-' ) && ( wikiChars [p + 2] == ' ' ) && ( p > start )
1616 && ( wikiChars [p - 1] == ' ' ) )
1617 {
1618
1619
1620 tb.append( "– " );
1621 p += 3;
1622
1623 continue;
1624 }
1625 }
1626
// No markup recognized at this position: the character is plain text.
1627 tb.append( c );
1628 p++;
1629 }
1630 }
1631 finally
1632 {
// Flush pending text on every exit path (return or exception).
1633 appendText( tb.toString( ) );
1634 tb.delete( 0, tb.length( ) );
1635 }
1636 }
1637
1638 protected void appendMacro( String text )
1639 {
1640 if ( "TOC".equals( text ) )
1641 {
1642 sb.append( "!!!TOC!!!" );
1643 }
1644 else
1645 {
1646 sb.append( "<<<Macro:" );
1647 sb.append( escapeHTML( unescapeHTML( text ) ) );
1648 sb.append( ">>>" );
1649 }
1650 }
1651
1652 protected void appendLink( String text )
1653 {
1654 String [ ] link = split( text, '|' );
1655 URI uri = null;
1656
1657 try
1658 {
1659 uri = new URI( link [0].trim( ) );
1660 }
1661 catch( URISyntaxException e )
1662 {
1663 }
1664
1665 if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1666 {
1667 sb.append( "<a href=\"" + escapeHTML( uri.toString( ) ) + "\" rel=\"nofollow\">" );
1668 sb.append( escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) ) );
1669 sb.append( "</a>" );
1670 }
1671 else
1672 {
1673 sb.append( "<a href=\"#\" title=\"Internal link\">" );
1674 sb.append( escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) ) );
1675 sb.append( "</a>" );
1676 }
1677 }
1678
1679 protected void appendImage( String text )
1680 {
1681 String [ ] link = split( text, '|' );
1682 URI uri = null;
1683
1684 try
1685 {
1686 uri = new URI( link [0].trim( ) );
1687 }
1688 catch( URISyntaxException e )
1689 {
1690 }
1691
1692 if ( ( uri != null ) && uri.isAbsolute( ) && !uri.isOpaque( ) )
1693 {
1694 String alt = escapeHTML( unescapeHTML( ( ( link.length >= 2 ) && !isEmpty( link [1].trim( ) ) ) ? link [1] : link [0] ) );
1695 sb.append( "<img src=\"" + escapeHTML( uri.toString( ) ) + "\" alt=\"" + alt + "\" title=\"" + alt + "\" />" );
1696 }
1697 else
1698 {
1699 sb.append( "<<<Internal image(?): " );
1700 sb.append( escapeHTML( unescapeHTML( text ) ) );
1701 sb.append( ">>>" );
1702 }
1703 }
1704
1705 protected void appendText( String text )
1706 {
1707 sb.append( escapeHTML( unescapeHTML( text ) ) );
1708 }
1709
1710 protected String generateTOCAnchorId( int hLevel, String text )
1711 {
1712 int i = 0;
1713 String id = ( ( HEADING_ID_PREFIX != null ) ? HEADING_ID_PREFIX : ( "H" + hLevel + "_" ) )
1714 + translit( text.replaceAll( "<.+?>", "" ) ).trim( ).replaceAll( "\\s+", "_" ).replaceAll( "[^a-zA-Z0-9_-]", "" );
1715
1716 while ( tocAnchorIds.contains( id ) )
1717 {
1718 i++;
1719 id = text + "_" + i;
1720 }
1721
1722 tocAnchorIds.add( id );
1723
1724 return id;
1725 }
1726
1727 protected void appendTOCItem( int level, String anchorId, String text )
1728 {
1729 if ( level > tocLevel )
1730 {
1731 while ( level > tocLevel )
1732 {
1733 toc.append( "<ul><li>" );
1734 tocLevel++;
1735 }
1736 }
1737 else
1738 {
1739 while ( level < tocLevel )
1740 {
1741 toc.append( "</li></ul>" );
1742 tocLevel--;
1743 }
1744
1745 toc.append( "</li>\n<li>" );
1746 }
1747
1748 toc.append( "<a href='#page_url#" + anchorId + "'>" + text + "</a>" );
1749 }
1750
1751 protected void completeTOC( )
1752 {
1753 while ( 0 < tocLevel )
1754 {
1755 toc.append( "</li></ul>" );
1756 tocLevel--;
1757 }
1758
1759 int idx;
1760 String tocDiv = "<div class=\"" + _strTocClass + "\">" + toc.toString( ) + "</div>";
1761
1762 while ( ( idx = sb.indexOf( "!!!TOC!!!" ) ) >= 0 )
1763 {
1764 sb.replace( idx, idx + 9, tocDiv );
1765 }
1766 }
1767
1768 protected void appendNowiki( String text )
1769 {
1770 sb.append( escapeHTML( replaceString( replaceString( text, "~{{{", "{{{" ), "~}}}", "}}}" ) ) );
1771 }
1772
1773 private static class EndOfContextException extends Exception
1774 {
1775 private static final long serialVersionUID = 1L;
1776 int position;
1777
1778 public EndOfContextException( int position )
1779 {
1780 super( );
1781 this.position = position;
1782 }
1783 }
1784
1785 private static class EndOfSubContextException extends EndOfContextException
1786 {
1787 private static final long serialVersionUID = 1L;
1788
1789 public EndOfSubContextException( int position )
1790 {
1791 super( position );
1792 }
1793 }
1794
1795 private static enum ContextType
1796 {
1797 PARAGRAPH, LIST_ITEM, TABLE_CELL, HEADER, NOWIKI_BLOCK;
1798 }
1799 }