1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package ys.wikiparser;
29
30 import java.util.*;
31
/**
 * Static string helpers: URL character classification, wiki-text line
 * normalisation, HTML escaping/unescaping, lenient integer parsing, simple
 * replace/split, Cyrillic-to-Latin transliteration and null/empty handling.
 *
 * <p>All methods are stateless; the only shared data are immutable constants.
 */
public class Utils {

    /** Named HTML entities understood by {@link #unescapeHTML(String)}; never modified after class init. */
    private static final Map<String, Character> HTML_ENTITIES = createHtmlEntities();

    /** Largest accepted distance between the {@code '&'} and the closing {@code ';'} of an entity. */
    private static final int MAX_ENTITY_SPAN = 12;

    /**
     * Transliteration table: each Cyrillic letter is followed by its Latin
     * replacement (possibly empty, as for the soft and hard signs). Letters are
     * written as Unicode escapes so the table does not depend on the encoding
     * the source file is compiled with. Lower-case block first, then upper-case.
     */
    private static final String TRANSLIT_TABLE =
        "\u0430a\u0431b\u0432v\u0433g\u0434d\u0435e\u0451e\u0436zh\u0437z\u0438i\u0439y"
        + "\u043Ak\u043Bl\u043Cm\u043Dn\u043Eo\u043Fp\u0440r\u0441s\u0442t\u0443u\u0444f"
        + "\u0445h\u0446ts\u0447ch\u0448sh\u0449sch\u044C\u044By\u044A\u044De\u044Eyu\u044Fya"
        + "\u0410A\u0411B\u0412V\u0413G\u0414D\u0415E\u0401E\u0416ZH\u0417Z\u0418I\u0419Y"
        + "\u041AK\u041BL\u041CM\u041DN\u041EO\u041FP\u0420R\u0421S\u0422T\u0423U\u0424F"
        + "\u0425H\u0426TS\u0427CH\u0428SH\u0429SCH\u042C\u042BY\u042A\u042DE\u042EYU\u042FYA";

    /** Punctuation accepted by {@link #isUrlChar(char)} in addition to Latin letters and digits. */
    private static final String URL_PUNCTUATION = "/?@&=+,-_.!~()%#;:$*";

    /**
     * Tells whether a character may appear inside a URL as recognised by this class.
     *
     * @param c character to test
     * @return {@code true} for ASCII letters, ASCII digits and any of
     *         {@code /?@&=+,-_.!~()%#;:$*}
     */
    public static boolean isUrlChar(char c) {
        return isLatinLetterOrDigit(c) || URL_PUNCTUATION.indexOf(c) >= 0;
    }

    /**
     * Tells whether a character is an ASCII letter or an ASCII digit.
     *
     * @param c character to test
     * @return {@code true} for {@code a-z}, {@code A-Z} and {@code 0-9}
     */
    public static boolean isLatinLetterOrDigit(char c) {
        boolean lower = c >= 'a' && c <= 'z';
        boolean upper = c >= 'A' && c <= 'Z';
        boolean digit = c >= '0' && c <= '9';
        return lower || upper || digit;
    }

    /**
     * Normalises raw wiki text before parsing.
     *
     * <ul>
     *   <li>the whole text is trimmed;</li>
     *   <li>{@code \r\n} and lone {@code \r} become {@code \n};</li>
     *   <li>lines consisting only of characters {@code <= ' '} are emptied;</li>
     *   <li>leading whitespace of non-blank lines and everything after the
     *       first visible character are kept as they are.</li>
     * </ul>
     *
     * @param text raw text, may be {@code null}
     * @return the normalised text, or {@code ""} when {@code text} is {@code null}
     */
    public static String preprocessWikiText(String text) {
        if (text == null) {
            return "";
        }

        final String trimmed = text.trim();
        final int length = trimmed.length();
        StringBuilder out = new StringBuilder(length);
        // Leading whitespace of the current line; only flushed once the line
        // turns out to contain a visible character.
        StringBuilder indent = new StringBuilder();
        boolean atLineStart = true;

        for (int p = 0; p < length; p++) {
            char c = trimmed.charAt(p);

            if (c == '\r' || c == '\n') {
                // Treat "\r\n" as a single line break.
                if (c == '\r' && p + 1 < length && trimmed.charAt(p + 1) == '\n') {
                    p++;
                }
                out.append('\n');
                indent.setLength(0);
                atLineStart = true;
            } else if (atLineStart && c <= ' ') {
                indent.append(c);
            } else {
                if (atLineStart) {
                    out.append(indent);
                    atLineStart = false;
                }
                out.append(c);
            }
        }

        return out.toString();
    }

    /**
     * Escapes the five characters that are significant in HTML text and
     * attribute values: {@code <}, {@code >}, {@code &}, {@code '} and {@code "}.
     * The apostrophe is written as the numeric reference {@code &#39;}.
     *
     * @param s text to escape, may be {@code null}
     * @return the escaped text, or {@code ""} when {@code s} is {@code null}
     */
    public static String escapeHTML(String s) {
        if (s == null) {
            return "";
        }

        final int length = s.length();
        StringBuilder sb = new StringBuilder(length + 100);

        for (int i = 0; i < length; i++) {
            char ch = s.charAt(i);
            switch (ch) {
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '&':
                    sb.append("&amp;");
                    break;
                case '\'':
                    sb.append("&#39;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                default:
                    sb.append(ch);
                    break;
            }
        }

        return sb.toString();
    }

    /** Builds the read-only table of named entities. */
    private static Map<String, Character> createHtmlEntities() {
        Map<String, Character> map = new HashMap<String, Character>();
        map.put("lt", '<');
        map.put("gt", '>');
        map.put("amp", '&');
        map.put("quot", '"');
        map.put("apos", '\'');
        map.put("nbsp", '\u00A0');
        map.put("shy", '\u00AD');
        map.put("copy", '\u00A9');
        map.put("reg", '\u00AE');
        map.put("trade", '\u2122');
        map.put("mdash", '\u2014');
        map.put("ndash", '\u2013');
        map.put("ldquo", '\u201C');
        map.put("rdquo", '\u201D');
        map.put("euro", '\u20AC');
        map.put("middot", '\u00B7');
        map.put("bull", '\u2022');
        map.put("laquo", '\u00AB');
        map.put("raquo", '\u00BB');
        return Collections.unmodifiableMap(map);
    }

    /**
     * Replaces HTML character references with the characters they denote.
     *
     * <p>Recognised forms are decimal ({@code &#65;}), hexadecimal
     * ({@code &#x41;} or {@code &#X41;}) and the named entities in the internal
     * table. A reference is only considered when its {@code ';'} lies at most
     * 12 characters after the {@code '&'}. Anything that cannot be decoded
     * (unknown name, malformed or out-of-range number) is copied unchanged.
     * Numeric references above U+FFFF produce a surrogate pair.
     *
     * @param value text to unescape, may be {@code null}
     * @return the unescaped text; {@code null} when {@code value} is {@code null}
     */
    public static String unescapeHTML(String value) {
        if (value == null) {
            return null;
        }
        if (value.indexOf('&') < 0) {
            return value; // nothing to decode
        }

        final int length = value.length();
        StringBuilder sb = new StringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = value.charAt(i);

            if (c == '&') {
                int end = value.indexOf(';', i + 1);
                if (end > i && end - i <= MAX_ENTITY_SPAN) {
                    int codePoint = decodeEntity(value.substring(i + 1, end));
                    if (codePoint > 0) {
                        sb.appendCodePoint(codePoint);
                        i = end; // skip the whole reference including ';'
                        continue;
                    }
                }
            }

            sb.append(c);
        }

        return sb.toString();
    }

    /**
     * Decodes the text between {@code '&'} and {@code ';'} of a character reference.
     *
     * @param body reference body without the delimiters, never {@code null}
     * @return the code point, or 0 when the reference is unknown or invalid
     */
    private static int decodeEntity(String body) {
        if (body.length() > 0 && body.charAt(0) == '#') {
            boolean hex = body.length() > 1 && (body.charAt(1) == 'x' || body.charAt(1) == 'X');
            int codePoint = hex ? atoi(body.substring(2), 16) : atoi(body.substring(1));
            // Reject zero, negative values and anything beyond U+10FFFF instead
            // of letting a narrowing cast turn them into unrelated characters.
            return (codePoint > 0 && Character.isValidCodePoint(codePoint)) ? codePoint : 0;
        }

        Character named = HTML_ENTITIES.get(body);
        return (named == null) ? 0 : named.charValue();
    }

    /**
     * Parses a decimal integer, returning 0 instead of failing.
     *
     * @param s text to parse, may be {@code null}
     * @return the parsed value, or 0 when {@code s} is not a valid integer
     */
    public static int atoi(String s) {
        try {
            return Integer.parseInt(s);
        } catch (NumberFormatException ignored) {
            // Lenient by contract: unparsable input counts as 0.
            return 0;
        }
    }

    /**
     * Parses an integer in the given radix, returning 0 instead of failing.
     *
     * @param s    text to parse, may be {@code null}
     * @param base radix passed to {@link Integer#parseInt(String, int)}
     * @return the parsed value, or 0 when {@code s} is not a valid integer in that radix
     */
    public static int atoi(String s, int base) {
        try {
            return Integer.parseInt(s, base);
        } catch (NumberFormatException ignored) {
            // Lenient by contract: unparsable input (or an unsupported radix) counts as 0.
            return 0;
        }
    }

    /**
     * Replaces every occurrence of {@code from} in {@code str} with {@code to},
     * scanning left to right without re-examining replaced text.
     *
     * @param str  text to search; returned as is when {@code null}
     * @param from literal text to look for; when {@code null} or empty nothing is replaced
     * @param to   replacement text
     * @return the text with all occurrences replaced
     */
    public static String replaceString(String str, String from, String to) {
        // An empty pattern matches at every position without advancing, which
        // previously made the loop below spin forever.
        if (str == null || from == null || from.length() == 0) {
            return str;
        }

        final int fromLength = from.length();
        StringBuilder buf = new StringBuilder(str.length());
        int copyFrom = 0;
        int match;

        while ((match = str.indexOf(from, copyFrom)) >= 0) {
            buf.append(str, copyFrom, match);
            buf.append(to);
            copyFrom = match + fromLength;
        }
        buf.append(str, copyFrom, str.length());

        return buf.toString();
    }

    /**
     * Splits a string at every occurrence of a separator character. Empty
     * fields are kept, so the result always has one more element than there
     * are separators.
     *
     * @param s         text to split, may be {@code null}
     * @param separator separator character
     * @return the fields in order; {@code null} when {@code s} is {@code null}
     */
    public static String[] split(String s, char separator) {
        if (s == null) {
            return null;
        }

        List<String> parts = new ArrayList<String>();
        int start = 0;

        for (int end = s.indexOf(separator, start); end >= 0; end = s.indexOf(separator, start)) {
            parts.add(s.substring(start, end));
            start = end + 1;
        }
        parts.add(s.substring(start));

        return parts.toArray(new String[parts.size()]);
    }

    /**
     * Transliterates Russian Cyrillic letters to Latin text using the internal
     * table (for example the letter "sha" becomes {@code sh}); the soft and
     * hard signs are dropped. All other characters are copied unchanged.
     *
     * @param s text to transliterate, may be {@code null}
     * @return the transliterated text, or {@code ""} when {@code s} is {@code null}
     */
    public static String translit(String s) {
        if (s == null) {
            return "";
        }

        final int length = s.length();
        final int tableLength = TRANSLIT_TABLE.length();
        StringBuilder sb = new StringBuilder(length + 100);

        for (int i = 0; i < length; i++) {
            char ch = s.charAt(i);

            if (!isCyrillicLetter(ch)) {
                sb.append(ch);
                continue;
            }

            int idx = TRANSLIT_TABLE.indexOf(ch);
            if (idx < 0) {
                continue; // letter without a table entry: dropped
            }

            // Copy the Latin replacement: everything up to the next Cyrillic key.
            for (idx++; idx < tableLength; idx++) {
                char c = TRANSLIT_TABLE.charAt(idx);
                if (isCyrillicLetter(c)) {
                    break;
                }
                sb.append(c);
            }
        }

        return sb.toString();
    }

    /** Tells whether {@code ch} is a Russian letter: U+0410..U+044F, or io (U+0401 / U+0451). */
    private static boolean isCyrillicLetter(char ch) {
        return (ch >= '\u0410' && ch <= '\u044F') || ch == '\u0451' || ch == '\u0401';
    }

    /**
     * Converts an empty string to {@code null}.
     *
     * @param s any string, may be {@code null}
     * @return {@code null} when {@code s} is {@code ""}, otherwise {@code s}
     */
    public static String emptyToNull(String s) {
        return "".equals(s) ? null : s;
    }

    /**
     * Converts {@code null} to the empty string.
     *
     * @param s any string, may be {@code null}
     * @return {@code ""} when {@code s} is {@code null}, otherwise {@code s}
     */
    public static String noNull(String s) {
        return noNull(s, "");
    }

    /**
     * Substitutes a default for {@code null}.
     *
     * @param s   any string, may be {@code null}
     * @param val value to return when {@code s} is {@code null}
     * @return {@code val} when {@code s} is {@code null}, otherwise {@code s}
     */
    public static String noNull(String s, String val) {
        return (s == null) ? val : s;
    }

    /**
     * Tells whether a string is {@code null} or has zero length.
     *
     * @param s any string, may be {@code null}
     * @return {@code true} when {@code s} is {@code null} or {@code ""}
     */
    public static boolean isEmpty(String s) {
        return s == null || s.length() == 0;
    }
}