001    /*
002     * Copyright 2001-2006 Geert Bevin <gbevin[remove] at uwyn dot com>
003     * Distributed under the terms of either:
004     * - the common development and distribution license (CDDL), v1.0; or
005     * - the GNU Lesser General Public License, v2.1 or later
006     * $Id: StringUtils.java 3108 2006-03-13 18:03:00Z gbevin $
007     */
008    package com.uwyn.jhighlight.tools;
009    
010    import com.uwyn.jhighlight.pcj.map.CharKeyOpenHashMap;
011    import java.util.ArrayList;
012    import java.util.Iterator;
013    import java.util.regex.Pattern;
014    
015    /**
016     * General purpose class containing common <code>String</code> manipulation
017     * methods.
018     *
019     * @author Geert Bevin (gbevin[remove] at uwyn dot com)
020     * @version $Revision: 3108 $
021     * @since 1.0
022     */
023    public abstract class StringUtils
024    {
025            private static final CharKeyOpenHashMap mHtmlEncodeMap = new CharKeyOpenHashMap(); // character -> entity-reference table that encodeHtml() passes to encode()
026            // Populates mHtmlEncodeMap once, when the class is initialized.
027            static
028            {
029                    // HTML encoding map, following the entity sets of the HTML 4.0 spec:
030                    // http://www.w3.org/TR/REC-html40/sgml/entities.html
031                    // Each put() registers a character and the entity reference that replaces it.
032                    // Special characters for HTML (the markup-significant & < > " come first)
033                    mHtmlEncodeMap.put('\u0026', "&amp;");
034                    mHtmlEncodeMap.put('\u003C', "&lt;");
035                    mHtmlEncodeMap.put('\u003E', "&gt;");
036                    mHtmlEncodeMap.put('\u0022', "&quot;");
037
038                    mHtmlEncodeMap.put('\u0152', "&OElig;");
039                    mHtmlEncodeMap.put('\u0153', "&oelig;");
040                    mHtmlEncodeMap.put('\u0160', "&Scaron;");
041                    mHtmlEncodeMap.put('\u0161', "&scaron;");
042                    mHtmlEncodeMap.put('\u0178', "&Yuml;");
043                    mHtmlEncodeMap.put('\u02C6', "&circ;");
044                    mHtmlEncodeMap.put('\u02DC', "&tilde;");
045                    mHtmlEncodeMap.put('\u2002', "&ensp;");
046                    mHtmlEncodeMap.put('\u2003', "&emsp;");
047                    mHtmlEncodeMap.put('\u2009', "&thinsp;");
048                    mHtmlEncodeMap.put('\u200C', "&zwnj;");
049                    mHtmlEncodeMap.put('\u200D', "&zwj;");
050                    mHtmlEncodeMap.put('\u200E', "&lrm;");
051                    mHtmlEncodeMap.put('\u200F', "&rlm;");
052                    mHtmlEncodeMap.put('\u2013', "&ndash;");
053                    mHtmlEncodeMap.put('\u2014', "&mdash;");
054                    mHtmlEncodeMap.put('\u2018', "&lsquo;");
055                    mHtmlEncodeMap.put('\u2019', "&rsquo;");
056                    mHtmlEncodeMap.put('\u201A', "&sbquo;");
057                    mHtmlEncodeMap.put('\u201C', "&ldquo;");
058                    mHtmlEncodeMap.put('\u201D', "&rdquo;");
059                    mHtmlEncodeMap.put('\u201E', "&bdquo;");
060                    mHtmlEncodeMap.put('\u2020', "&dagger;");
061                    mHtmlEncodeMap.put('\u2021', "&Dagger;");
062                    mHtmlEncodeMap.put('\u2030', "&permil;");
063                    mHtmlEncodeMap.put('\u2039', "&lsaquo;");
064                    mHtmlEncodeMap.put('\u203A', "&rsaquo;");
065                    mHtmlEncodeMap.put('\u20AC', "&euro;");
066
067                    // Character entity references for ISO 8859-1 characters
068                    mHtmlEncodeMap.put('\u00A0', "&nbsp;");
069                    mHtmlEncodeMap.put('\u00A1', "&iexcl;");
070                    mHtmlEncodeMap.put('\u00A2', "&cent;");
071                    mHtmlEncodeMap.put('\u00A3', "&pound;");
072                    mHtmlEncodeMap.put('\u00A4', "&curren;");
073                    mHtmlEncodeMap.put('\u00A5', "&yen;");
074                    mHtmlEncodeMap.put('\u00A6', "&brvbar;");
075                    mHtmlEncodeMap.put('\u00A7', "&sect;");
076                    mHtmlEncodeMap.put('\u00A8', "&uml;");
077                    mHtmlEncodeMap.put('\u00A9', "&copy;");
078                    mHtmlEncodeMap.put('\u00AA', "&ordf;");
079                    mHtmlEncodeMap.put('\u00AB', "&laquo;");
080                    mHtmlEncodeMap.put('\u00AC', "&not;");
081                    mHtmlEncodeMap.put('\u00AD', "&shy;");
082                    mHtmlEncodeMap.put('\u00AE', "&reg;");
083                    mHtmlEncodeMap.put('\u00AF', "&macr;");
084                    mHtmlEncodeMap.put('\u00B0', "&deg;");
085                    mHtmlEncodeMap.put('\u00B1', "&plusmn;");
086                    mHtmlEncodeMap.put('\u00B2', "&sup2;");
087                    mHtmlEncodeMap.put('\u00B3', "&sup3;");
088                    mHtmlEncodeMap.put('\u00B4', "&acute;");
089                    mHtmlEncodeMap.put('\u00B5', "&micro;");
090                    mHtmlEncodeMap.put('\u00B6', "&para;");
091                    mHtmlEncodeMap.put('\u00B7', "&middot;");
092                    mHtmlEncodeMap.put('\u00B8', "&cedil;");
093                    mHtmlEncodeMap.put('\u00B9', "&sup1;");
094                    mHtmlEncodeMap.put('\u00BA', "&ordm;");
095                    mHtmlEncodeMap.put('\u00BB', "&raquo;");
096                    mHtmlEncodeMap.put('\u00BC', "&frac14;");
097                    mHtmlEncodeMap.put('\u00BD', "&frac12;");
098                    mHtmlEncodeMap.put('\u00BE', "&frac34;");
099                    mHtmlEncodeMap.put('\u00BF', "&iquest;");
100                    mHtmlEncodeMap.put('\u00C0', "&Agrave;");
101                    mHtmlEncodeMap.put('\u00C1', "&Aacute;");
102                    mHtmlEncodeMap.put('\u00C2', "&Acirc;");
103                    mHtmlEncodeMap.put('\u00C3', "&Atilde;");
104                    mHtmlEncodeMap.put('\u00C4', "&Auml;");
105                    mHtmlEncodeMap.put('\u00C5', "&Aring;");
106                    mHtmlEncodeMap.put('\u00C6', "&AElig;");
107                    mHtmlEncodeMap.put('\u00C7', "&Ccedil;");
108                    mHtmlEncodeMap.put('\u00C8', "&Egrave;");
109                    mHtmlEncodeMap.put('\u00C9', "&Eacute;");
110                    mHtmlEncodeMap.put('\u00CA', "&Ecirc;");
111                    mHtmlEncodeMap.put('\u00CB', "&Euml;");
112                    mHtmlEncodeMap.put('\u00CC', "&Igrave;");
113                    mHtmlEncodeMap.put('\u00CD', "&Iacute;");
114                    mHtmlEncodeMap.put('\u00CE', "&Icirc;");
115                    mHtmlEncodeMap.put('\u00CF', "&Iuml;");
116                    mHtmlEncodeMap.put('\u00D0', "&ETH;");
117                    mHtmlEncodeMap.put('\u00D1', "&Ntilde;");
118                    mHtmlEncodeMap.put('\u00D2', "&Ograve;");
119                    mHtmlEncodeMap.put('\u00D3', "&Oacute;");
120                    mHtmlEncodeMap.put('\u00D4', "&Ocirc;");
121                    mHtmlEncodeMap.put('\u00D5', "&Otilde;");
122                    mHtmlEncodeMap.put('\u00D6', "&Ouml;");
123                    mHtmlEncodeMap.put('\u00D7', "&times;");
124                    mHtmlEncodeMap.put('\u00D8', "&Oslash;");
125                    mHtmlEncodeMap.put('\u00D9', "&Ugrave;");
126                    mHtmlEncodeMap.put('\u00DA', "&Uacute;");
127                    mHtmlEncodeMap.put('\u00DB', "&Ucirc;");
128                    mHtmlEncodeMap.put('\u00DC', "&Uuml;");
129                    mHtmlEncodeMap.put('\u00DD', "&Yacute;");
130                    mHtmlEncodeMap.put('\u00DE', "&THORN;");
131                    mHtmlEncodeMap.put('\u00DF', "&szlig;");
132                    mHtmlEncodeMap.put('\u00E0', "&agrave;");
133                    mHtmlEncodeMap.put('\u00E1', "&aacute;");
134                    mHtmlEncodeMap.put('\u00E2', "&acirc;");
135                    mHtmlEncodeMap.put('\u00E3', "&atilde;");
136                    mHtmlEncodeMap.put('\u00E4', "&auml;");
137                    mHtmlEncodeMap.put('\u00E5', "&aring;");
138                    mHtmlEncodeMap.put('\u00E6', "&aelig;");
139                    mHtmlEncodeMap.put('\u00E7', "&ccedil;");
140                    mHtmlEncodeMap.put('\u00E8', "&egrave;");
141                    mHtmlEncodeMap.put('\u00E9', "&eacute;");
142                    mHtmlEncodeMap.put('\u00EA', "&ecirc;");
143                    mHtmlEncodeMap.put('\u00EB', "&euml;");
144                    mHtmlEncodeMap.put('\u00EC', "&igrave;");
145                    mHtmlEncodeMap.put('\u00ED', "&iacute;");
146                    mHtmlEncodeMap.put('\u00EE', "&icirc;");
147                    mHtmlEncodeMap.put('\u00EF', "&iuml;");
148                    mHtmlEncodeMap.put('\u00F0', "&eth;");
149                    mHtmlEncodeMap.put('\u00F1', "&ntilde;");
150                    mHtmlEncodeMap.put('\u00F2', "&ograve;");
151                    mHtmlEncodeMap.put('\u00F3', "&oacute;");
152                    mHtmlEncodeMap.put('\u00F4', "&ocirc;");
153                    mHtmlEncodeMap.put('\u00F5', "&otilde;");
154                    mHtmlEncodeMap.put('\u00F6', "&ouml;");
155                    mHtmlEncodeMap.put('\u00F7', "&divide;");
156                    mHtmlEncodeMap.put('\u00F8', "&oslash;");
157                    mHtmlEncodeMap.put('\u00F9', "&ugrave;");
158                    mHtmlEncodeMap.put('\u00FA', "&uacute;");
159                    mHtmlEncodeMap.put('\u00FB', "&ucirc;");
160                    mHtmlEncodeMap.put('\u00FC', "&uuml;");
161                    mHtmlEncodeMap.put('\u00FD', "&yacute;");
162                    mHtmlEncodeMap.put('\u00FE', "&thorn;");
163                    mHtmlEncodeMap.put('\u00FF', "&yuml;");
164
165                    // Mathematical, Greek and Symbolic characters for HTML
166                    mHtmlEncodeMap.put('\u0192', "&fnof;");
167                    mHtmlEncodeMap.put('\u0391', "&Alpha;");
168                    mHtmlEncodeMap.put('\u0392', "&Beta;");
169                    mHtmlEncodeMap.put('\u0393', "&Gamma;");
170                    mHtmlEncodeMap.put('\u0394', "&Delta;");
171                    mHtmlEncodeMap.put('\u0395', "&Epsilon;");
172                    mHtmlEncodeMap.put('\u0396', "&Zeta;");
173                    mHtmlEncodeMap.put('\u0397', "&Eta;");
174                    mHtmlEncodeMap.put('\u0398', "&Theta;");
175                    mHtmlEncodeMap.put('\u0399', "&Iota;");
176                    mHtmlEncodeMap.put('\u039A', "&Kappa;");
177                    mHtmlEncodeMap.put('\u039B', "&Lambda;");
178                    mHtmlEncodeMap.put('\u039C', "&Mu;");
179                    mHtmlEncodeMap.put('\u039D', "&Nu;");
180                    mHtmlEncodeMap.put('\u039E', "&Xi;");
181                    mHtmlEncodeMap.put('\u039F', "&Omicron;");
182                    mHtmlEncodeMap.put('\u03A0', "&Pi;");
183                    mHtmlEncodeMap.put('\u03A1', "&Rho;");
184                    mHtmlEncodeMap.put('\u03A3', "&Sigma;");
185                    mHtmlEncodeMap.put('\u03A4', "&Tau;");
186                    mHtmlEncodeMap.put('\u03A5', "&Upsilon;");
187                    mHtmlEncodeMap.put('\u03A6', "&Phi;");
188                    mHtmlEncodeMap.put('\u03A7', "&Chi;");
189                    mHtmlEncodeMap.put('\u03A8', "&Psi;");
190                    mHtmlEncodeMap.put('\u03A9', "&Omega;");
191                    mHtmlEncodeMap.put('\u03B1', "&alpha;");
192                    mHtmlEncodeMap.put('\u03B2', "&beta;");
193                    mHtmlEncodeMap.put('\u03B3', "&gamma;");
194                    mHtmlEncodeMap.put('\u03B4', "&delta;");
195                    mHtmlEncodeMap.put('\u03B5', "&epsilon;");
196                    mHtmlEncodeMap.put('\u03B6', "&zeta;");
197                    mHtmlEncodeMap.put('\u03B7', "&eta;");
198                    mHtmlEncodeMap.put('\u03B8', "&theta;");
199                    mHtmlEncodeMap.put('\u03B9', "&iota;");
200                    mHtmlEncodeMap.put('\u03BA', "&kappa;");
201                    mHtmlEncodeMap.put('\u03BB', "&lambda;");
202                    mHtmlEncodeMap.put('\u03BC', "&mu;");
203                    mHtmlEncodeMap.put('\u03BD', "&nu;");
204                    mHtmlEncodeMap.put('\u03BE', "&xi;");
205                    mHtmlEncodeMap.put('\u03BF', "&omicron;");
206                    mHtmlEncodeMap.put('\u03C0', "&pi;");
207                    mHtmlEncodeMap.put('\u03C1', "&rho;");
208                    mHtmlEncodeMap.put('\u03C2', "&sigmaf;");
209                    mHtmlEncodeMap.put('\u03C3', "&sigma;");
210                    mHtmlEncodeMap.put('\u03C4', "&tau;");
211                    mHtmlEncodeMap.put('\u03C5', "&upsilon;");
212                    mHtmlEncodeMap.put('\u03C6', "&phi;");
213                    mHtmlEncodeMap.put('\u03C7', "&chi;");
214                    mHtmlEncodeMap.put('\u03C8', "&psi;");
215                    mHtmlEncodeMap.put('\u03C9', "&omega;");
216                    mHtmlEncodeMap.put('\u03D1', "&thetasym;");
217                    mHtmlEncodeMap.put('\u03D2', "&upsih;");
218                    mHtmlEncodeMap.put('\u03D6', "&piv;");
219                    mHtmlEncodeMap.put('\u2022', "&bull;");
220                    mHtmlEncodeMap.put('\u2026', "&hellip;");
221                    mHtmlEncodeMap.put('\u2032', "&prime;");
222                    mHtmlEncodeMap.put('\u2033', "&Prime;");
223                    mHtmlEncodeMap.put('\u203E', "&oline;");
224                    mHtmlEncodeMap.put('\u2044', "&frasl;");
225                    mHtmlEncodeMap.put('\u2118', "&weierp;");
226                    mHtmlEncodeMap.put('\u2111', "&image;");
227                    mHtmlEncodeMap.put('\u211C', "&real;");
228                    mHtmlEncodeMap.put('\u2122', "&trade;");
229                    mHtmlEncodeMap.put('\u2135', "&alefsym;");
230                    mHtmlEncodeMap.put('\u2190', "&larr;");
231                    mHtmlEncodeMap.put('\u2191', "&uarr;");
232                    mHtmlEncodeMap.put('\u2192', "&rarr;");
233                    mHtmlEncodeMap.put('\u2193', "&darr;");
234                    mHtmlEncodeMap.put('\u2194', "&harr;");
235                    mHtmlEncodeMap.put('\u21B5', "&crarr;");
236                    mHtmlEncodeMap.put('\u21D0', "&lArr;");
237                    mHtmlEncodeMap.put('\u21D1', "&uArr;");
238                    mHtmlEncodeMap.put('\u21D2', "&rArr;");
239                    mHtmlEncodeMap.put('\u21D3', "&dArr;");
240                    mHtmlEncodeMap.put('\u21D4', "&hArr;");
241                    mHtmlEncodeMap.put('\u2200', "&forall;");
242                    mHtmlEncodeMap.put('\u2202', "&part;");
243                    mHtmlEncodeMap.put('\u2203', "&exist;");
244                    mHtmlEncodeMap.put('\u2205', "&empty;");
245                    mHtmlEncodeMap.put('\u2207', "&nabla;");
246                    mHtmlEncodeMap.put('\u2208', "&isin;");
247                    mHtmlEncodeMap.put('\u2209', "&notin;");
248                    mHtmlEncodeMap.put('\u220B', "&ni;");
249                    mHtmlEncodeMap.put('\u220F', "&prod;");
250                    mHtmlEncodeMap.put('\u2211', "&sum;");
251                    mHtmlEncodeMap.put('\u2212', "&minus;");
252                    mHtmlEncodeMap.put('\u2217', "&lowast;");
253                    mHtmlEncodeMap.put('\u221A', "&radic;");
254                    mHtmlEncodeMap.put('\u221D', "&prop;");
255                    mHtmlEncodeMap.put('\u221E', "&infin;");
256                    mHtmlEncodeMap.put('\u2220', "&ang;");
257                    mHtmlEncodeMap.put('\u2227', "&and;");
258                    mHtmlEncodeMap.put('\u2228', "&or;");
259                    mHtmlEncodeMap.put('\u2229', "&cap;");
260                    mHtmlEncodeMap.put('\u222A', "&cup;");
261                    mHtmlEncodeMap.put('\u222B', "&int;");
262                    mHtmlEncodeMap.put('\u2234', "&there4;");
263                    mHtmlEncodeMap.put('\u223C', "&sim;");
264                    mHtmlEncodeMap.put('\u2245', "&cong;");
265                    mHtmlEncodeMap.put('\u2248', "&asymp;");
266                    mHtmlEncodeMap.put('\u2260', "&ne;");
267                    mHtmlEncodeMap.put('\u2261', "&equiv;");
268                    mHtmlEncodeMap.put('\u2264', "&le;");
269                    mHtmlEncodeMap.put('\u2265', "&ge;");
270                    mHtmlEncodeMap.put('\u2282', "&sub;");
271                    mHtmlEncodeMap.put('\u2283', "&sup;");
272                    mHtmlEncodeMap.put('\u2284', "&nsub;");
273                    mHtmlEncodeMap.put('\u2286', "&sube;");
274                    mHtmlEncodeMap.put('\u2287', "&supe;");
275                    mHtmlEncodeMap.put('\u2295', "&oplus;");
276                    mHtmlEncodeMap.put('\u2297', "&otimes;");
277                    mHtmlEncodeMap.put('\u22A5', "&perp;");
278                    mHtmlEncodeMap.put('\u22C5', "&sdot;");
279                    mHtmlEncodeMap.put('\u2308', "&lceil;");
280                    mHtmlEncodeMap.put('\u2309', "&rceil;");
281                    mHtmlEncodeMap.put('\u230A', "&lfloor;");
282                    mHtmlEncodeMap.put('\u230B', "&rfloor;");
283                    mHtmlEncodeMap.put('\u2329', "&lang;");
284                    mHtmlEncodeMap.put('\u232A', "&rang;");
285                    mHtmlEncodeMap.put('\u25CA', "&loz;");
286                    mHtmlEncodeMap.put('\u2660', "&spades;");
287                    mHtmlEncodeMap.put('\u2663', "&clubs;");
288                    mHtmlEncodeMap.put('\u2665', "&hearts;");
289                    mHtmlEncodeMap.put('\u2666', "&diams;");
290            }
291            
292            private StringUtils()
293            {       // intentionally empty: being private, it keeps code outside this class from instantiating or subclassing it
294            }
295            
296            /**
297             * Transforms a provided <code>String</code> object into a new string,
298             * containing only valid Html characters.
299             *
300             * @param source The string that has to be transformed into a valid Html
301             * string.
302             *
303             * @return The encoded <code>String</code> object.
304             *
305             * @since 1.0
306             */
307            public static String encodeHtml(String source)
308            {
309                    return encode(source, mHtmlEncodeMap);
310            }
311            
312            /**
313             * Transforms a provided <code>String</code> object into a new string,
314             * using the mapping that are provided through the supplied encoding table.
315             *
316             * @param source The string that has to be transformed into a valid string,
317             * using the mappings that are provided through the supplied encoding table.
318             * @param encodingTables A <code>Map</code> object containing the mappings to
319             * transform characters into valid entities. The keys of this map should be
320             * <code>Character</code> objects and the values <code>String</code>
321             * objects.
322             *
323             * @return The encoded <code>String</code> object.
324             *
325             * @since 1.0
326             */
327            private static String encode(String source, CharKeyOpenHashMap encodingTable)
328            {
329                    if (null == source)
330                    {
331                            return null;
332                    }
333                    
334                    if (null == encodingTable)
335                    {
336                            return source;
337                    }
338                    
339                    StringBuffer    encoded_string = null;
340                    char[]                  string_to_encode_array = source.toCharArray();
341                    int                             last_match = -1;
342                    int                             difference = 0;
343                    
344                    for (int i = 0; i < string_to_encode_array.length; i++)
345                    {
346                            char char_to_encode = string_to_encode_array[i];
347                            
348                            if (encodingTable.containsKey(char_to_encode))
349                            {
350                                    if (null == encoded_string)
351                                    {
352                                            encoded_string = new StringBuffer(source.length());
353                                    }
354                                    difference = i - (last_match + 1);
355                                    if (difference > 0)
356                                    {
357                                            encoded_string.append(string_to_encode_array, last_match + 1, difference);
358                                    }
359                                    encoded_string.append(encodingTable.get(char_to_encode));
360                                    last_match = i;
361                            }
362                    }
363                    
364                    if (null == encoded_string)
365                    {
366                            return source;
367                    }
368                    else
369                    {
370                            difference = string_to_encode_array.length - (last_match + 1);
371                            if (difference > 0)
372                            {
373                                    encoded_string.append(string_to_encode_array, last_match + 1, difference);
374                            }
375                            return encoded_string.toString();
376                    }
377            }
378            
379            /**
380             * Checks if the name filters through an including and an excluding
381             * regular expression.
382             *
383             * @param name The <code>String</code> that will be filtered.
384             * @param included The regular expressions that needs to succeed
385             * @param excluded The regular expressions that needs to fail
386             *
387             * @return <code>true</code> if the name filtered through correctly; or
388             * <p>
389             * <code>false</code> otherwise.
390             *
391             * @since 1.0
392             */
393            public static boolean filter(String name, Pattern included, Pattern excluded)
394            {
395                    Pattern[] included_array = null;
396                    if (included != null)
397                    {
398                            included_array = new Pattern[] {included};
399                    }
400                    
401                    Pattern[] excluded_array = null;
402                    if (excluded != null)
403                    {
404                            excluded_array = new Pattern[] {excluded};
405                    }
406                    
407                    return filter(name, included_array, excluded_array);
408            }
409            
410            /**
411             * Checks if the name filters through a series of including and excluding
412             * regular expressions.
413             *
414             * @param name The <code>String</code> that will be filtered.
415             * @param included An array of regular expressions that need to succeed
416             * @param excluded An array of regular expressions that need to fail
417             *
418             * @return <code>true</code> if the name filtered through correctly; or
419             * <p>
420             * <code>false</code> otherwise.
421             *
422             * @since 1.0
423             */
424            public static boolean filter(String name, Pattern[] included, Pattern[] excluded)
425            {
426                    if (null == name)
427                    {
428                            return false;
429                    }
430                    
431                    boolean accepted = false;
432                    
433                    // retain only the includes
434                    if (null == included)
435                    {
436                            accepted = true;
437                    }
438                    else
439                    {
440                            Pattern pattern;
441                            for (int i = 0; i < included.length; i++)
442                            {
443                                    pattern = included[i];
444                                    
445                                    if (pattern != null &&
446                                            pattern.matcher(name).matches())
447                                    {
448                                            accepted = true;
449                                            break;
450                                    }
451                            }
452                    }
453                    
454                    // remove the excludes
455                    if (accepted &&
456                            excluded != null)
457                    {
458                            Pattern pattern;
459                            for (int i = 0; i < excluded.length; i++)
460                            {
461                                    pattern = excluded[i];
462                                    
463                                    if (pattern != null &&
464                                            pattern.matcher(name).matches())
465                                    {
466                                            accepted = false;
467                                            break;
468                                    }
469                            }
470                    }
471                    
472                    return accepted;
473            }
474            
475            /**
476             * Splits a string into different parts, using a seperator string to detect
477             * the seperation boundaries in a case-sensitive manner. The seperator will
478             * not be included in the list of parts.
479             *
480             * @param source The string that will be split into parts.
481             * @param seperator The seperator string that will be used to determine the
482             * parts.
483             *
484             * @return An <code>ArrayList</code> containing the parts as
485             * <code>String</code> objects.
486             *
487             * @since 1.0
488             */
489            public static ArrayList split(String source, String seperator)
490            {
491                    return split(source, seperator, true);
492            }
493            
494            /**
495             * Splits a string into different parts, using a seperator string to detect
496             * the seperation boundaries. The seperator will not be included in the list
497             * of parts.
498             *
499             * @param source The string that will be split into parts.
500             * @param seperator The seperator string that will be used to determine the
501             * parts.
502             * @param matchCase A <code>boolean</code> indicating if the match is going
503             * to be performed in a case-sensitive manner or not.
504             *
505             * @return An <code>ArrayList</code> containing the parts as
506             * <code>String</code> objects.
507             *
508             * @since 1.0
509             */
510            public static ArrayList split(String source, String seperator, boolean matchCase)
511            {
512                    ArrayList       substrings = new ArrayList();
513                    
514                    if (null == source)
515                    {
516                            return substrings;
517                    }
518                    
519                    if (null == seperator)
520                    {
521                            substrings.add(source);
522                            return substrings;
523                    }
524                    
525                    int             current_index = 0;
526                    int             delimiter_index = 0;
527                    String  element = null;
528                    
529                    String  source_lookup_reference = null;
530                    if (!matchCase)
531                    {
532                            source_lookup_reference = source.toLowerCase();
533                            seperator = seperator.toLowerCase();
534                    }
535                    else
536                    {
537                            source_lookup_reference = source;
538                    }
539                    
540                    while (current_index <= source_lookup_reference.length())
541                    {
542                            delimiter_index = source_lookup_reference.indexOf(seperator, current_index);
543                            
544                            if (-1 == delimiter_index)
545                            {
546                                    element = new String(source.substring(current_index, source.length()));
547                                    substrings.add(element);
548                                    current_index = source.length() + 1;
549                            }
550                            else
551                            {
552                                    element = new String(source.substring(current_index, delimiter_index));
553                                    substrings.add(element);
554                                    current_index = delimiter_index + seperator.length();
555                            }
556                    }
557                    
558                    return substrings;
559            }
560            
561            /**
562             * Searches for a string within a specified string in a case-sensitive
563             * manner and replaces every match with another string.
564             *
565             * @param source The string in which the matching parts will be replaced.
566             * @param stringToReplace The string that will be searched for.
567             * @param replacementString The string that will replace each matching part.
568             *
569             * @return A new <code>String</code> object containing the replacement
570             * result.
571             *
572             * @since 1.0
573             */
574            public static String replace(String source, String stringToReplace, String replacementString)
575            {
576                    return replace(source, stringToReplace, replacementString, true);
577            }
578            
579            /**
580             * Searches for a string within a specified string and replaces every match
581             * with another string.
582             *
583             * @param source The string in which the matching parts will be replaced.
584             * @param stringToReplace The string that will be searched for.
585             * @param replacementString The string that will replace each matching part.
586             * @param matchCase A <code>boolean</code> indicating if the match is going
587             * to be performed in a case-sensitive manner or not.
588             *
589             * @return A new <code>String</code> object containing the replacement
590             * result.
591             *
592             * @since 1.0
593             */
594            public static String replace(String source, String stringToReplace, String replacementString, boolean matchCase)
595            {
596                    if (null == source)
597                    {
598                            return null;
599                    }
600                    
601                    if (null == stringToReplace)
602                    {
603                            return source;
604                    }
605                    
606                    if (null == replacementString)
607                    {
608                            return source;
609                    }
610                    
611                    Iterator                string_parts = split(source, stringToReplace, matchCase).iterator();
612                    StringBuffer    new_string = new StringBuffer();
613                    
614                    synchronized (new_string) // speed increase by thread lock pre-allocation
615                    {
616                            while (string_parts.hasNext())
617                            {
618                                    String string_part = (String)string_parts.next();
619                                    new_string.append(string_part);
620                                    if (string_parts.hasNext())
621                                    {
622                                            new_string.append(replacementString);
623                                    }
624                            }
625                            
626                            return new_string.toString();
627                    }
628            }
629            
630            /**
631             * Creates a new string that contains the provided string a number of times.
632             *
633             * @param source The string that will be repeated.
634             * @param count  The number of times that the string will be repeated.
635             * @return A new <code>String</code> object containing the repeated
636             * concatenation result.
637             *
638             * @since 1.0
639             */
640            public static String repeat(String source, int count)
641            {
642                    if (null == source)
643                    {
644                            return null;
645                    }
646                    
647                    StringBuffer new_string = new StringBuffer();
648                    synchronized (new_string) // speed increase by thread lock pre-allocation
649                    {
650                            while (count > 0)
651                            {
652                                    new_string.append(source);
653                                    count --;
654                            }
655                            
656                            return new_string.toString();
657                    }
658            }
659            
660            /**
661             * Converts all tabs on a line to spaces according to the provided tab
662             * width.
663             *
664             * @param line The line whose tabs have to be converted.
665             * @param tabWidth The tab width.
666             * @return A new <code>String</code> object containing the line with the
667             * replaced tabs.
668             * @since 1.0
669             */
670            public static String convertTabsToSpaces(String line, int tabWidth)
671            {
672                    StringBuffer result = new StringBuffer();
673                    
674                    synchronized (result) // speed increase by thread lock pre-allocation
675                    {
676                            int tab_index = -1;
677                            int last_tab_index = 0;
678                            int added_chars = 0;
679                            int tab_size;
680                            while ((tab_index = line.indexOf("\t", last_tab_index)) != -1)
681                            {
682                                    tab_size = tabWidth - ((tab_index + added_chars) % tabWidth);
683                                    if (0 == tab_size)
684                                    {
685                                            tab_size = tabWidth;
686                                    }
687                                    added_chars += tab_size - 1;
688                                    result.append(line.substring(last_tab_index, tab_index));
689                                    result.append(StringUtils.repeat(" ", tab_size));
690                                    last_tab_index = tab_index + 1;
691                            }
692                            if (0 == last_tab_index)
693                            {
694                                    return line;
695                            }
696                            else
697                            {
698                                    result.append(line.substring(last_tab_index));
699                            }
700                    }
701                    
702                    return result.toString();
703            }
704    }
705    
706