KDevelop API Documentation

yyindent.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002 ** Copyright (C) 2000 Trolltech AS.  All rights reserved.
00003 **
00004 ** This file is part of Qt Designer.
00005 **
00006 ** This file may be distributed and/or modified under the terms of the
00007 ** GNU General Public License version 2 as published by the Free Software
00008 ** Foundation and appearing in the file COPYING included in the
00009 ** packaging of this file.
00010 **
00011 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00012 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00013 **
00014 ** See http://www.trolltech.com/gpl/ for GPL licensing information.
00015 **
00016 ** Contact info@trolltech.com if any conditions of this licensing are
00017 ** not clear to you.
00018 **
00019 **********************************************************************/
00020 
00021 /*
00022   This file is a self-contained interactive indenter for C
00023   derivatives, in particular C++.
00024 
00025   The general problem of indenting a C++ program is ill posed. On the
00026   one hand, an indenter has to analyze programs written in a
00027   free-form formal language that is best described in terms of
00028   tokens, not characters, not lines. On the other hand, indentation
00029   applies to lines and white space characters matter, and otherwise
00030   the programs to indent are formally invalid in general, as they are
00031   being edited.
00032 
00033   The approach taken here works line by line. We receive a program
00034   consisting of N lines or more, and we want to compute the
00035   indentation appropriate for the Nth line. Lines beyond the Nth
00036   line are of no concern to us, so for simplicity we pretend the
00037   program has exactly N lines and we call the Nth line the "bottom
00038   line". Typically, we have to indent the bottom line when it's still
00039   empty, so we concentrate our analysis on the N - 1 lines that
00040   precede.
00041 
00042   By inspecting the (N - 1)-th line, the (N - 2)-th line, ...
00043   backwards, we determine the kind of the bottom line and indent it
00044   accordingly.
00045 
00046     * The bottom line is a comment line. See
00047       bottomLineStartsInCComment() and
00048       indentWhenBottomLineStartsInCComment().
00049     * The bottom line is a continuation line. See isContinuationLine()
00050       and indentForContinuationLine().
00051     * The bottom line is a standalone line. See
00052       indentForStandaloneLine().
00053 
00054   Certain tokens that influence the indentation, notably braces, are
00055   looked for in the lines. This is done by simple string comparison,
00056   without a real tokenizer. Confusing constructs such as comments and
00057   string literals are removed beforehand.
00058 */
00059 
00060 #include <qregexp.h>
00061 #include <qmap.h>
00062 #include <qvariant.h>
00063 #include <kdebug.h>
00064 
00065 /*
00066   The indenter avoids getting stuck in almost infinite loops by
00067   imposing arbitrary limits on the number of lines it analyzes when
00068   looking for a construct.
00069 
00070   For example, the indenter never considers more than BigRoof lines
00071   backwards when looking for the start of a C-style comment.
00072 */
00073 static const int SmallRoof = 40;   // loop bound in matchBracelessControlStatement(), indentForContinuationLine() and indentForStandaloneLine()
00074 static const int BigRoof = 400;    // loop bound in bottomLineStartsInCComment()
00075 
00076 /*
00077   The indenter supports a few parameters:
00078 
00079     * ppHardwareTabSize is the size of a '\t' in your favorite editor.
00080     * ppIndentSize is the size of an indentation, the "software tab
00081       size".
00082     * ppContinuationIndentSize is the extra indent for a continuation
00083       line, when there is nothing to align against on the previous
00084       line.
00085     * ppCommentOffset is the indentation within a C-style comment,
00086       when it cannot be picked up.
00087 */
00088 static /*const*/ int ppHardwareTabSize = 8;          // overridden by configureCIndent() key "TabSize"; used as divisor in columnForIndex()
00089 static /*const*/ int ppIndentSize = 4;               // overridden by configureCIndent() key "IndentSize"
00090 static /*const*/ int ppContinuationIndentSize = 8;   // overridden by configureCIndent() key "ContinuationSize"
00091 static /*const*/ int ppCommentOffset = 2;            // overridden by configureCIndent() key "CommentOffset"
00092 
00093 static QRegExp *literal = 0;          // character or string literal (minimal match); created in initializeIndenter()
00094 static QRegExp *label = 0;            // leading "case ...:" or identifier label not followed by a second ':'; capture 1 is the label
00095 static QRegExp *inlineCComment = 0;   // C-style comment opened and closed on the same line (minimal match)
00096 static QRegExp *braceX = 0;           // line starting with '}' followed by "else" or "catch"
00097 static QRegExp *iflikeKeyword = 0;    // one of catch, do, for, if, while as a whole word
00098 
00099 
00100 void configureCIndent( const QMap<QString, QVariant>& values )
00101 {
00102     if( values.contains("TabSize") )
00103         ppHardwareTabSize = values[ "TabSize" ].toInt();
00104 
00105     if( values.contains("IndentSize") )
00106         ppIndentSize = values[ "IndentSize" ].toInt();
00107 
00108     if( values.contains("ContinuationSize") )
00109         ppContinuationIndentSize = values[ "ContinuationSize" ].toInt();
00110 
00111     if( values.contains("CommentOffset") )
00112         ppCommentOffset = values[ "CommentOffset" ].toInt();
00113 }
00114 
00115 /*
00116   Returns the first non-space character in the string t, or
00117   QChar::null if the string is made only of white space.
00118 */
00119 static QChar firstNonWhiteSpace( const QString& t )
00120 {
00121     int i = 0;
00122     while ( i < (int) t.length() ) {
00123     if ( !t[i].isSpace() )
00124         return t[i];
00125     i++;
00126     }
00127     return QChar::null;
00128 }
00129 
00130 /*
00131   Returns TRUE if string t is made only of white space; otherwise
00132   returns FALSE.
00133 */
00134 static bool isOnlyWhiteSpace( const QString& t )
00135 {
00136     return firstNonWhiteSpace( t ).isNull();
00137 }
00138 
00139 /*
00140   Assuming string t is a line, returns the column number of a given
00141   index. Column numbers and index are identical for strings that don't
00142   contain '\t's.
00143 */
00144 static int columnForIndex( const QString& t, int index )
00145 {
00146     int col = 0;
00147     if ( index > (int) t.length() )
00148     index = t.length();
00149 
00150     for ( int i = 0; i < index; i++ ) {
00151     if ( t[i].latin1() == '\t' ) {
00152         col = ( (col / ppHardwareTabSize) + 1 ) * ppHardwareTabSize;
00153     } else {
00154         col++;
00155     }
00156     }
00157     return col;
00158 }
00159 
00160 /*
00161   Returns the indentation size of string t.
00162 */
00163 static int indentOfLine( const QString& t )
00164 {
00165     return columnForIndex( t, t.find(firstNonWhiteSpace(t)) );
00166 }
00167 
00168 /*
00169   Replaces t[k] by ch, unless t[k] is '\t'. Tab characters are better
00170   left alone since they break the "index equals column" rule. No
00171   provisions are taken against '\n' or '\r', which shouldn't occur in
00172   t anyway.
00173 */
00174 static inline void eraseChar( QString& t, int k, QChar ch )
00175 {
00176     if ( t[k] != '\t' )
00177     t[k] = ch;
00178 }
00179 
00180 /*
00181   Removes some nefast constructs from a code line and returns the
00182   resulting line.
00183 */
00184 static QString trimmedCodeLine( const QString& t )
00185 {
00186     QString trimmed = t;
00187     int k;
00188 
00189     /*
00190       Replace character and string literals by X's, since they may
00191       contain confusing characters (such as '{' and ';'). "Hello!" is
00192       replaced by XXXXXXXX. The literals are rigourously of the same
00193       length before and after; otherwise, we would break alignment of
00194       continuation lines.
00195     */
00196     k = 0;
00197     while ( (k = trimmed.find(*literal, k)) != -1 ) {
00198     for ( int i = 0; i < literal->matchedLength(); i++ )
00199         eraseChar( trimmed, k + i, QChar('X') );
00200     k += literal->matchedLength();
00201     }
00202 
00203     /*
00204       Replace inline C-style comments by spaces. Other comments are
00205       handled elsewhere.
00206     */
00207     k = 0;
00208     while ( (k = trimmed.find(*inlineCComment, k)) != -1 ) {
00209     for ( int i = 0; i < inlineCComment->matchedLength(); i++ )
00210         eraseChar( trimmed, k + i, QChar(' ') );
00211     k += inlineCComment->matchedLength();
00212     }
00213 
00214     /*
00215       Replace case and goto labels by spaces, to allow esoteric
00216       alignments:
00217 
00218       foo1: foo2: bar1;
00219               bar2;
00220     */
00221     while ( trimmed.findRev(QChar(':')) != -1 && trimmed.find(*label) != -1 ) {
00222     QString cap1 = label->cap( 1 );
00223     int pos1 = label->pos( 1 );
00224     for ( int i = 0; i < (int) cap1.length(); i++ )
00225         eraseChar( trimmed, pos1 + i, QChar(' ') );
00226     }
00227 
00228     /*
00229       Remove C++-style comments.
00230     */
00231     k = trimmed.find( QString("//") );
00232     if ( k != -1 )
00233     trimmed.truncate( k );
00234 
00235     return trimmed;
00236 }
00237 
00238 /*
00239   Returns '(' if the last parenthesis is opening, ')' if it is
00240   closing, and QChar::null if there are no parentheses in t.
00241 */
00242 static inline QChar lastParen( const QString& t )
00243 {
00244     int i = t.length();
00245     while ( i > 0 ) {
00246     i--;
00247     if ( t[i] == QChar('(') || t[i] == QChar(')') )
00248         return t[i];
00249     }
00250     return QChar::null;
00251 }
00252 
00253 /*
00254   Returns TRUE if typedIn the same as okayCh or is null; otherwise
00255   returns FALSE.
00256 */
00257 static inline bool okay( QChar typedIn, QChar okayCh )
00258 {
00259     return typedIn.isNull() || typedIn == okayCh;
00260 }
00261 
00262 /*
00263   The "linizer" is a group of functions and variables to iterate
00264   through the source code of the program to indent. The program is
00265   given as a list of strings, with the bottom line being the line to
00266   indent. The actual program might contain extra lines, but those are
00267   uninteresting and not passed over to us.
00268 */
00269 
00270 struct LinizerState
00271 {
00272     QString line;              // current line, as cleaned up by readLine()
00273     int braceDepth;            // running balance kept by readLine(): +1 per '}', -1 per '{' (lines are read backwards)
00274     bool leftBraceFollows;     // set by readLine(): TRUE if the line that was current before the call starts with '{'
00275 
00276     QStringList::ConstIterator iter;   // position of the current line in yyProgram
00277     bool inCComment;           // TRUE while readLine() is inside a multi-line C-style comment
00278     bool pendingRightBrace;    // TRUE if the current line matches braceX; its '}' is counted on the next readLine()
00279 };
00280 
00281 static QStringList *yyProgram = 0;           // the lines being analyzed; assigned in indentForBottomLine()
00282 static LinizerState *yyLinizerState = 0;     // allocated in initializeIndenter(), deleted in terminateIndenter()
00283 
00284 // shorthands; startLinizer() points them at the matching members of *yyLinizerState
00285 static const QString *yyLine = 0;
00286 static const int *yyBraceDepth = 0;
00287 static const bool *yyLeftBraceFollows = 0;
00288 
00289 /*
00290   Saves and restores the state of the global linizer. This enables
00291   backtracking.
      
        YY_SAVE() declares a local variable named savedState holding a
        copy of *yyLinizerState, so it can appear only once per scope.
        YY_RESTORE() assigns that copy back and therefore has to be used
        where savedState is visible.
00292 */
00293 #define YY_SAVE() \
00294     LinizerState savedState = *yyLinizerState
00295 #define YY_RESTORE() \
00296     *yyLinizerState = savedState
00297 
00298 /*
00299   Advances to the previous line in yyProgram and update yyLine
00300   accordingly. yyLine is cleaned from comments and other damageable
00301   constructs. Empty lines are skipped.
00302 */
00303 static bool readLine()
00304 {
00305     int k;
00306 
00307     yyLinizerState->leftBraceFollows =
00308         ( firstNonWhiteSpace(yyLinizerState->line) == QChar('{') );
00309 
00310     do {
00311     if ( yyLinizerState->iter == yyProgram->begin() ) {
00312         yyLinizerState->line = QString::null;
00313         return FALSE;
00314     }
00315 
00316     --yyLinizerState->iter;
00317     yyLinizerState->line = *yyLinizerState->iter;
00318 
00319     yyLinizerState->line = trimmedCodeLine( yyLinizerState->line );
00320 
00321     /*
00322       Remove C-style comments that span multiple lines. If the
00323       bottom line starts in a C-style comment, we are not aware
00324       of that and eventually yyLine will contain a slash-aster.
00325 
00326       Notice that both if's can be executed, since
00327       yyLinizerState->inCComment is potentially set to FALSE in
00328       the first if. The order of the if's is also important.
00329     */
00330 
00331     if ( yyLinizerState->inCComment ) {
00332         QString slashAster( "/*" );
00333 
00334         k = yyLinizerState->line.find( slashAster );
00335         if ( k == -1 ) {
00336         yyLinizerState->line = QString::null;
00337         } else {
00338         yyLinizerState->line.truncate( k );
00339         yyLinizerState->inCComment = FALSE;
00340         }
00341     }
00342 
00343     if ( !yyLinizerState->inCComment ) {
00344         QString asterSlash( "*/" );
00345 
00346         k = yyLinizerState->line.find( asterSlash );
00347         if ( k != -1 ) {
00348         for ( int i = 0; i < k + 2; i++ )
00349             eraseChar( yyLinizerState->line, i, QChar(' ') );
00350         yyLinizerState->inCComment = TRUE;
00351         }
00352     }
00353 
00354     /*
00355       Remove preprocessor directives.
00356     */
00357     k = 0;
00358     while ( k < (int) yyLinizerState->line.length() ) {
00359         QChar ch = yyLinizerState->line[k];
00360         if ( ch == QChar('#') ) {
00361         yyLinizerState->line = QString::null;
00362         } else if ( !ch.isSpace() ) {
00363         break;
00364         }
00365         k++;
00366     }
00367 
00368     /*
00369       Remove trailing spaces.
00370     */
00371     k = yyLinizerState->line.length();
00372     while ( k > 0 && yyLinizerState->line[k - 1].isSpace() )
00373         k--;
00374     yyLinizerState->line.truncate( k );
00375 
00376     /*
00377       '}' increment the brace depth and '{' decrements it and not
00378       the other way around, as we are parsing backwards.
00379     */
00380     yyLinizerState->braceDepth +=
00381         yyLinizerState->line.contains( QChar('}') ) -
00382         yyLinizerState->line.contains( QChar('{') );
00383 
00384     /*
00385       We use a dirty trick for
00386 
00387           } else ...
00388 
00389       We don't count the '}' yet, so that it's more or less
00390       equivalent to the friendly construct
00391 
00392           }
00393           else ...
00394     */
00395     if ( yyLinizerState->pendingRightBrace )
00396         yyLinizerState->braceDepth++;
00397     yyLinizerState->pendingRightBrace =
00398         ( yyLinizerState->line.find(*braceX) == 0 );
00399     if ( yyLinizerState->pendingRightBrace )
00400         yyLinizerState->braceDepth--;
00401     } while ( yyLinizerState->line.isEmpty() );
00402 
00403     return TRUE;
00404 }
00405 
00406 /*
00407   Resets the linizer to its initial state, with yyLine containing the
00408   line above the bottom line of the program.
00409 */
00410 static void startLinizer()
00411 {
00412     yyLinizerState->braceDepth = 0;
00413     yyLinizerState->inCComment = FALSE;
00414     yyLinizerState->pendingRightBrace = FALSE;
00415 
00416     yyLine = &yyLinizerState->line;
00417     yyBraceDepth = &yyLinizerState->braceDepth;
00418     yyLeftBraceFollows = &yyLinizerState->leftBraceFollows;
00419 
00420     yyLinizerState->iter = yyProgram->end();
00421     --yyLinizerState->iter;
00422     yyLinizerState->line = *yyLinizerState->iter;
00423     readLine();
00424 }
00425 
00426 /*
00427   Returns TRUE if the start of the bottom line of yyProgram (and
00428   potentially the whole line) is part of a C-style comment; otherwise
00429   returns FALSE.
00430 */
00431 static bool bottomLineStartsInCComment()
00432 {
00433     QString slashAster( "/*" );
00434     QString asterSlash( "*/" );
00435 
00436     /*
00437       We could use the linizer here, but that would slow us down
00438       terribly. We are better to trim only the code lines we need.
00439     */
00440     QStringList::ConstIterator p = yyProgram->end();
00441     --p; // skip bottom line
00442 
00443     for ( int i = 0; i < BigRoof; i++ ) {
00444     if ( p == yyProgram->begin() )
00445         return FALSE;
00446     --p;
00447 
00448     if ( (*p).find(slashAster) != -1 ||
00449          (*p).find(asterSlash) != -1 ) {
00450         QString trimmed = trimmedCodeLine( *p );
00451 
00452         if ( trimmed.find(slashAster) != -1 ) {
00453         return TRUE;
00454         } else if ( trimmed.find(asterSlash) != -1 ) {
00455         return FALSE;
00456         }
00457     }
00458     }
00459     return FALSE;
00460 }
00461 
00462 /*
00463   Returns the recommended indent for the bottom line of yyProgram
00464   assuming that it starts in a C-style comment, a condition that is
00465   tested elsewhere.
00466 
00467   Essentially, we're trying to align against some text on the previous
00468   line.
00469 */
00470 static int indentWhenBottomLineStartsInCComment()
00471 {
00472     int k = yyLine->findRev( QString("/*") );
00473     if ( k == -1 ) {
00474     /*
00475       We found a normal text line in a comment. Align the
00476       bottom line with the text on this line.
00477     */
00478     return indentOfLine( *yyLine );
00479     } else {
00480     /*
00481       The C-style comment starts on this line. If there is
00482       text on the same line, align with it. Otherwise, align
00483       with the slash-aster plus a given offset.
00484     */
00485     int indent = columnForIndex( *yyLine, k );
00486     k += 2;
00487     while ( k < (int) yyLine->length() ) {
00488         if ( !(*yyLine)[k].isSpace() )
00489         return columnForIndex( *yyLine, k );
00490         k++;
00491     }
00492     return indent + ppCommentOffset;
00493     }
00494 }
00495 
00496 /*
00497   A function called match...() modifies the linizer state. If it
00498   returns TRUE, yyLine is the top line of the matched construct;
00499   otherwise, the linizer is left in an unknown state.
00500 
00501   A function called is...() keeps the linizer state intact.
00502 */
00503 
00504 /*
00505   Returns TRUE if the current line (and upwards) forms a braceless
00506   control statement; otherwise returns FALSE.
00507 
00508   The first line of the following example is a "braceless control
00509   statement":
00510 
00511       if ( x )
00512       y;
00513 */
00514 static bool matchBracelessControlStatement()
00515 {
00516     int delimDepth = 0;
00517 
00518     if ( yyLine->endsWith(QString("else")) )
00519     return TRUE;
00520 
00521     if ( !yyLine->endsWith(QChar(')')) )
00522     return FALSE;
00523 
00524     for ( int i = 0; i < SmallRoof; i++ ) {
00525     int j = yyLine->length();
00526     while ( j > 0 ) {
00527         j--;
00528         QChar ch = (*yyLine)[j];
00529 
00530         switch ( ch.unicode() ) {
00531         case ')':
00532         delimDepth++;
00533         break;
00534         case '(':
00535         delimDepth--;
00536         if ( delimDepth == 0 ) {
00537             if ( yyLine->find(*iflikeKeyword) != -1 ) {
00538             /*
00539               We have
00540 
00541                   if ( x )
00542                   y
00543 
00544               "if ( x )" is not part of the statement
00545               "y".
00546             */
00547             return TRUE;
00548             }
00549         }
00550         if ( delimDepth == -1 ) {
00551             /*
00552               We have
00553 
00554               if ( (1 +
00555                 2)
00556 
00557               and not
00558 
00559               if ( 1 +
00560                    2 )
00561             */
00562             return FALSE;
00563         }
00564         break;
00565         case '{':
00566         case '}':
00567         case ';':
00568         /*
00569           We met a statement separator, but not where we
00570           expected it. What follows is probably a weird
00571           continuation line. Be careful with ';' in for,
00572           though.
00573         */
00574         if ( ch != QChar( ';' ) || delimDepth == 0 )
00575             return FALSE;
00576         }
00577     }
00578 
00579     if ( !readLine() )
00580         break;
00581     }
00582     return FALSE;
00583 }
00584 
00585 /*
00586   Returns TRUE if yyLine is an unfinished line; otherwise returns
00587   FALSE.
00588 
00589   In many places, we'll use the terms "standalone line", "unfinished
00590   line" and "continuation line". The meaning of these should be
00591   evident:
00592 
00593       a = b;    // standalone line
00594       c = d +   // unfinished line
00595       e +   // unfinished continuation line
00596       f +   // unfinished continuation line
00597       g;    // continuation line
00598 */
00599 static bool isUnfinishedLine()
00600 {
00601     bool unf = FALSE;
00602 
00603     YY_SAVE();
00604 
00605     if ( yyLine->isEmpty() )
00606     return FALSE;
00607 
00608     QChar lastCh = (*yyLine)[(int) yyLine->length() - 1];
00609     if ( QString("{};").find(lastCh) == -1 ) {
00610     /*
00611       It doesn't end with ';' or similar. If it's not "Q_OBJECT"
00612       nor "if ( x )", it must be an unfinished line.
00613     */
00614     /* qmake ignore Q_OBJECT */
00615     unf = ( yyLine->contains(QString("Q_OBJECT")) == 0 &&
00616         !matchBracelessControlStatement() );
00617     } else if ( lastCh == QChar(';') ) {
00618     if ( lastParen(*yyLine) == QChar('(') ) {
00619         /*
00620           Exception:
00621 
00622           for ( int i = 1; i < 10;
00623         */
00624         unf = TRUE;
00625     } else if ( readLine() && yyLine->endsWith(QChar(';')) &&
00626             lastParen(*yyLine) == QChar('(') ) {
00627         /*
00628           Exception:
00629 
00630           for ( int i = 1;
00631             i < 10;
00632         */
00633         unf = TRUE;
00634     }
00635     }
00636 
00637     YY_RESTORE();
00638     return unf;
00639 }
00640 
00641 /*
00642   Returns TRUE if yyLine is a continuation line; otherwise returns
00643   FALSE.
00644 */
00645 static bool isContinuationLine()
00646 {
00647     bool cont = FALSE;
00648 
00649     YY_SAVE();
00650     if ( readLine() )
00651     cont = isUnfinishedLine();
00652     YY_RESTORE();
00653     return cont;
00654 }
00655 
00656 /*
00657   Returns the recommended indent for the bottom line of yyProgram,
00658   assuming it's a continuation line.
00659 
00660   We're trying to align the continuation line against some parenthesis
00661   or other bracket left open on a previous line, or some interesting
00662   operator such as '='.
      
        Starting with yyLine, at most SmallRoof lines are scanned, each one
        from right to left, moving upwards with readLine(). Returns 0 if
        the scan gives up: the braces become unbalanced without providing
        a hook, the top of the program is reached, or SmallRoof lines have
        been scanned.
00663 */
00664 static int indentForContinuationLine()
00665 {
00666     int braceDepth = 0;
00667     int delimDepth = 0;
00668 
00669     bool leftBraceFollowed = *yyLeftBraceFollows;
00670 
00671     for ( int i = 0; i < SmallRoof; i++ ) {
00672     int hook = -1;
00673 
00674     int j = yyLine->length();
00675     while ( j > 0 && hook < 0 ) {
00676         j--;
00677         QChar ch = (*yyLine)[j];
00678 
00679         switch ( ch.unicode() ) {
00680         case ')':
00681         case ']':
00682         delimDepth++;
00683         break;
00684         case '}':
00685         braceDepth++;
00686         break;
00687         case '(':
00688         case '[':
00689         delimDepth--;
00690         /*
00691           An unclosed delimiter is a good place to align at,
00692           at least for some styles (including Trolltech's).
00693         */
00694         if ( delimDepth == -1 )
00695             hook = j;
00696         break;
00697         case '{':
00698         braceDepth--;
00699         /*
00700           A left brace followed by other stuff on the same
00701           line is typically for an enum or an initializer.
00702           Such a brace must be treated just like the other
00703           delimiters.
00704         */
00705         if ( braceDepth == -1 ) {
00706             if ( j < (int) yyLine->length() - 1 ) {
00707             hook = j;
00708             } else {
00709             return 0; // shouldn't happen
00710             }
00711         }
00712         break;
00713         case '=':
00714         /*
00715           An equal sign is a very natural alignment hook
00716           because it's usually the operator with the lowest
00717           precedence in statements it appears in. Case in
00718           point:
00719 
00720               int x = 1 +
00721                       2;
00722 
00723           However, we have to beware of constructs such as
00724           default arguments and explicit enum constant
00725           values:
00726 
00727               void foo( int x = 0,
00728                         int y = 0 );
00729 
00730           And not
00731 
00732               void foo( int x = 0,
00733                                 int y = 0 );
00734 
00735           These constructs are characterized by a ',' at the
00736           end of the unfinished lines or by unbalanced
00737           parentheses.
      
                An '=' directly preceded by '!', '=', '<' or '>' is
                not considered at all.
                NOTE(review): the first '=' of an "==" is preceded by
                some other character, so it still passes this test --
                confirm whether that is intended.
00738         */
00739         if ( j == 0 || QString("!=<>").find((*yyLine)[j - 1]) == -1 ) {
00740             if ( braceDepth == 0 && delimDepth == 0 &&
00741              j < (int) yyLine->length() - 1 &&
00742              !yyLine->endsWith(QChar(',')) &&
00743              (yyLine->contains(QChar('(')) ==
00744               yyLine->contains(QChar(')'))) )
00745             hook = j;
00746         }
00747         }
00748     }
00749 
00750     if ( hook >= 0 ) {
00751         /*
00752           Yes, we have a delimiter or an operator to align
00753           against! We don't really align against it, but rather
00754           against the following token, if any. In this example,
00755           the following token is "11":
00756 
00757           int x = ( 11 +
00758                     2 );
00759 
00760           If there is no such token, we use a continuation indent:
00761 
00762           static QRegExp foo( QString(
00763                   "foo foo foo foo foo foo foo foo foo") );
00764         */
00765         hook++;
00766         while ( hook < (int) yyLine->length() ) {
00767         if ( !(*yyLine)[hook].isSpace() )
00768             return columnForIndex( *yyLine, hook );
00769         hook++;
00770         }
00771         return indentOfLine( *yyLine ) + ppContinuationIndentSize;
00772     }
00773 
00774     if ( braceDepth != 0 )
00775         break;
00776 
00777     /*
00778       The line's delimiters are balanced. It looks like a
00779       continuation line or something.
00780     */
00781     if ( delimDepth == 0 ) {
00782         if ( isContinuationLine() || leftBraceFollowed ) {
00783         /*
00784           We have
00785 
00786               x = 1 +
00787                   2 +
00788                   3;
00789 
00790           or
00791 
00792               int main()
00793               {
00794 
00795           The "3;" should fall right under the "2 +", and the
00796           "{" under the "int".
00797         */
00798         return indentOfLine( *yyLine );
00799         } else {
00800         /*
00801           We have
00802 
00803               stream << 1 +
00804                       2;
00805 
00806           We could, but we don't, try to analyze which
00807           operator has precedence over which and so on, in
00808           which case we could give the excellent result
00809 
00810               stream << 1 +
00811                         2;
00812 
00813           (We do have a special trick above for the
00814           assignment operator, though.)
00815         */
00816         return indentOfLine( *yyLine ) + ppContinuationIndentSize;
00817         }
00818     }
00819 
00820     if ( !readLine() )
00821         break;
00822     }
00823     return 0;
00824 }
00825 
00826 /*
00827   Returns the recommended indent for the bottom line of yyProgram if
00828   that line is standalone (or should be indented likewise).
00829 
00830   Indenting a standalone line is tricky, mostly because of braceless
00831   control statements. Grossly, we are looking backwards for a special
00832   line, a "hook line", that we can use as a starting point to indent,
00833   and then modify the indentation level according to the braces met
00834   along the way to that hook.
00835 
00836   Let's consider a few examples. In all cases, we want to indent the
00837   bottom line.
00838 
00839   Example 1:
00840 
00841       x = 1;
00842       y = 2;
00843 
00844   The hook line is "x = 1;". We met 0 opening braces and 0 closing
00845   braces. Therefore, "y = 2;" inherits the indent of "x = 1;".
00846 
00847   Example 2:
00848 
00849       if ( x ) {
00850           y;
00851 
00852   The hook line is "if ( x ) {". No matter what precedes it, "y;" has
00853   to be indented one level deeper than the hook line, since we met one
00854   opening brace along the way.
00855 
00856   Example 3:
00857 
00858       if ( a )
00859           while ( b ) {
00860               c;
00861           }
00862       d;
00863 
00864   To indent "d;" correctly, we have to go as far as the "if ( a )".
00865   Compare with
00866 
00867       if ( a ) {
00868           while ( b ) {
00869               c;
00870           }
00871           d;
00872 
00873   Still, we're striving to go back as little as possible to accommodate
00874   people with irregular indentation schemes. A hook line near at hand
00875   is much more reliable than a remote one.
      
        The outer loop runs at most SmallRoof times. Returns 0 if it ends,
        or if the top of the program is reached, before a hook line has
        been found.
00876 */
00877 static int indentForStandaloneLine()
00878 {
00879     for ( int i = 0; i < SmallRoof; i++ ) {
00880     if ( !*yyLeftBraceFollows ) {
00881         YY_SAVE();
00882 
00883         if ( matchBracelessControlStatement() ) {
00884         /*
00885           The situation is this, and we want to indent "z;":
00886 
00887               if ( x &&
00888                    y )
00889                   z;
00890 
00891           yyLine is "if ( x &&".
      
                The linizer is not restored on this path: after a
                successful match, yyLine is the top line of the
                control statement, and that is the line we indent
                against.
00892         */
00893         return indentOfLine( *yyLine ) + ppIndentSize;
00894         }
00895         YY_RESTORE();
00896     }
00897 
00898     if ( yyLine->endsWith(QChar(';')) ||
00899          yyLine->contains(QChar('{')) > 0 ) {
00900         /*
00901           The situation is possibly this, and we want to indent
00902           "z;":
00903 
00904           while ( x )
00905               y;
00906           z;
00907 
00908           We return the indent of "while ( x )". In place of "y;",
00909           any arbitrarily complex compound statement can appear.
      
                First, lines are skipped until the brace depth is no
                longer positive. Then hookState records the candidate
                hook line; it is moved further up for as long as the
                lines above form braceless control statements.
00910         */
00911 
00912         if ( *yyBraceDepth > 0 ) {
00913         do {
00914             if ( !readLine() )
00915             break;
00916         } while ( *yyBraceDepth > 0 );
00917         }
00918 
00919         LinizerState hookState;
00920 
00921         if ( *yyBraceDepth == 0 ) {
00922         while ( isContinuationLine() )
00923             readLine();
00924         hookState = *yyLinizerState;
00925 
00926         readLine();
00927         if ( *yyBraceDepth == 0 ) {
00928             do {
00929             if ( !matchBracelessControlStatement() )
00930                 break;
00931             hookState = *yyLinizerState;
00932             } while ( readLine() );
00933         }
00934         } else {
00935         hookState = *yyLinizerState;
00936         }
00937 
00938         *yyLinizerState = hookState;
00939 
00940         while ( isContinuationLine() )
00941         readLine();
00942 
00943         /*
00944           Never trust lines containing only '{' or '}', as some
00945           people (Richard Stallman) format them weirdly.
00946         */
00947         if ( yyLine->stripWhiteSpace().length() > 1 )
00948         return indentOfLine( *yyLine ) - *yyBraceDepth * ppIndentSize;
00949     }
00950 
00951     if ( !readLine() )
00952         break;
00953     }
00954     return 0;
00955 }
00956 
00957 /*
00958   Constructs global variables used by the indenter.
00959 */
00960 static void initializeIndenter()
00961 {
00962     literal = new QRegExp( QString("([\"'])(?:\\\\.|[^\\\\])*\\1") );
00963     literal->setMinimal( TRUE );
00964     label = new QRegExp( QString(
00965         "^\\s*((?:case\\b[^:]+|[a-zA-Z_0-9]+):)(?!:)") );
00966     inlineCComment = new QRegExp( QString("/\\*.*\\*/") );
00967     inlineCComment->setMinimal( TRUE );
00968     braceX = new QRegExp( QString("^\\s*\\}\\s*(?:else|catch)\\b") );
00969     iflikeKeyword = new QRegExp( QString("\\b(?:catch|do|for|if|while)\\b") );
00970 
00971     yyLinizerState = new LinizerState;
00972 }
00973 
00974 /*
00975   Destroys global variables used by the indenter.
00976 */
00977 static void terminateIndenter()
00978 {
00979     delete literal;
00980     delete label;
00981     delete inlineCComment;
00982     delete braceX;
00983     delete iflikeKeyword;
00984     delete yyLinizerState;
00985 }
00986 
00987 /*
00988   Returns the recommended indent for the bottom line of program.
00989   Unless null, typedIn stores the character of yyProgram that
00990   triggered reindentation.
00991 
00992   This function works better if typedIn is set properly; it is
00993   slightly more conservative if typedIn is completely wild, and
00994   slighly more liberal if typedIn is always null. The user might be
00995   annoyed by the liberal behavior.
00996 */
00997 int indentForBottomLine( const QStringList& program, QChar typedIn )
00998 {
00999     if ( program.isEmpty() )
01000     return 0;
01001 
01002     initializeIndenter();
01003 
01004     yyProgram = new QStringList( program );
01005     startLinizer();
01006 
01007     const QString& bottomLine = program.last();
01008     QChar firstCh = firstNonWhiteSpace( bottomLine );
01009     int indent;
01010 
01011     if ( bottomLineStartsInCComment() ) {
01012     /*
01013       The bottom line starts in a C-style comment. Indent it
01014       smartly, unless the user has already played around with it,
01015       in which case it's better to leave her stuff alone.
01016     */
01017     if ( isOnlyWhiteSpace(bottomLine) ) {
01018         indent = indentWhenBottomLineStartsInCComment();
01019     } else {
01020         indent = indentOfLine( bottomLine );
01021     }
01022     } else if ( okay(typedIn, QChar('#')) && firstCh == QChar('#') ) {
01023     /*
01024       Preprocessor directives go flush left.
01025     */
01026     indent = 0;
01027     } else {
01028     if ( isUnfinishedLine() ) {
01029         indent = indentForContinuationLine();
01030     } else {
01031         indent = indentForStandaloneLine();
01032     }
01033 
01034     if ( okay(typedIn, QChar('}')) && firstCh == QChar('}') ) {
01035         /*
01036           A closing brace is one level more to the left than the
01037           code it follows.
01038         */
01039         indent -= ppIndentSize;
01040     } else if ( okay(typedIn, QChar(':')) ) {
01041         QRegExp caseLabel( QString(
01042             "\\s*(?:case\\b[^:]+|default\\s+):\\s*") );
01043 
01044         if ( caseLabel.exactMatch(bottomLine) ) {
01045         /*
01046           Move a case label one level to the left, but only
01047           if the user did not play around with it yet. Some
01048           users have exotic tastes in the matter, and most
01049           users probably are not patient enough to wait for
01050           the final ':' to format their code properly.
01051 
01052           We don't attempt the same for goto labels, as the
01053           user might be in the middle of "foo::bar".
01054         */
01055         if ( indentOfLine(bottomLine) <= indent )
01056             indent -= ppIndentSize;
01057         else
01058             indent = indentOfLine( bottomLine );
01059         }
01060     }
01061     }
01062     delete yyProgram;
01063     terminateIndenter();
01064     return QMAX( 0, indent );
01065 }
01066 
01067 #ifdef Q_TEST_YYINDENT
01068 /*
01069   Test driver.
01070 */
01071 
01072 #include <qfile.h>
01073 #include <qtextstream.h>
01074 
01075 #include <errno.h>
01076 
01077 static QString fileContents( const QString& fileName )
01078 {
01079     QFile f( fileName );
01080     if ( !f.open(IO_ReadOnly) ) {
01081     qWarning( "yyindent error: Cannot open file '%s' for reading: %s",
01082           fileName.latin1(), strerror(errno) );
01083     return QString::null;
01084     }
01085 
01086     QTextStream t( &f );
01087     QString contents = t.read();
01088     f.close();
01089     if ( contents.isEmpty() )
01090     qWarning( "yyindent error: File '%s' is empty", fileName.latin1() );
01091     return contents;
01092 }
01093 
01094 int main( int argc, char **argv )
01095 {
01096     if ( argc != 2 ) {
01097     qWarning( "usage: yyindent file.cpp" );
01098     return 1;
01099     }
01100 
01101     QString code = fileContents( QString(argv[1]) );
01102     QStringList program = QStringList::split( QChar('\n'), code, TRUE );
01103     QStringList p;
01104     QString out;
01105 
01106     while ( !program.isEmpty() && program.last().stripWhiteSpace().isEmpty() )
01107     program.remove( program.fromLast() );
01108 
01109     QStringList::ConstIterator line = program.begin();
01110     while ( line != program.end() ) {
01111     p.push_back( *line );
01112     QChar typedIn = firstNonWhiteSpace( *line );
01113     if ( p.last().endsWith(QChar(':')) )
01114         typedIn = QChar( ':' );
01115     int indent = indentForBottomLine( p, typedIn );
01116 
01117     if ( !(*line).stripWhiteSpace().isEmpty() ) {
01118         for ( int j = 0; j < indent; j++ )
01119         out += QChar( ' ' );
01120         out += (*line).stripWhiteSpace();
01121     }
01122     out += QChar( '\n' );
01123     line++;
01124     }
01125 
01126     while ( out.endsWith(QChar('\n')) )
01127     out.truncate( out.length() - 1 );
01128 
01129     printf( "%s\n", out.latin1() );
01130     return 0;
01131 }
01132 #endif
KDE Logo
This file is part of the documentation for KDevelop Version 3.1.2.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Tue Feb 22 09:22:24 2005 by doxygen 1.3.9.1 written by Dimitri van Heesch, © 1997-2003