Package epydoc :: Package docwriter :: Module html_colorize
[hide private]
[frames | no frames]

Source Code for Module epydoc.docwriter.html_colorize

   1  # 
   2  # epydoc.html: HTML colorizers 
   3  # Edward Loper 
   4  # 
   5  # Created [10/16/02 09:49 PM] 
   6  # $Id: html_colorize.py 1195 2006-04-09 18:12:40Z edloper $ 
   7  # 
   8   
   9  """ 
  10  Functions to produce colorized HTML code for various objects. 
  11  Currently, C{colorize} defines functions to colorize regular 
  12  expressions and doctest blocks. 
  13   
  14  @group Regular Expression Tags: *_TAG 
  15  """ 
  16  __docformat__ = 'epytext en' 
  17   
  18  import sys, sre_parse, sre, re, codecs 
  19  import sre_constants 
  20  from epydoc import log 
  21  from epydoc.util import decode_with_backslashreplace, plaintext_to_html 
  22  from epydoc.util import py_src_filename 
  23  from epydoc.apidoc import * 
  24   
  25  ###################################################################### 
  26  ## Regular expression colorizer 
  27  ###################################################################### 
  28   
  29  # HTML tags for colorize_re 
  30   
  31  RE_TAG         = 're' 
  32  r'''The CSS class for colorizing regular expressions.''' 
  33   
  34  ANY_TAG        = 're-char' 
  35  r'''The CSS class for colorizing C{"."} in regexps.''' 
  36   
  37  ESCAPE_TAG     = 're-char' 
  38  r'''The CSS class for colorizing escaped characters (such as C{r"\("}) 
  39  in regexps.''' 
  40   
  41  CATEGORY_TAG   = 're-char' 
  42  r'''The CSS class for colorizing character categories (such as 
  43  C{r"\d"})) in regexps.''' 
  44   
  45  AT_TAG         = 're-char' 
  46  r'''The CSS class for colorizing character locations (such as C{"^"}) 
  47  in regexps.''' 
  48   
  49  BRANCH_TAG     = 're-op' 
  50  r'''The CSS class for colorizing C{"|"} in regexps.''' 
  51   
  52  STAR_TAG       = 're-op' 
  53  r'''The CSS class for colorizing C{"*"} and C{"*?"} in regexps.''' 
  54   
  55  PLUS_TAG       = 're-op' 
  56  r'''The CSS class for colorizing C{"+"} and C{"+?"} in regexps.''' 
  57   
  58  QMRK_TAG       = 're-op' 
  59  r'''The CSS class for colorizing C{"?"} and C{"??"} in regexps.''' 
  60   
  61  RNG_TAG        = 're-op' 
  62  r'''The CSS class for colorizing repeat ranges (such as C{"a{3,8}"}) in 
  63  regexps.''' 
  64   
  65  PAREN_TAG      = 're-group' 
  66  r'''The CSS class for colorizing parenthases in regexps.''' 
  67   
  68  CHOICE_TAG     = 're-group' 
  69  r'''The CSS class for colorizing character choice expressions (such as 
  70  C{"[abc]"}) in regexps.''' 
  71   
  72  ASSERT_TAG     = 're-group' 
  73  r'''The CSS class for colorizing assertions (such as C{"(?=abc)"}) in 
  74  regexps.''' 
  75   
  76  REF_TAG        = 're-ref' 
  77  r'''The CSS class for colorizing references (such as C{r"\1"}) in 
  78  regexps.''' 
  79   
def colorize_re(regexp):
    r"""
    Build the HTML for a colorized rendering of a regular expression.

    The pattern is parsed with C{sre_parse.parse} and the resulting
    parse tree is rendered by L{_colorize_re}; the result is wrapped
    in a single span whose CSS class is L{RE_TAG}.  The rendering
    uses spans with the following css classes:
      - X{re}: The entire regular expression.
      - X{re-char}: Special characters (such as C{'.'}, C{'\('}),
        character categories (such as C{'\w'}), and locations
        (such as C{'\b'}).
      - X{re-op}: Operators (such as C{'*'} and C{'|'}).
      - X{re-group}: Grouping constructs (such as C{'(...)'}).
      - X{re-ref}: References (such as C{'\1'}).

    @param regexp: The regular expression to colorize.  Either a
        pattern string, or an object that has both a C{pattern} and
        a C{flags} attribute (such as a compiled regexp).
    @type regexp: C{SRE_Pattern} or C{string}
    @return: The HTML code for the colorized pattern.
    @rtype: C{string}
    @raise TypeError: If C{regexp} is neither a string nor a
        regexp-like object, or if its C{pattern} is not a string.
    @raise sre_constants.error: If regexp is not a valid regular
        expression.
    """
    # Work out which pattern text and which flags we are rendering.
    if isinstance(regexp, (str, unicode)):
        source, flags = regexp, 0
    elif hasattr(regexp, 'pattern') and hasattr(regexp, 'flags'):
        source, flags = regexp.pattern, regexp.flags
        if not isinstance(source, (str, unicode)):
            raise TypeError("Bad regexp object -- pattern is not a string")
    else:
        raise TypeError("Expected a regexp or a string")

    # Byte strings go through decode_with_backslashreplace before
    # they are handed to the parser; unicode patterns are used as-is.
    if isinstance(source, str):
        source = decode_with_backslashreplace(source)

    tree = sre_parse.parse(source, flags)
    return '<span class="%s">%s</span>' % (RE_TAG, _colorize_re(tree, 1))
123
# How literal control characters are displayed by _colorize_re.
_RE_CONTROL_ESCAPES = {
    '\t': r'\t', '\n': r'\n', '\r': r'\r', '\f': r'\f', '\v': r'\v',
    }

# The escape sequence displayed for each character category.
_RE_CATEGORY_ESCAPES = {
    sre_constants.CATEGORY_DIGIT: r'\d',
    sre_constants.CATEGORY_NOT_DIGIT: r'\D',
    sre_constants.CATEGORY_SPACE: r'\s',
    sre_constants.CATEGORY_NOT_SPACE: r'\S',
    sre_constants.CATEGORY_WORD: r'\w',
    sre_constants.CATEGORY_NOT_WORD: r'\W',
    }

# The text displayed for each position ("at") code.
_RE_AT_ESCAPES = {
    sre_constants.AT_BEGINNING_STRING: r'\A',
    sre_constants.AT_BEGINNING: r'^',
    sre_constants.AT_END: r'$',
    sre_constants.AT_BOUNDARY: r'\b',
    sre_constants.AT_NON_BOUNDARY: r'\B',
    sre_constants.AT_END_STRING: r'\Z',
    }

def _colorize_re_literal(codepoint):
    """
    Return the HTML for a single literal character of a regexp.

    @param codepoint: The character's code point (the argument of a
        C{LITERAL} parse tree element).
    @type codepoint: C{int}
    @rtype: C{string}
    """
    c = unichr(codepoint)
    if c in _RE_CONTROL_ESCAPES:
        return '<span class="%s">%s</span>' % (ESCAPE_TAG,
                                               _RE_CONTROL_ESCAPES[c])
    if codepoint < 32 or codepoint >= 127:
        # Compare the code point, not the character: comparing a
        # unicode string with an int is never "less than" in Python 2,
        # so the \x form could never be selected.
        if codepoint < 256: template = r'<span class="%s">\x%02x</span>'
        else: template = r'<span class="%s">\u%04x</span>'
        return template % (ESCAPE_TAG, codepoint)
    if c in '.^$\\*+?{}[]|()':
        return r'<span class="%s">\%c</span>' % (ESCAPE_TAG, c)
    return plaintext_to_html(c)

def _colorize_re_repeat(lo, hi, lazy):
    """
    Return the HTML for the repetition operator that follows a
    repeated item.

    @param lo: The minimum repeat count.
    @param hi: The maximum repeat count (C{sre_constants.MAXREPEAT}
        for "unbounded").
    @param lazy: True for the non-greedy variant, which is displayed
        with a trailing C{"?"}.
    @rtype: C{string}
    """
    if hi == sre_constants.MAXREPEAT:
        if lo == 0: tag, text = STAR_TAG, '*'
        elif lo == 1: tag, text = PLUS_TAG, '+'
        else: tag, text = RNG_TAG, '{%d,}' % lo
    elif lo == 0:
        if hi == 1: tag, text = QMRK_TAG, '?'
        else: tag, text = RNG_TAG, '{,%d}' % hi
    elif lo == hi:
        tag, text = RNG_TAG, '{%d}' % hi
    else:
        tag, text = RNG_TAG, '{%d,%d}' % (lo, hi)
    if lazy:
        text += '?'
    return '<span class="%s">%s</span>' % (tag, text)

def _colorize_re(tree, noparen=0):
    """
    Recursively descend the given regexp parse tree to produce the
    HTML code for a colorized version of the regexp.

    Elements whose opcode is not recognized are reported with
    C{log.error} and produce no output.

    @param tree: The regexp parse tree for the regexp that should be
        colorized.
    @type tree: L{sre_parse.SubPattern}
    @param noparen: If true, then don't include parentheses around the
        expression in C{tree}, even if it contains multiple elements.
    @type noparen: C{boolean}
    @return: The HTML code for a colorized version of C{tree}
    @rtype: C{string}
    @raise ValueError: If the tree contains an unknown category or
        position code, or a branch whose first argument is not C{None}.
    """
    result = []
    out = result.append
    parenthesize = len(tree) > 1 and not noparen

    if parenthesize:
        out('<span class="%s">(</span>' % PAREN_TAG)
    for elt in tree:
        op = elt[0]
        args = elt[1]

        if op == sre_constants.LITERAL:
            out(_colorize_re_literal(args))

        elif op == sre_constants.ANY:
            out('<span class="%s">.</span>' % ANY_TAG)

        elif op == sre_constants.BRANCH:
            if args[0] is not None:
                raise ValueError('Branch expected None arg but got %s'
                                 % args[0])
            VBAR = '<span class="%s">|</span>' % BRANCH_TAG
            out(VBAR.join([_colorize_re(item,1) for item in args[1]]))

        elif op == sre_constants.IN:
            # A lone category (e.g. "\d") is shown without brackets.
            if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                out(_colorize_re(args))
            else:
                out('<span class="%s">[</span>' % CHOICE_TAG)
                out(_colorize_re(args, 1))
                out('<span class="%s">]</span>' % CHOICE_TAG)

        elif op == sre_constants.CATEGORY:
            escape = _RE_CATEGORY_ESCAPES.get(args)
            if escape is None:
                raise ValueError('Unknown category %s' % args)
            out('<span class="%s">%s</span>' % (CATEGORY_TAG, escape))

        elif op == sre_constants.AT:
            escape = _RE_AT_ESCAPES.get(args)
            if escape is None:
                raise ValueError('Unknown position %s' % args)
            out('<span class="%s">%s</span>' % (AT_TAG, escape))

        elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
            # args is (min, max, repeated item); MIN_REPEAT is the
            # non-greedy form.
            out(_colorize_re(args[2]))
            out(_colorize_re_repeat(args[0], args[1],
                                    op == sre_constants.MIN_REPEAT))

        elif op == sre_constants.SUBPATTERN:
            if args[0] is None:
                out('<span class="%s">(?:</span>' % PAREN_TAG)
            elif isinstance(args[0], (int, long)):
                # This is cheating:
                out('<span class="%s">(</span>' % PAREN_TAG)
            else:
                out('<span class="%s">(?P&lt;</span>' % PAREN_TAG)
                out('<span class="%s">%s</span>' %
                    (REF_TAG, plaintext_to_html(args[0])))
                out('<span class="%s">&gt;</span>' % PAREN_TAG)
            out(_colorize_re(args[1], 1))
            out('<span class="%s">)</span>' % PAREN_TAG)

        elif op == sre_constants.GROUPREF:
            out('<span class="%s">\\%d</span>' % (REF_TAG, args))

        elif op == sre_constants.RANGE:
            start = _colorize_re_literal(args[0])
            end = _colorize_re_literal(args[1])
            out('%s<span class="%s">-</span>%s' % (start, CHOICE_TAG, end))

        elif op == sre_constants.NEGATE:
            out('<span class="%s">^</span>' % CHOICE_TAG)

        elif op in (sre_constants.ASSERT, sre_constants.ASSERT_NOT):
            if op == sre_constants.ASSERT: kind = '='
            else: kind = '!'
            # args[0] is the direction: 1 for lookahead, -1 for
            # lookbehind.  Both values are true, so the sign (not the
            # truth value) has to be tested.
            if args[0] > 0: opener = '(?' + kind
            else: opener = '(?&lt;' + kind
            out('<span class="%s">%s</span>' % (ASSERT_TAG, opener))
            out(_colorize_re(args[1], 1))
            out('<span class="%s">)</span>' % ASSERT_TAG)

        elif op == sre_constants.NOT_LITERAL:
            lit = _colorize_re_literal(args)
            out('<span class="%s">[^</span>%s<span class="%s">]</span>' %
                (CHOICE_TAG, lit, CHOICE_TAG))
        else:
            log.error("Error colorizing regexp: unknown elt %r" % elt)
    if parenthesize:
        out('<span class="%s">)</span>' % PAREN_TAG)
    return u''.join(result)

######################################################################
## Doctest block colorizer
######################################################################

# Regular expressions for colorize_doctestblock.  The intermediate
# pattern strings are only needed while the two compiled regexps are
# being built, and are deleted again afterwards.
_KEYWORDS = ("del", "from", "lambda", "return", "and", "or", "is",
             "global", "not", "try", "break", "else", "if", "elif",
             "while", "class", "except", "import", "pass", "raise",
             "continue", "finally", "in", "print", "def", "for")
# One capturing group per keyword, each matched as a whole word.
_KEYWORD = '|'.join(r'(\b%s\b)' % _keyword for _keyword in _KEYWORDS)
_STRING = '|'.join((
    r'("""("""|.*?((?!").)"""))',    # triple double-quoted
    r'("("|.*?((?!").)"))',          # double-quoted
    r"('''('''|.*?[^\\']'''))",      # triple single-quoted
    r"('('|.*?[^\\']'))",            # single-quoted
    ))
# Careful with this!  The text being colorized is already HTML-escaped,
# so double quotes appear in it as "&quot;".
_STRING = _STRING.replace('"', '&quot;')
_COMMENT = '(#.*?$)'
_PROMPT = r'(^\s*(&gt;&gt;&gt;|\.\.\.)(\s|$))'

_PROMPT_RE = re.compile(_PROMPT, re.MULTILINE | re.DOTALL)
'''The regular expression used to find Python prompts (">>>" and
"...") in doctest blocks.'''

_DOCTEST_RE = re.compile('|'.join((_STRING, _COMMENT, _KEYWORD)),
                         re.MULTILINE | re.DOTALL)
'''The regular expression used by L{_doctest_sub} to colorize doctest
blocks.'''

del _KEYWORDS, _KEYWORD, _STRING, _COMMENT, _PROMPT

def colorize_doctestblock(str):
    """
    Colorize a doctest block.

    The block is split into runs of source lines (lines that start
    with a C{">>>"} or C{"..."} prompt) and runs of output lines
    (everything else); each run is wrapped in its own span.  The
    following css classes are used:
      - X{py-src}: The Python source code.
      - X{py-prompt}: The ">>>" and "..." prompts.
      - X{py-string}: Strings in the Python source code.
      - X{py-comment}: Comments in the Python source code.
      - X{py-keyword}: Keywords in the Python source code.
      - X{py-output}: Python's output (lines without a prompt).

    The string that is passed in should already have HTML characters
    escaped (e.g., C{">"} should be encoded as C{"&gt;"}).

    @type str: C{string}
    @param str: The contents of the doctest block to be colorized.
    @return: The HTML code for a colorized version of the block.
    @rtype: C{string}
    """
    pieces = []       # finished HTML fragments, in order
    src_lines = []    # prompt lines of the run being collected
    out_lines = []    # output lines of the run being collected

    # The extra '\n' entry is a sentinel: it is not a prompt line, so
    # it flushes any source run that reaches the end of the block.
    for line in str.split('\n') + ['\n']:
        if _PROMPT_RE.match(line):
            # A prompt ends the current run of output lines.
            if out_lines:
                pieces.append('<span class="py-output">%s</span>\n\n' %
                              '\n'.join(out_lines).strip())
                out_lines = []
            src_lines.append(line)
            continue

        # A non-prompt line ends the current run of source lines.
        if src_lines:
            source = ''.join([src + '\n' for src in src_lines])
            # Prompt over-rides other colors (incl string)
            source = _DOCTEST_RE.sub(_doctest_sub, source)
            source = _PROMPT_RE.sub(r'<span class="py-prompt">\1</span>',
                                    source)
            pieces.append('<span class="py-src">%s</span>\n'
                          % source.strip())
            src_lines = []
        out_lines.append(line)

    # Whatever output is left over goes last, unless it is blank.
    trailing = '\n'.join(out_lines).strip()
    if trailing:
        pieces.append('<span class="py-output">%s</span>\n' % trailing)
    return ''.join(pieces).strip()
372
def _doctest_sub(match):
    """
    Substitution callback for C{_DOCTEST_RE.sub}, used by
    L{colorize_doctestblock} to colorize one matched expression.

    The css class is chosen from the way the matched text begins: a
    single quote or C{&quot;} marks a string, C{#} marks a comment,
    and anything else is treated as a keyword.

    @param match: A match produced by L{_DOCTEST_RE}.
    @return: The HTML code for the colorized expression.
    @rtype: C{string}
    @see: L{_DOCTEST_RE}
    """
    text = match.group()
    if text.startswith("'") or text.startswith('&quot;'):
        template = '<span class="py-string">%s</span>'
    elif text[:1] in ('', '#'):
        template = '<span class="py-comment">%s</span>'
    else:
        template = '<span class="py-keyword">%s</span>'
    return template % text
391 392 ###################################################################### 393 ## Python source colorizer 394 ###################################################################### 395 """ 396 Goals: 397 - colorize tokens appropriately (using css) 398 - optionally add line numbers 399 - 400 """ 401 402 #: Javascript code for the PythonSourceColorizer 403 PYSRC_JAVASCRIPTS = '''\ 404 function expand(id) { 405 var elt = document.getElementById(id+"-expanded"); 406 if (elt) elt.style.display = "block"; 407 var elt = document.getElementById(id+"-expanded-linenums"); 408 if (elt) elt.style.display = "block"; 409 var elt = document.getElementById(id+"-collapsed"); 410 if (elt) { elt.innerHTML = ""; elt.style.display = "none"; } 411 var elt = document.getElementById(id+"-collapsed-linenums"); 412 if (elt) { elt.innerHTML = ""; elt.style.display = "none"; } 413 var elt = document.getElementById(id+"-toggle"); 414 if (elt) { elt.innerHTML = "-"; } 415 } 416 417 function collapse(id) { 418 var elt = document.getElementById(id+"-expanded"); 419 if (elt) elt.style.display = "none"; 420 var elt = document.getElementById(id+"-expanded-linenums"); 421 if (elt) elt.style.display = "none"; 422 var elt = document.getElementById(id+"-collapsed-linenums"); 423 if (elt) { elt.innerHTML = "<br/>"; elt.style.display="block"; } 424 var elt = document.getElementById(id+"-toggle"); 425 if (elt) { elt.innerHTML = "+"; } 426 var elt = document.getElementById(id+"-collapsed"); 427 if (elt) { 428 elt.style.display = "block"; 429 430 var indent = elt.indent; 431 var pad = elt.pad; 432 var s = "<span class=\'lineno\'>"; 433 for (var i=0; i<pad.length; i++) { s += "&nbsp;" } 434 s += "</span>"; 435 s += "&nbsp;&nbsp;<span class=\'py-line\'>"; 436 for (var i=0; i<indent.length; i++) { s += "&nbsp;" } 437 s += "<a href=\'#\' onclick=\'expand(\\"" + id; 438 s += "\\");return false\'>...</a></span><br />"; 439 elt.innerHTML = s; 440 } 441 } 442 443 function toggle(id) { 444 elt = 
document.getElementById(id+"-toggle"); 445 if (elt.innerHTML == "-") 446 collapse(id); 447 else 448 expand(id); 449 } 450 function highlight(id) { 451 var elt = document.getElementById(id+"-def"); 452 if (elt) elt.className = "highlight-hdr"; 453 var elt = document.getElementById(id+"-expanded"); 454 if (elt) elt.className = "highlight"; 455 var elt = document.getElementById(id+"-collapsed"); 456 if (elt) elt.className = "highlight"; 457 } 458 459 function num_lines(s) { 460 var n = 1; 461 var pos = s.indexOf("\\n"); 462 while ( pos > 0) { 463 n += 1; 464 pos = s.indexOf("\\n", pos+1); 465 } 466 return n; 467 } 468 469 // Collapse all blocks that mave more than `min_lines` lines. 470 function collapse_all(min_lines) { 471 var elts = document.getElementsByTagName("div"); 472 for (var i=0; i<elts.length; i++) { 473 var elt = elts[i]; 474 var split = elt.id.indexOf("-"); 475 if (split > 0) 476 if (elt.id.substring(split, elt.id.length) == "-expanded") 477 if (num_lines(elt.innerHTML) > min_lines) 478 collapse(elt.id.substring(0, split)); 479 } 480 } 481 482 function expandto(href) { 483 var start = href.indexOf("#")+1; 484 if (start != 0) { 485 if (href.substring(start, href.length) != "-") { 486 collapse_all(4); 487 pos = href.indexOf(".", start); 488 while (pos != -1) { 489 var id = href.substring(start, pos); 490 expand(id); 491 pos = href.indexOf(".", pos+1); 492 } 493 var id = href.substring(start, href.length); 494 expand(id); 495 highlight(id); 496 } 497 } 498 } 499 500 function kill_doclink(id) { 501 if (id) { 502 var parent = document.getElementById(id); 503 parent.removeChild(parent.childNodes.item(0)); 504 } 505 else if (!this.contains(event.toElement)) { 506 var parent = document.getElementById(this.parentID); 507 parent.removeChild(parent.childNodes.item(0)); 508 } 509 } 510 511 function doclink(id, name, targets) { 512 var elt = document.getElementById(id); 513 514 // If we already opened the box, then destroy it. 
515 // (This case should never occur, but leave it in just in case.) 516 if (elt.childNodes.length > 1) { 517 elt.removeChild(elt.childNodes.item(0)); 518 } 519 else { 520 // The outer box: relative + inline positioning. 521 var box1 = document.createElement("div"); 522 box1.style.position = "relative"; 523 box1.style.display = "inline"; 524 box1.style.top = 0; 525 box1.style.left = 0; 526 527 // A shadow for fun 528 var shadow = document.createElement("div"); 529 shadow.style.position = "absolute"; 530 shadow.style.left = "-1.3em"; 531 shadow.style.top = "-1.3em"; 532 shadow.style.background = "#404040"; 533 534 // The inner box: absolute positioning. 535 var box2 = document.createElement("div"); 536 box2.style.position = "relative"; 537 box2.style.border = "1px solid #a0a0a0"; 538 box2.style.left = "-.2em"; 539 box2.style.top = "-.2em"; 540 box2.style.background = "white"; 541 box2.style.padding = ".3em .4em .3em .4em"; 542 box2.style.fontStyle = "normal"; 543 box2.onmouseout=kill_doclink; 544 box2.parentID = id; 545 546 var links = ""; 547 target_list = targets.split(","); 548 for (var i=0; i<target_list.length; i++) { 549 var target = target_list[i].split("="); 550 links += "<li><a href=\'" + target[1] + 551 "\' style=\'text-decoration:none\'>" + 552 target[0] + "</a></li>"; 553 } 554 555 // Put it all together. 556 elt.insertBefore(box1, elt.childNodes.item(0)); 557 //box1.appendChild(box2); 558 box1.appendChild(shadow); 559 shadow.appendChild(box2); 560 box2.innerHTML = 561 "Which <b>"+name+"</b> do you want to see documentation for?" 
+ 562 "<ul style=\'margin-bottom: 0;\'>" + 563 links + 564 "<li><a href=\'#\' style=\'text-decoration:none\' " + 565 "onclick=\'kill_doclink(\\""+id+"\\");return false;\'>"+ 566 "<i>None of the above</i></a></li></ul>"; 567 } 568 } 569 ''' 570 571 PYSRC_EXPANDTO_JAVASCRIPT = '''\ 572 <script type="text/javascript"> 573 <!-- 574 expandto(location.href); 575 // --> 576 </script> 577 ''' 578 579 580 import tokenize, sys, token, cgi, keyword 581 try: from cStringIO import StringIO 582 except: from StringIO import StringIO 583
585 """ 586 A class that renders a python module's source code into HTML 587 pages. These HTML pages are intended to be provided along with 588 the API documentation for a module, in case a user wants to learn 589 more about a particular object by examining its source code. 590 Links are therefore generated from the API documentation to the 591 source code pages, and from the source code pages back into the 592 API documentation. 593 594 The HTML generated by C{PythonSourceColorizer} has several notable 595 features: 596 597 - CSS styles are used to color tokens according to their type. 598 (See L{CSS_CLASSES} for a list of the different token types 599 that are identified). 600 601 - Line numbers are included to the left of each line. 602 603 - The first line of each class and function definition includes 604 a link to the API source documentation for that object. 605 606 - The first line of each class and function definition includes 607 an anchor that can be used to link directly to that class or 608 function. 609 610 - If javascript is enabled, and the page is loaded using the 611 anchor for a class or function (i.e., if the url ends in 612 C{'#I{<name>}'}), then that class or function will automatically 613 be highlighted; and all other classes and function definition 614 blocks will be 'collapsed'. These collapsed blocks can be 615 expanded by clicking on them. 616 617 - Unicode input is supported (including automatic detection 618 of C{'coding:'} declarations). 619 620 """ 621 #: A look-up table that is used to determine which CSS class 622 #: should be used to colorize a given token. The following keys 623 #: may be used: 624 #: - Any token name (e.g., C{'STRING'}) 625 #: - Any operator token (e.g., C{'='} or C{'@'}). 626 #: - C{'KEYWORD'} -- Python keywords such as C{'for'} and C{'if'} 627 #: - C{'DEFNAME'} -- the name of a class or function at the top 628 #: of its definition statement. 
629 #: - C{'BASECLASS'} -- names of base classes at the top of a class 630 #: definition statement. 631 #: - C{'PARAM'} -- function parameters 632 #: - C{'DOCSTRING'} -- docstrings 633 #: - C{'DECORATOR'} -- decorator names 634 #: If no CSS class can be found for a given token, then it won't 635 #: be marked with any CSS class. 636 CSS_CLASSES = { 637 'NUMBER': 'py-number', 638 'STRING': 'py-string', 639 'COMMENT': 'py-comment', 640 'NAME': 'py-name', 641 'KEYWORD': 'py-keyword', 642 'DEFNAME': 'py-def-name', 643 'BASECLASS': 'py-base-class', 644 'PARAM': 'py-param', 645 'DOCSTRING': 'py-docstring', 646 'DECORATOR': 'py-decorator', 647 'OP': 'py-op', 648 '@': 'py-decorator', 649 } 650 651 #: HTML code for the beginning of a collapsable function or class 652 #: definition block. The block contains two <div>...</div> 653 #: elements -- a collapsed version and an expanded version -- and 654 #: only one of these elements is visible at any given time. By 655 #: default, all definition blocks are expanded. 656 #: 657 #: This string should be interpolated with the following values:: 658 #: (name, indentation, name) 659 #: Where C{name} is the anchor name for the function or class; and 660 #: indentation is a string of whitespace used to indent the 661 #: ellipsis marker in the collapsed version. 662 START_DEF_BLOCK = ( 663 '<div id="%s-collapsed" style="display:none;" ' 664 'pad="%s" indent="%s"></div>' 665 '<div id="%s-expanded">') 666 667 #: HTML code for the end of a collapsable function or class 668 #: definition block. 669 END_DEF_BLOCK = '</div>' 670 671 #: A regular expression used to pick out the unicode encoding for 672 #: the source file. 673 UNICODE_CODING_RE = re.compile(r'.*?\n?.*?coding[:=]\s*([-\w.]+)') 674 675 #: A configuration constant, used to determine whether or not to add 676 #: collapsable <div> elements for definition blocks. 677 ADD_DEF_BLOCKS = True 678 679 #: A configuration constant, used to determine whether or not to 680 #: add line numbers. 
681 ADD_LINE_NUMBERS = True 682 683 #: A configuration constant, used to determine whether or not to 684 #: add tooltips for linked names. 685 ADD_TOOLTIPS = True 686 687 #: If true, then try to guess which target is appropriate for 688 #: linked names; if false, then always open a div asking the 689 #: user which one they want. 690 GUESS_LINK_TARGETS = True 691
def __init__(self, module_filename, module_name,
             docindex=None, api_docs=None, url_func=None):
    """
    Create a new HTML colorizer for the specified module.

    @param module_filename: The name of the file containing the
        module; its text will be loaded from this file.
    @param module_name: The dotted name of the module; this will
        be used to create links back into the API source
        documentation.
    @param docindex: The documentation index that is consulted
        when names in the source code are linked.
    @param api_docs: The API documentation objects that names in
        the source code may be linked to.  If it is C{None}, or if
        C{url_func} is C{None}, then no name index is built.
    @param url_func: A function that maps APIDoc -> URL; it returns
        C{None} for objects that have no page to link to.
    """
    # Get the source version, if possible.  This is best-effort: if
    # it fails, the filename is used exactly as it was given.
    try: module_filename = py_src_filename(module_filename)
    except Exception: pass

    #: The filename of the module we're colorizing.
    self.module_filename = module_filename

    #: The dotted name of the module we're colorizing.
    self.module_name = module_name

    self.docindex = docindex

    #: A mapping from short names to lists of ValueDoc.
    self.name_to_docs = {}
    # Both arguments default to None; without both of them there is
    # nothing that could be linked, so the index is left empty rather
    # than failing on the default values.
    if api_docs is not None and url_func is not None:
        for api_doc in api_docs:
            if (api_doc.canonical_name is not None and
                url_func(api_doc) is not None):
                name = api_doc.canonical_name[-1]
                self.name_to_docs.setdefault(name,set()).add(api_doc)

    #: A function that maps APIDoc -> URL
    self.url_func = url_func

    #: The index in C{text} of the last character of the last
    #: token we've processed.
    self.pos = 0

    #: A list that maps line numbers to character offsets in
    #: C{text}.  In particular, line C{M{i}} begins at character
    #: C{line_offset[i]} in C{text}.  Since line numbers begin at
    #: 1, the first element of C{line_offsets} is C{None}.
    self.line_offsets = []

    #: A list of C{(toktype, toktext)} for all tokens on the
    #: logical line that we are currently processing.  Once a
    #: complete line of tokens has been collected in C{cur_line},
    #: it is sent to L{handle_line} for processing.
    self.cur_line = []

    #: A list of the names of the class or functions that include
    #: the current block.  C{context} has one element for each
    #: level of indentation; C{context[i]} is the name of the class
    #: or function defined by the C{i}th level of indentation, or
    #: C{None} if that level of indentation doesn't correspond to a
    #: class or function definition.
    self.context = []

    #: A list of indentation strings for each of the current
    #: block's indents.  I.e., the current total indentation can
    #: be found by taking C{''.join(self.indents)}.
    self.indents = []

    #: The line number of the line we're currently processing.
    self.lineno = 0

    #: The name of the class or function whose definition started
    #: on the previous logical line, or C{None} if the previous
    #: logical line was not a class or function definition.
    self.def_name = None
762 763
def find_line_offsets(self):
    """
    Rebuild the L{line_offsets} table from C{self.text}.

    The table holds C{None} for the non-existent line 0, then the
    character offset at which each line starts, and finally the
    length of the text.
    """
    text = self.text
    # line 0 doesn't exist; line 1 starts at char offset 0.
    offsets = [None, 0]
    # A new line starts right after every newline character.
    offsets.extend([index + 1 for index, char in enumerate(text)
                    if char == '\n'])
    # Add a final entry, marking the end of the string.
    offsets.append(len(text))
    self.line_offsets = offsets
778
def lineno_to_html(self):
    """
    Return the HTML for the current line number (C{self.lineno}),
    right-aligned in a field that is C{self.linenum_size} characters
    wide.

    @rtype: C{string}
    """
    padded = str(self.lineno).rjust(self.linenum_size)
    return '<span class="lineno">%s</span>' % padded
783
def colorize(self):
    """
    Return an HTML string that renders the source code for the
    module that was specified in the constructor.

    The module's file is read, tokenized with C{tokenize.tokenize}
    (each token is passed to L{tokeneater}), and the generated HTML
    is converted to ascii, with non-ascii characters replaced by xml
    character references.  L{PYSRC_EXPANDTO_JAVASCRIPT} is appended
    to the result.

    @rtype: C{string}
    """
    # Initialize all our state variables
    self.pos = 0
    self.cur_line = []
    self.context = []
    self.indents = []
    self.lineno = 1
    self.def_name = None

    # Load the module's text.  The file is closed explicitly, rather
    # than being left for the garbage collector to close.
    source_file = open(self.module_filename)
    try:
        self.text = source_file.read()
    finally:
        source_file.close()
    self.text = self.text.expandtabs().rstrip()+'\n'

    # Construct the line_offsets table.
    self.find_line_offsets()

    # The line number column is made wide enough for the largest
    # line number.
    num_lines = self.text.count('\n')+1
    self.linenum_size = len(str(num_lines+1))

    # Call the tokenizer, and send tokens to our `tokeneater()`
    # method.  If anything goes wrong, then fall-back to using
    # the input text as-is (with no colorization).
    # NOTE(review): the fall-back text is not HTML-escaped here --
    # confirm whether the caller expects that.
    try:
        output = StringIO()
        self.out = output.write
        tokenize.tokenize(StringIO(self.text).readline, self.tokeneater)
        html = output.getvalue()
    except tokenize.TokenError:
        html = self.text

    # Check for a unicode encoding declaration.
    m = self.UNICODE_CODING_RE.match(self.text)
    if m: coding = m.group(1)
    else: coding = 'iso-8859-1'

    # Decode the html string into unicode, and then encode it back
    # into ascii, replacing any non-ascii characters with xml
    # character references.  An unknown encoding name falls back to
    # iso-8859-1.
    try:
        html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')
    except LookupError:
        coding = 'iso-8859-1'
        html = html.decode(coding).encode('ascii', 'xmlcharrefreplace')

    # Call expandto.
    html += PYSRC_EXPANDTO_JAVASCRIPT

    return html
836
def tokeneater(self, toktype, toktext, start, end, line):
    """
    A callback function used by C{tokenize.tokenize} to handle
    each token in the module.  C{tokeneater} collects tokens into
    the C{self.cur_line} list until a complete logical line has
    been formed; and then calls L{handle_line} to process that line.

    @param toktype: The token's type.
    @param toktext: The token's text.
    @param start: The C{(row, column)} position where the token
        begins.
    @param end: The C{(row, column)} position where the token ends
        (not used).
    @param line: The physical line containing the token (not used).
    @raise tokenize.TokenError: If the token is an error token.
    """
    srow, scol = start

    # If we encounter any errors, then just give up.
    if toktype == token.ERRORTOKEN:
        raise tokenize.TokenError(toktype)

    # Did we skip anything whitespace?  If so, add a pseudotoken
    # for it, with toktype=None.  (Note -- this skipped string
    # might also contain continuation slashes; but I won't bother
    # to colorize them.)
    startpos = self.line_offsets[srow] + scol
    if startpos > self.pos:
        skipped = self.text[self.pos:startpos]
        self.cur_line.append( (None, skipped) )

    # Update our position.
    self.pos = startpos + len(toktext)

    # Update our current line.
    self.cur_line.append( (toktype, toktext) )

    # When we reach the end of a line, process it.
    if toktype == token.NEWLINE or toktype == token.ENDMARKER:
        self.handle_line(self.cur_line)
        self.cur_line = []
867 868 _next_uid = 0 869
def handle_line(self, line):
    """
    Render a single logical line from the module, and write the
    generated HTML to C{self.out}.

    As a side effect this updates C{self.lineno} (when line numbers
    are being added), C{self.indents} and C{self.context} (on
    INDENT/DEDENT tokens), and finally sets C{self.def_name} to the
    name of the class or function defined on this line (or C{None}).

    @param line: A single logical line, encoded as a list of
        C{(toktype,toktext)} pairs corresponding to the tokens in
        the line.  Skipped whitespace is represented by pairs whose
        C{toktype} is C{None}.
    """
    # def_name is the name of the function or class defined by
    # this line; or None if no function or class is defined.
    def_name = None

    in_base_list = False
    in_param_list = False
    # Bracket depth inside a parameter's default value: 0 when we are
    # not in a default, 1 directly after the '=', +1 for every open
    # bracket inside the default expression.
    in_param_default = 0
    at_module_top = (self.lineno == 1)

    ended_def_blocks = 0

    # The html output.
    if self.ADD_LINE_NUMBERS:
        s = self.lineno_to_html()
        self.lineno += 1
    else:
        s = ''
    s += ' <span class="py-line">'

    # Loop through each token, and colorize it appropriately.
    for i, (toktype, toktext) in enumerate(line):
        # For each token, determine its css class and whether it
        # should link to a url.
        css_class = None
        url = None
        tooltip = None
        onclick = uid = None # these 3 are used together.

        # Is this token the class name in a class definition?  If
        # so, then make it a link back into the API docs.
        if i >= 2 and line[i-2][1] == 'class':
            in_base_list = True
            css_class = self.CSS_CLASSES['DEFNAME']
            def_name = toktext
            if None not in self.context:
                cls_name = '.'.join(self.context+[def_name])
                url = self.name2url(cls_name)
                s = self.mark_def(s, cls_name)

        # Is this token the function name in a function def?  If
        # so, then make it a link back into the API docs.
        elif i >= 2 and line[i-2][1] == 'def':
            in_param_list = True
            css_class = self.CSS_CLASSES['DEFNAME']
            def_name = toktext
            if None not in self.context:
                cls_name = '.'.join(self.context)
                func_name = '.'.join(self.context+[def_name])
                url = self.name2url(cls_name, def_name)
                s = self.mark_def(s, func_name)

        # For each indent, update the indents list (which we use
        # to keep track of indentation strings) and the context
        # list.  If this indent is the start of a class or
        # function def block, then self.def_name will be its name;
        # otherwise, it will be None.
        elif toktype == token.INDENT:
            self.indents.append(toktext)
            self.context.append(self.def_name)

        # When we dedent, pop the last elements off the indents
        # list and the context list.  If the last context element
        # is a name, then we're ending a class or function def
        # block; so write an end-div tag.
        elif toktype == token.DEDENT:
            self.indents.pop()
            if self.context.pop():
                ended_def_blocks += 1

        # If this token contains whitespace, then don't bother to
        # give it a css tag.
        elif toktype in (None, tokenize.NL, token.NEWLINE,
                         token.ENDMARKER):
            css_class = None

        # Check if the token is a keyword.
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            css_class = self.CSS_CLASSES['KEYWORD']

        elif in_base_list and toktype == token.NAME:
            css_class = self.CSS_CLASSES['BASECLASS']

        elif (in_param_list and toktype == token.NAME and
              not in_param_default):
            css_class = self.CSS_CLASSES['PARAM']

        # Class/function docstring.
        elif (self.def_name and line[i-1][0] == token.INDENT and
              self.is_docstring(line, i)):
            css_class = self.CSS_CLASSES['DOCSTRING']

        # Module docstring.
        elif at_module_top and self.is_docstring(line, i):
            css_class = self.CSS_CLASSES['DOCSTRING']

        # Decorator name: a NAME directly after '@', or after '@'
        # plus one skipped-whitespace pseudotoken.
        elif (toktype == token.NAME and
              ((i > 0 and line[i-1][1] == '@') or
               (i > 1 and line[i-1][0] is None and
                line[i-2][1] == '@'))):
            css_class = self.CSS_CLASSES['DECORATOR']

        # If it's a name, try to link it.
        elif toktype == token.NAME:
            css_class = self.CSS_CLASSES['NAME']
            # If we have a variable named `toktext` in the current
            # context, then link to that.  Note that if we're inside
            # a function, then that function is our context, not
            # the namespace that contains it. [xx] this isn't always
            # the right thing to do.
            if None not in self.context and self.GUESS_LINK_TARGETS:
                container = DottedName(self.module_name, *self.context)
                doc = self.docindex.get_vardoc(container+toktext)
                if doc is not None:
                    url = self.url_func(doc)
            # Otherwise, check the name_to_docs index to see what
            # else this name might refer to.
            if url is None:
                docs = sorted(self.name_to_docs.get(toktext, []))
                if docs:
                    tooltip = '\n'.join(['%s'%d.canonical_name
                                         for d in docs])
                    if len(docs) == 1 and self.GUESS_LINK_TARGETS:
                        url = self.url_func(docs[0])
                    else:
                        uid, onclick = self.doclink(toktext, docs)

        # For all other tokens, look up the CSS class to use
        # based on the token's type.
        else:
            if toktype == token.OP and toktext in self.CSS_CLASSES:
                css_class = self.CSS_CLASSES[toktext]
            elif token.tok_name[toktype] in self.CSS_CLASSES:
                css_class = self.CSS_CLASSES[token.tok_name[toktype]]
            else:
                css_class = None

        # update our status..
        if toktext == ':':
            in_base_list = False
            in_param_list = False
        # Only the '=' that *starts* a default resets the depth
        # counter.  An '=' nested inside the default expression (a
        # keyword argument such as ``a=g(x=1, y=2)``) must leave the
        # bracket depth alone, otherwise the following ',' would be
        # mistaken for the end of the default.
        if (toktext == '=' and in_param_list and
            not in_param_default):
            in_param_default = 1
        if in_param_default:
            if toktext in ('(','[','{'): in_param_default += 1
            if toktext in (')',']','}'): in_param_default -= 1
            if toktext == ',' and in_param_default == 1:
                in_param_default = 0

        # Write this token, with appropriate colorization.
        if tooltip and self.ADD_TOOLTIPS:
            tooltip_html = ' title="%s"' % tooltip
        else:
            tooltip_html = ''
        if css_class:
            css_class_html = ' class="%s"' % css_class
        else:
            css_class_html = ''
        if onclick:
            s += ('<span id="%s"%s><a%s%s href="#" onclick="%s">' %
                  (uid, css_class_html, tooltip_html,
                   css_class_html, onclick))
        elif url:
            s += ('<a%s%s href="%s">' %
                  (tooltip_html, css_class_html, url))
        elif css_class_html or tooltip_html:
            s += '<span%s%s>' % (tooltip_html, css_class_html)
        if i == len(line)-1:
            s += ' </span>' # Closes <span class="py-line">
            s += cgi.escape(toktext)
        else:
            s += self.add_line_numbers(cgi.escape(toktext), css_class)

        # Close exactly what was opened above: the three cases are
        # mutually exclusive, so this must be a single if/elif chain
        # (an onclick link must not also get a bare '</span>').
        if onclick:
            s += "</a></span>"
        elif url:
            s += '</a>'
        elif css_class_html or tooltip_html:
            s += '</span>'

    if self.ADD_DEF_BLOCKS:
        for _ in range(ended_def_blocks):
            self.out(self.END_DEF_BLOCK)

    # Strip any empty <span>s.  The class name is one or more word
    # characters or hyphens (e.g. "py-name").
    s = re.sub(r'<span class="[\w-]+"></span>', '', s)

    # Write the line.
    self.out(s)

    if def_name and None not in self.context:
        self.out('</div>')

    # Add div's if we're starting a def block.
    if (self.ADD_DEF_BLOCKS and def_name and
        (line[-2][1] == ':') and None not in self.context):
        indentation = (''.join(self.indents)+' ').replace(' ', '+')
        linenum_padding = '+'*self.linenum_size
        name = '.'.join(self.context+[def_name])
        self.out(self.START_DEF_BLOCK % (name, linenum_padding,
                                         indentation, name))

    self.def_name = def_name
1075 1088
def doc_descr(self, doc, context):
    """
    Build a short description of C{doc}: its kind (as reported by
    L{doc_kind}), a space, and its canonical name contextualized
    against C{context}.  Routines get a trailing C{"()"}.

    @param doc: The API documentation object to describe.
    @param context: The name passed to
        C{doc.canonical_name.contextualize}.
    @return: The description string.
    """
    relative_name = doc.canonical_name.contextualize(context)
    pieces = ['%s %s' % (self.doc_kind(doc), relative_name)]
    # Mark callables so they read as "Function foo()".
    if isinstance(doc, RoutineDoc):
        pieces.append('()')
    return ''.join(pieces)
1095 1096 # [XX] copied straight from html.py; this should be consolidated, 1097 # probably into apidoc.
def doc_kind(self, doc):
    """
    Return a capitalized label naming the kind of object that C{doc}
    documents: one of C{'Package'}, C{'Script'}, C{'Module'},
    C{'Class'}, C{'Class Method'}, C{'Static Method'}, C{'Method'},
    C{'Function'} or C{'Variable'}.

    @param doc: The API documentation object to classify.
    @return: The label string.
    """
    # Modules: packages first, then scripts (first component of the
    # canonical name starts with 'script'), then plain modules.
    if isinstance(doc, ModuleDoc):
        # Explicit comparison with True is kept on purpose, so that
        # only a value equal to True marks a package.
        if doc.is_package == True:
            return 'Package'
        if doc.canonical_name[0].startswith('script'):
            return 'Script'
        return 'Module'

    # Simple type -> label cases, tested in this order.  The method
    # flavours are checked before the generic RoutineDoc test below
    # (presumably because they are RoutineDoc subclasses -- confirm
    # against apidoc).
    for doc_type, label in ((ClassDoc, 'Class'),
                            (ClassMethodDoc, 'Class Method'),
                            (StaticMethodDoc, 'Static Method')):
        if isinstance(doc, doc_type):
            return label

    # Anything that is not a routine is reported as a variable.
    if not isinstance(doc, RoutineDoc):
        return 'Variable'

    # A routine whose container is a class is a method.
    if isinstance(self.docindex.container(doc), ClassDoc):
        return 'Method'
    return 'Function'
1119
def mark_def(self, s, name):
    """
    Decorate the partially built HTML for a definition line.  If
    C{s} ends with an (as yet unclosed) C{py-line} span preceded by
    a space, then a named anchor and an opening C{<div>} with id
    C{"<name>-def"} are inserted before everything else, and a
    toggle link replaces the space in front of the C{py-line} span.
    The C{<div>} is not closed here.  If C{s} does not have that
    shape it is returned unchanged.

    @param s: The HTML generated so far for the current line.
    @param name: The dotted name of the class or function being
        defined; used for the anchor name and the element ids.
    @return: The decorated HTML string.
    """
    template = ('<a name="%s"></a><div id="%s-def">\\1'
                '<a class="pysrc-toggle" href="#" id="%s-toggle" '
                'onclick="toggle(\'%s\'); return false;">-</a>\\2')
    # Group 1: everything before the py-line span (e.g. the line
    # number); group 2: the py-line span through the end of s.
    line_start = re.compile(r'(.*) (<span class="py-line">.*)\Z')
    return line_start.sub(template % ((name,) * 4), s)
1126
def is_docstring(self, line, i):
    """
    Decide whether token C{i} of C{line} is a string that stands on
    its own, i.e. it is a STRING token and every token from it to the
    end of the line is a string, a comment, a newline or a
    skipped-whitespace pseudotoken (C{toktype} of C{None}).

    @param line: A logical line, as a list of C{(toktype, toktext)}
        pairs.
    @param i: Index into C{line} of the token to test.
    @return: C{True} if the token qualifies, C{False} otherwise.
    """
    if line[i][0] != token.STRING:
        return False
    harmless = (token.NEWLINE, tokenize.COMMENT,
                tokenize.NL, token.STRING, None)
    offending = [toktype for (toktype, toktext) in line[i:]
                 if toktype not in harmless]
    return not offending
1134
def add_line_numbers(self, s, css_class):
    """
    Split the (already escaped) text of a token that may span several
    physical lines.  At every newline in C{s} the currently open
    spans are closed, the newline is written, and a fresh
    C{py-line} span (preceded by the line-number HTML when
    C{self.ADD_LINE_NUMBERS} is set) is opened; if C{css_class} is
    given, the token's own span is closed and re-opened as well.
    C{self.lineno} is incremented once per newline.

    @param s: The token text to process.
    @param css_class: The CSS class of the span wrapping the token,
        or a false value if the token is not wrapped.
    @return: C{s} with the span/line-number markup inserted at each
        newline; C{s} unchanged if it contains no newline.
    """
    segments = s.split('\n')
    pieces = []
    # Every segment except the last was followed by a newline.
    for segment in segments[:-1]:
        pieces.append(segment)
        if css_class:
            pieces.append('</span>')
        pieces.append(' </span>') # py-line
        pieces.append('\n')
        if self.ADD_LINE_NUMBERS:
            pieces.append(self.lineno_to_html())
        pieces.append(' <span class="py-line">')
        if css_class:
            pieces.append('<span class="%s">' % css_class)
        # Advance only after the number for the new line was emitted.
        self.lineno += 1
    pieces.append(segments[-1])
    return ''.join(pieces)
1153
def name2url(self, class_name, func_name=None):
    """
    Build the relative URL of the API documentation page for a class,
    a method, or a module-level function of C{self.module_name}.

    @param class_name: The dotted name of the containing class,
        relative to the module; a false value (C{''} or C{None})
        means the target lives directly in the module.
    @param func_name: The name of the function or method, used as
        the fragment identifier; a false value means "no fragment".
    @return: C{"<module>.<class>-class.html[#<func>]"} when
        C{class_name} is given, otherwise
        C{"<module>-module.html[#<func>]"}.
    """
    if class_name:
        page = '%s.%s-class.html' % (self.module_name, class_name)
    else:
        page = '%s-module.html' % self.module_name
    # Only append a fragment when there is a name to point at, so a
    # missing func_name never yields a bogus "#None" anchor.
    if func_name:
        return '%s#%s' % (page, func_name)
    return page
# Earlier version of the debugging driver below, kept commented out:
# if __name__=='__main__':
#     s = PythonSourceColorizer('../apidoc.py', 'epydoc.apidoc').colorize()
#     import codecs
#     f = codecs.open('/home/edloper/public_html/color.html', 'w', 'ascii', 'xmlcharrefreplace')
#     f.write(s)
#     f.close()

# XHTML prologue (XML declaration, doctype, <head> linking epydoc.css and
# epydoc.js, and the opening <body> tag) used by the debugging driver
# below.  The title is the literal placeholder "$title$"; the driver
# writes HDR as-is and does not substitute it.
HDR = '''\
<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>$title$</title>
<link rel="stylesheet" href="epydoc.css" type="text/css" />
<script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
alink="#204080">
'''
# Closing tags matching the <body> and <html> elements opened by HDR.
FOOT = '</body></html>'
# Debugging driver: colorize one hard-coded source file and write the
# result, wrapped in HDR/FOOT and a <pre class="py-src"> element, to a
# hard-coded output path.  The output is ASCII-encoded, with characters
# outside ASCII written as XML character references.
if __name__=='__main__':
    #s = PythonSourceColorizer('../apidoc.py', 'epydoc.apidoc').colorize()
    s = PythonSourceColorizer('/tmp/fo.py', 'epydoc.apidoc').colorize()
    #print s
    import codecs
    f = codecs.open('/home/edloper/public_html/color3.html', 'w', 'ascii', 'xmlcharrefreplace')
    f.write(HDR+'<pre id="py-src-top" class="py-src">'+s+'</pre>'+FOOT)
    f.close()