This scanner is really complex, since Ruby is a complex language!
It aims to highlight 100% of common code correctly, and about 90% of unusual code.
It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.
For now, I think it’s better than the scanners in VIM or Syntax, or any highlighter I was able to find, except Caleb’s RubyLexer.
I hope it’s also better than the rdoc/irb lexer.
# File lib/coderay/scanners/ruby.rb, line 30
#
# Scans the scanner's input from the current position to the end and appends
# the resulting tokens to +tokens+.
#
# Tokens are two-element arrays. Text tokens are <tt>[text, kind]</tt>;
# region markers are <tt>[:open, type]</tt> and <tt>[:close, type]</tt>
# (used for strings, regexps, symbols, shell strings, heredocs and
# <tt>#{...}</tt> inline blocks).
#
# tokens::  collection that receives the tokens via <tt><<</tt>.
# options:: not referenced inside this method.
#
# Returns +tokens+.
#
# The method is a state machine. +state+ is either a symbol (:initial,
# :def_expected, :module_expected, :undef_expected, :undef_comma_expected,
# :alias_expected) or a Patterns::StringState while inside a string-like
# literal. +value_expected+ and +last_token_dot+ are one-token lookbehind
# flags: a branch sets them to :set, and after the token is emitted they are
# normalized to true/false, so they only stay true for the very next token.
def scan_tokens tokens, options
  last_token_dot = false
  value_expected = true
  heredocs = nil
  last_state = nil
  state = :initial
  depth = nil
  inline_block_stack = []
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

  patterns = Patterns  # avoid constant lookup

  until eos?
    match = nil
    kind = nil

    if state.instance_of? patterns::StringState
      # {{{
      # Inside a string-like literal: consume plain content up to the next
      # interesting character (or to the end of input).
      match = scan_until(state.pattern) || scan_until(/\z/)
      tokens << [match, :content] unless match.empty?
      break if eos?

      if state.heredoc and self[1]  # end of heredoc
        match = getch.to_s
        match << scan_until(/$/) unless eos?
        tokens << [match, :delimiter]
        tokens << [:close, state.type]
        state = state.next_state
        next
      end

      case match = getch

      when state.delim
        if state.paren
          # Nested bracket pairs inside the literal: only the outermost
          # closing delimiter ends it.
          state.paren_depth -= 1
          if state.paren_depth > 0
            tokens << [match, :nesting_delimiter]
            next
          end
        end
        tokens << [match, :delimiter]
        if state.type == :regexp and not eos?
          modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/x)
          tokens << [modifiers, :modifier] unless modifiers.empty?
        end
        tokens << [:close, state.type]
        value_expected = false
        state = state.next_state

      when '\\'
        if state.interpreted
          if esc = scan(/ #{patterns::ESCAPE} /x)
            tokens << [match + esc, :char]
          else
            tokens << [match, :error]
          end
        else
          # Non-interpreted literal: only the delimiter and the backslash
          # itself can be escaped.
          case m = getch
          when state.delim, '\\'
            tokens << [match + m, :char]
          when nil
            tokens << [match, :error]
          else
            tokens << [match + m, :content]
          end
        end

      when '#'
        case peek(1)
        when '{'
          # Start of an inline block: remember the string state so it can be
          # restored when the matching '}' is reached.
          inline_block_stack << [state, depth, heredocs]
          value_expected = true
          state = :initial
          depth = 1
          tokens << [:open, :inline]
          tokens << [match + getch, :inline_delimiter]
        when '$', '@'
          tokens << [match, :escape]
          last_state = state  # scan one token as normal code, then return here
          state = :initial
        else
          raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
        end

      when state.paren
        state.paren_depth += 1
        tokens << [match, :nesting_delimiter]

      when /#{patterns::REGEXP_SYMBOLS}/x
        tokens << [match, :function]

      else
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

      end
      next
      # }}}
    else
      # {{{
      if match = scan(/[ \t\f]+/)
        kind = :space
        match << scan(/\s*/) unless eos? || heredocs
        value_expected = true if match.index(?\n)
        tokens << [match, kind]
        next

      elsif match = scan(/\\?\n/)
        kind = :space
        if match == "\n"
          value_expected = true
          state = :initial if state == :undef_comma_expected
        end
        if heredocs
          unscan  # heredoc scanning needs \n at start
          state = heredocs.shift
          tokens << [:open, state.type]
          heredocs = nil if heredocs.empty?
          next
        else
          match << scan(/\s*/) unless eos?
        end
        tokens << [match, kind]
        next

      elsif bol? && match = scan(/\#!.*/)
        tokens << [match, :doctype]
        next

      elsif match = scan(/\#.*/) or
        ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/) )
        kind = :comment
        tokens << [match, kind]
        next

      elsif state == :initial

        # IDENTS #
        # NOTE(review): the two regexps below differ only in the `o` flag;
        # a `u` flag on the unicode branch may have been lost - confirm
        # against the upstream file. The same applies to the other
        # `unicode ? ... : ...` regexps in this method.
        if match = scan(unicode ? /#{patterns::METHOD_NAME}/o :
                                  /#{patterns::METHOD_NAME}/)
          if last_token_dot
            kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
          else
            kind = patterns::IDENT_KIND[match]
            if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
              kind = :constant
            elsif kind == :reserved
              state = patterns::DEF_NEW_STATE[match]
              value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
            end
          end
          value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/)

        elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/)
          kind = :ident
          value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/)

        # OPERATORS #
        # The patterns below are written with padding whitespace and need
        # extended mode (x); without it they would require literal spaces.
        elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
          if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
            value_expected = :set
          end
          # Group 1 is the empty group that follows '.' or '::'.
          last_token_dot = :set if self[1]
          kind = :operator
          unless inline_block_stack.empty?
            case match
            when '{'
              depth += 1
            when '}'
              depth -= 1
              if depth == 0  # closing brace of inline block reached
                state, depth, heredocs = inline_block_stack.pop
                heredocs = nil if heredocs && heredocs.empty?
                tokens << [match, :inline_delimiter]
                kind = :inline
                match = :close
              end
            end
          end

        elsif match = scan(/ ['"] /x)
          tokens << [:open, :string]
          kind = :delimiter
          state = patterns::StringState.new :string, match == '"', match  # important for streaming

        elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/)
          kind = :instance_variable

        elsif value_expected and match = scan(/\//)
          tokens << [:open, :regexp]
          kind = :delimiter
          interpreted = true
          state = patterns::StringState.new :regexp, interpreted, match

        # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
        elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/) : scan(/#{patterns::NUMERIC}/)
          kind = self[1] ? :float : :integer

        elsif match = scan(/#{patterns::SYMBOL}/)
          # Character literals (?x) are used so the comparison works whether
          # String#[] returns an Integer or a one-character String.
          case delim = match[1]
          when ?', ?"
            tokens << [:open, :symbol]
            tokens << [':', :symbol]
            match = delim.chr
            kind = :delimiter
            state = patterns::StringState.new :symbol, delim == ?", match
          else
            kind = :symbol
          end

        elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
          value_expected = :set
          kind = :operator

        elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/)
          indented = self[1] == '-'
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          tokens << [:open, kind]
          tokens << [match, :delimiter]
          match = :close
          # A heredoc is interpreted unless its delimiter is single-quoted.
          heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
          heredocs ||= []  # create heredocs if empty
          heredocs << heredoc

        elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/)
          # fetch yields the missing key to the block; bind it as k so the
          # diagnostic below reports it instead of raising a NameError.
          kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
            raise_inspect 'Unknown fancy string: %%%p' % k, tokens
          end
          tokens << [:open, kind]
          state = patterns::StringState.new kind, interpreted, self[2]
          kind = :delimiter

        elsif value_expected and match = scan(/#{patterns::CHARACTER}/)
          kind = :integer

        elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
          value_expected = :set
          kind = :operator

        elsif match = scan(/`/)
          if last_token_dot
            kind = :operator
          else
            tokens << [:open, :shell]
            kind = :delimiter
            state = patterns::StringState.new :shell, true, match
          end

        elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/)
          kind = :global_variable

        elsif match = scan(/#{patterns::CLASS_VARIABLE}/)
          kind = :class_variable

        else
          if !unicode
            # check for unicode
            # $DEBUG is switched off around the probe and restored in ensure.
            debug, $DEBUG = $DEBUG, false
            begin
              if check(/./u).size > 1
                # seems like we should try again with unicode
                unicode = true
              end
            rescue
              # bad unicode char; use getch
            ensure
              $DEBUG = debug
            end
            next if unicode
          end
          kind = :error
          match = getch

        end

      elsif state == :def_expected
        state = :initial
        if scan(/self\./)
          tokens << ['self', :pre_constant]
          tokens << ['.', :operator]
        end
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/)
          kind = :method
        else
          next
        end

      elsif state == :module_expected
        if match = scan(/<</)
          kind = :operator
        else
          state = :initial
          if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /x)
            kind = :class
          else
            next
          end
        end

      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(/#{patterns::METHOD_NAME_EX}/)
          kind = :method
        elsif match = scan(/#{patterns::SYMBOL}/)
          case delim = match[1]
          when ?', ?"
            tokens << [:open, :symbol]
            tokens << [':', :symbol]
            match = delim.chr
            kind = :delimiter
            state = patterns::StringState.new :symbol, delim == ?", match
            state.next_state = :undef_comma_expected
          else
            kind = :symbol
          end
        else
          state = :initial
          next
        end

      elsif state == :alias_expected
        match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o :
                               /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/)

        if match
          # A leading ':' marks the operand as a symbol rather than a method name.
          tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
          tokens << [self[2], :space]
          tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
        end
        state = :initial
        next

      elsif state == :undef_comma_expected
        if match = scan(/,/)
          kind = :operator
          state = :undef_expected
        else
          state = :initial
          next
        end

      end
      # }}}

      # Normalize the lookbehind flags: only a branch that stored :set for
      # this token keeps them true for the next one.
      unless kind == :error
        value_expected = value_expected == :set
        last_token_dot = last_token_dot == :set
      end

      if $CODERAY_DEBUG and not kind
        raise_inspect 'Error token %p in line %d' %
          [[match, kind], line], tokens, state
      end
      raise_inspect 'Empty token', tokens unless match

      tokens << [match, kind]

      # Return to the string state after a single "#$var" / "#@var" token.
      if last_state
        state = last_state
        last_state = nil
      end
    end
  end

  # End of input: close every region that is still open (an unterminated
  # string and any enclosing inline blocks).
  inline_block_stack << [state] if state.is_a? patterns::StringState
  until inline_block_stack.empty?
    this_block = inline_block_stack.pop
    tokens << [:close, :inline] if this_block.size > 1
    state = this_block.first
    tokens << [:close, state.type]
  end

  tokens
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.