Methods

Included Modules

Files

Class Index [+]

Quicksearch

CodeRay::Scanners::Ruby

This scanner is really complex, since Ruby is a complex language!

It tries to highlight 100% of all common code, and 90% of strange code.

It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.

For now, I think it’s better than the scanners in VIM or Syntax, or any highlighter I was able to find, except Caleb’s RubyLexer.

I hope it’s also better than the rdoc/irb lexer.

Constants

EncodingError

Private Instance Methods

scan_tokens(tokens, options) click to toggle source
     # File lib/coderay/scanners/ruby.rb, line 30
 30:     def scan_tokens tokens, options
 31:       last_token_dot = false
 32:       value_expected = true
 33:       heredocs = nil
 34:       last_state = nil
 35:       state = :initial
 36:       depth = nil
 37:       inline_block_stack = []
 38:       unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
 39:       
 40:       patterns = Patterns  # avoid constant lookup
 41:       
 42:       until eos?
 43:         match = nil
 44:         kind = nil
 45: 
 46:         if state.instance_of? patterns::StringState
 47: # {{{
 48:           match = scan_until(state.pattern) || scan_until(/\z/)
 49:           tokens << [match, :content] unless match.empty?
 50:           break if eos?
 51: 
 52:           if state.heredoc and self[1]  # end of heredoc
 53:             match = getch.to_s
 54:             match << scan_until(/$/) unless eos?
 55:             tokens << [match, :delimiter]
 56:             tokens << [:close, state.type]
 57:             state = state.next_state
 58:             next
 59:           end
 60: 
 61:           case match = getch
 62: 
 63:           when state.delim
 64:             if state.paren
 65:               state.paren_depth -= 1
 66:               if state.paren_depth > 0
 67:                 tokens << [match, :nesting_delimiter]
 68:                 next
 69:               end
 70:             end
 71:             tokens << [match, :delimiter]
 72:             if state.type == :regexp and not eos?
 73:               modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/x)
 74:               tokens << [modifiers, :modifier] unless modifiers.empty?
 75:             end
 76:             tokens << [:close, state.type]
 77:             value_expected = false
 78:             state = state.next_state
 79: 
 80:           when '\\'
 81:             if state.interpreted
 82:               if esc = scan(/ #{patterns::ESCAPE} /x)
 83:                 tokens << [match + esc, :char]
 84:               else
 85:                 tokens << [match, :error]
 86:               end
 87:             else
 88:               case m = getch
 89:               when state.delim, '\\'
 90:                 tokens << [match + m, :char]
 91:               when nil
 92:                 tokens << [match, :error]
 93:               else
 94:                 tokens << [match + m, :content]
 95:               end
 96:             end
 97: 
 98:           when '#'
 99:             case peek(1)
100:             when '{'
101:               inline_block_stack << [state, depth, heredocs]
102:               value_expected = true
103:               state = :initial
104:               depth = 1
105:               tokens << [:open, :inline]
106:               tokens << [match + getch, :inline_delimiter]
107:             when '$', '@'
108:               tokens << [match, :escape]
109:               last_state = state  # scan one token as normal code, then return here
110:               state = :initial
111:             else
112:               raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
113:             end
114: 
115:           when state.paren
116:             state.paren_depth += 1
117:             tokens << [match, :nesting_delimiter]
118: 
119:           when /#{patterns::REGEXP_SYMBOLS}/x
120:             tokens << [match, :function]
121: 
122:           else
123:             raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
124: 
125:           end
126:           next
127: # }}}
128:         else
129: # {{{
130:           if match = scan(/[ \t\f]+/)
131:             kind = :space
132:             match << scan(/\s*/) unless eos? || heredocs
133:             value_expected = true if match.index(?\n)
134:             tokens << [match, kind]
135:             next
136:             
137:           elsif match = scan(/\\?\n/)
138:             kind = :space
139:             if match == "\n"
140:               value_expected = true
141:               state = :initial if state == :undef_comma_expected
142:             end
143:             if heredocs
144:               unscan  # heredoc scanning needs \n at start
145:               state = heredocs.shift
146:               tokens << [:open, state.type]
147:               heredocs = nil if heredocs.empty?
148:               next
149:             else
150:               match << scan(/\s*/) unless eos?
151:             end
152:             tokens << [match, kind]
153:             next
154:           
155:           elsif bol? && match = scan(/\#!.*/)
156:             tokens << [match, :doctype]
157:             next
158:             
159:           elsif match = scan(/\#.*/) or
160:             ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/) )
161:               kind = :comment
162:               tokens << [match, kind]
163:               next
164: 
165:           elsif state == :initial
166: 
167:             # IDENTS #
168:             if match = scan(unicode ? /#{patterns::METHOD_NAME}/o :
169:                                       /#{patterns::METHOD_NAME}/)
170:               if last_token_dot
171:                 kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
172:               else
173:                 kind = patterns::IDENT_KIND[match]
174:                 if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
175:                   kind = :constant
176:                 elsif kind == :reserved
177:                   state = patterns::DEF_NEW_STATE[match]
178:                   value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
179:                 end
180:               end
181:               value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/)
182:             
183:             elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/)
184:               kind = :ident
185:               value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/)
186: 
187:             # OPERATORS #
188:             elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
189:               if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
190:                 value_expected = :set
191:               end
192:               last_token_dot = :set if self[1]
193:               kind = :operator
194:               unless inline_block_stack.empty?
195:                 case match
196:                 when '{'
197:                   depth += 1
198:                 when '}'
199:                   depth -= 1
200:                   if depth == 0  # closing brace of inline block reached
201:                     state, depth, heredocs = inline_block_stack.pop
202:                     heredocs = nil if heredocs && heredocs.empty?
203:                     tokens << [match, :inline_delimiter]
204:                     kind = :inline
205:                     match = :close
206:                   end
207:                 end
208:               end
209: 
210:             elsif match = scan(/ ['"] /x)
211:               tokens << [:open, :string]
212:               kind = :delimiter
213:               state = patterns::StringState.new :string, match == '"', match  # important for streaming
214: 
215:             elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/)
216:               kind = :instance_variable
217: 
218:             elsif value_expected and match = scan(/\//)
219:               tokens << [:open, :regexp]
220:               kind = :delimiter
221:               interpreted = true
222:               state = patterns::StringState.new :regexp, interpreted, match
223: 
224:             # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
225:             elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/) : scan(/#{patterns::NUMERIC}/)
226:               kind = self[1] ? :float : :integer
227: 
228:             elsif match = scan(/#{patterns::SYMBOL}/)
229:               case delim = match[1]
230:               when ?', ?"
231:                 tokens << [:open, :symbol]
232:                 tokens << [':', :symbol]
233:                 match = delim.chr
234:                 kind = :delimiter
235:                 state = patterns::StringState.new :symbol, delim == ?", match
236:               else
237:                 kind = :symbol
238:               end
239: 
240:             elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
241:               value_expected = :set
242:               kind = :operator
243: 
244:             elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/)
245:               indented = self[1] == '-'
246:               quote = self[3]
247:               delim = self[quote ? 4 : 2]
248:               kind = patterns::QUOTE_TO_TYPE[quote]
249:               tokens << [:open, kind]
250:               tokens << [match, :delimiter]
251:               match = :close
252:               heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
253:               heredocs ||= []  # create heredocs if empty
254:               heredocs << heredoc
255: 
256:             elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/)
257:               kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
258:                 raise_inspect 'Unknown fancy string: %%%p' % k, tokens
259:               end
260:               tokens << [:open, kind]
261:               state = patterns::StringState.new kind, interpreted, self[2]
262:               kind = :delimiter
263: 
264:             elsif value_expected and match = scan(/#{patterns::CHARACTER}/)
265:               kind = :integer
266: 
267:             elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
268:               value_expected = :set
269:               kind = :operator
270: 
271:             elsif match = scan(/`/)
272:               if last_token_dot
273:                 kind = :operator
274:               else
275:                 tokens << [:open, :shell]
276:                 kind = :delimiter
277:                 state = patterns::StringState.new :shell, true, match
278:               end
279: 
280:             elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/)
281:               kind = :global_variable
282: 
283:             elsif match = scan(/#{patterns::CLASS_VARIABLE}/)
284:               kind = :class_variable
285: 
286:             else
287:               if !unicode
288:                 # check for unicode
289:                 debug, $DEBUG = $DEBUG, false
290:                 begin
291:                   if check(/./u).size > 1
292:                     # seems like we should try again with unicode
293:                     unicode = true
294:                   end
295:                 rescue
296:                   # bad unicode char; use getch
297:                 ensure
298:                   $DEBUG = debug
299:                 end
300:                 next if unicode
301:               end
302:               kind = :error
303:               match = getch
304: 
305:             end
306: 
307:           elsif state == :def_expected
308:             state = :initial
309:             if scan(/self\./)
310:               tokens << ['self', :pre_constant]
311:               tokens << ['.', :operator]
312:             end
313:             if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o :
314:                                       /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/)
315:               kind = :method
316:             else
317:               next
318:             end
319: 
320:           elsif state == :module_expected
321:             if match = scan(/<</)
322:               kind = :operator
323:             else
324:               state = :initial
325:               if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /x)
326:                 kind = :class
327:               else
328:                 next
329:               end
330:             end
331: 
332:           elsif state == :undef_expected
333:             state = :undef_comma_expected
334:             if match = scan(/#{patterns::METHOD_NAME_EX}/)
335:               kind = :method
336:             elsif match = scan(/#{patterns::SYMBOL}/)
337:               case delim = match[1]
338:               when ?', ?"
339:                 tokens << [:open, :symbol]
340:                 tokens << [':', :symbol]
341:                 match = delim.chr
342:                 kind = :delimiter
343:                 state = patterns::StringState.new :symbol, delim == ?", match
344:                 state.next_state = :undef_comma_expected
345:               else
346:                 kind = :symbol
347:               end
348:             else
349:               state = :initial
350:               next
351:             end
352: 
353:           elsif state == :alias_expected
354:             match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o :
355:                                    /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/)
356:             
357:             if match
358:               tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
359:               tokens << [self[2], :space]
360:               tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
361:             end
362:             state = :initial
363:             next
364: 
365:           elsif state == :undef_comma_expected
366:             if match = scan(/,/)
367:               kind = :operator
368:               state = :undef_expected
369:             else
370:               state = :initial
371:               next
372:             end
373: 
374:           end
375: # }}}
376:           
377:           unless kind == :error
378:             value_expected = value_expected == :set
379:             last_token_dot = last_token_dot == :set
380:           end
381:           
382:           if $CODERAY_DEBUG and not kind
383:             raise_inspect 'Error token %p in line %d' %
384:               [[match, kind], line], tokens, state
385:           end
386:           raise_inspect 'Empty token', tokens unless match
387: 
388:           tokens << [match, kind]
389: 
390:           if last_state
391:             state = last_state
392:             last_state = nil
393:           end
394:         end
395:       end
396: 
397:       inline_block_stack << [state] if state.is_a? patterns::StringState
398:       until inline_block_stack.empty?
399:         this_block = inline_block_stack.pop
400:         tokens << [:close, :inline] if this_block.size > 1
401:         state = this_block.first
402:         tokens << [:close, state.type]
403:       end
404: 
405:       tokens
406:     end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.