Syntax::Ruby

A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax) but because it is not a true lexer, it will make mistakes on some ambiguous cases.

Constants

KEYWORDS

The list of all identifiers recognized as keywords.

Public Instance Methods

setup() click to toggle source

Perform ruby-specific setup

    # File lib/syntax/lang/ruby.rb, line 18
# Perform ruby-specific setup by resetting the state that #step carries
# from one call to the next.
def setup
  # Heredocs announced with "<<" whose bodies have not been scanned yet.
  @heredocs = []
  # @selector:       set after a single "." so the next word is an identifier.
  # @allow_operator: decides whether "/" is an operator or opens a regex.
  @selector = @allow_operator = false
end
step() click to toggle source

Step through a single iteration of the tokenization process.

     # File lib/syntax/lang/ruby.rb, line 25
 # Step through a single iteration of the tokenization process.
 #
 # Inspects the text at the current scan position, consumes one construct and
 # reports it via start_group / append / scan_delimited_region. Three pieces
 # of state carry over between calls:
 #
 # * @selector       - set after a single ".", so that the following word is
 #                     reported as :ident rather than :keyword or :constant.
 # * @allow_operator - set after a token that can end an expression; when it
 #                     is set a "/" is reported as punctuation, otherwise it
 #                     opens a regex.
 # * @heredocs       - [float_right, type, delim] entries recorded at "<<";
 #                     one entry is shifted off and scanned at a line break.
 def step
   case
     when bol? && check( /=begin/ )
       # Block comment: consume through the closing "=end" line.
       start_group( :comment, scan_until( /^=end#{EOL}/ ) )
     when bol? && check( /__END__#{EOL}/ )
       # Everything from __END__ to the end of the input is a comment.
       start_group( :comment, scan_until( /\Z/ ) )
     else
       case
         when check( /def\s+/ )
           start_group :keyword, scan( /def\s+/ )
           start_group :method,  scan_until( /(?=[;(\s]|#{EOL})/ )
         when check( /class\s+/ )
           start_group :keyword, scan( /class\s+/ )
           start_group :class,  scan_until( /(?=[;\s<]|#{EOL})/ )
         when check( /module\s+/ )
           start_group :keyword, scan( /module\s+/ )
           start_group :module,  scan_until( /(?=[;\s]|#{EOL})/ )
         when check( /::/ )
           start_group :punct, scan(/::/)
         when check( /:"/ )
           # :"..." symbol; the quoted part may contain interpolation.
           start_group :symbol, scan(/:/)
           scan_delimited_region :symbol, :symbol, "", true
           @allow_operator = true
         when check( /:'/ )
           # :'...' symbol; no interpolation.
           start_group :symbol, scan(/:/)
           scan_delimited_region :symbol, :symbol, "", false
           @allow_operator = true
         when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
           start_group :symbol, matched
           @allow_operator = true
         when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
           # Character literal such as ?a or ?\n.
           start_group :char, matched
           @allow_operator = true
         when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?(?!\w)/ )
           # The (?!\w) guard keeps words that merely start with one of these
           # literals (e.g. "nilly", "self_test") out of this branch.
           # A trailing "?" or "!" (as in "nil?"), or a preceding ".", makes
           # the word a method name rather than a literal.
           if @selector || matched =~ /[?!]\z/
             start_group :ident,
               scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
           else
             start_group :constant,
               scan(/(__FILE__|__LINE__|true|false|nil|self)/)
           end
           @selector = false
           @allow_operator = true
         when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
           # Integer literal with an explicit radix prefix.
           start_group :number, matched
           @allow_operator = true
         else
           case peek(2)
             when "%r"
               scan_delimited_region :punct, :regex, scan( /../ ), true
               @allow_operator = true
             when "%w", "%q"
               scan_delimited_region :punct, :string, scan( /../ ), false
               @allow_operator = true
             when "%s"
               scan_delimited_region :punct, :symbol, scan( /../ ), false
               @allow_operator = true
             when "%W", "%Q", "%x"
               scan_delimited_region :punct, :string, scan( /../ ), true
               @allow_operator = true
             when /%[^\sa-zA-Z0-9]/
               # Bare %-string such as %(...) or %|...|.
               scan_delimited_region :punct, :string, scan( /./ ), true
               @allow_operator = true
             when "<<"
               # If the character just before "<<" belongs to a word, treat
               # "<<" as the shift/append operator; otherwise try a heredoc.
               saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
               start_group :punct, scan( /<</ )
               if saw_word
                 @allow_operator = false
                 return
               end

               float_right = scan( /-/ )
               append "-" if float_right
               if ( type = scan( /['"]/ ) )
                 append type
                 delim = scan_until( /(?=#{type})/ )
                 if delim.nil?
                   # Unterminated quoted delimiter: consume the rest.
                   append scan_until( /\Z/ )
                   return
                 end
               else
                 delim = scan( /\w+/ ) or return
               end
               start_group :constant, delim
               start_group :punct, scan( /#{type}/ ) if type
               # The body is scanned later, at a line break.
               @heredocs << [ float_right, type, delim ]
               @allow_operator = true
             else
               case peek(1)
                 when /[\n\r]/
                   if @heredocs.empty?
                     start_group :normal, scan( /\s+/ )
                   else
                     scan_heredoc(*@heredocs.shift)
                   end
                   @allow_operator = false
                 when /\s/
                   start_group :normal, scan( /\s+/ )
                 when "#"
                   start_group :comment, scan( /#[^\n\r]*/ )
                 when /[A-Z]/
                   start_group( (@selector ? :ident : :constant), scan( /\w+/ ) )
                   @allow_operator = true
                 when /[a-z_]/
                   word = scan( /\w+[?!]?/ )
                   if !@selector && KEYWORDS.include?( word )
                     start_group :keyword, word
                     @allow_operator = false
                   else
                     start_group :ident, word
                     @allow_operator = true
                   end
                   @selector = false
                 when /\d/
                   start_group :number,
                     scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
                   @allow_operator = true
                 when '"'
                   scan_delimited_region :punct, :string, "", true
                   @allow_operator = true
                 when '/'
                   if @allow_operator
                     # Division (or similar) operator.
                     start_group :punct, scan(%r{/})
                     @allow_operator = false
                   else
                     # Start of a regex literal.
                     scan_delimited_region :punct, :regex, "", true
                     @allow_operator = true
                   end
                 when "'"
                   scan_delimited_region :punct, :string, "", false
                   @allow_operator = true
                 when "."
                   dots = scan( /\.{1,3}/ )
                   start_group :punct, dots
                   # Only a single dot is a method-call selector; ".." and
                   # "..." are range operators.
                   @selector = ( dots.length == 1 )
                 when /[@]/
                   start_group :attribute, scan( /@{1,2}\w*/ )
                   @allow_operator = true
                 when /[$]/
                   start_group :global, scan(/\$/)
                   start_group :global, scan( /\w+|./ ) if check(/./)
                   @allow_operator = true
                 when /[-!?*\/+=<>(\[\{}:;,&|%]/
                   start_group :punct, scan(/./)
                   @allow_operator = false
                 when /[)\]]/
                   start_group :punct, scan(/./)
                   @allow_operator = true
                 else
                   # all else just falls through this, to prevent
                   # infinite loops...
                   append getch
               end
           end
       end
   end
 end

Private Instance Methods

scan_delimited_region( delim_group, inner_group, starter, exprs, delim=nil, heredoc=false ) click to toggle source

Scan a delimited region of text. This handles the simple cases (strings delimited with quotes) as well as the more complex cases of %-strings and here-documents.

  • delim_group is the group to use to classify the delimiters of the region

  • inner_group is the group to use to classify the contents of the region

  • starter is the text to use as the starting delimiter

  • exprs is a boolean flag indicating whether the region is an interpolated string or not

  • delim is the text to use as the delimiter of the region. If nil, the next character will be treated as the delimiter.

  • heredoc is either false, meaning the region is not a heredoc, or :flush (meaning the delimiter must be flush left), or :float (meaning the delimiter doesn’t have to be flush left).

     # File lib/syntax/lang/ruby.rb, line 201
# Scan a delimited region of text. This handles the simple cases (strings
# delimited with quotes) as well as %-strings and here-documents.
#
# * delim_group - group used to classify the delimiters of the region
# * inner_group - group used to classify the contents of the region
# * starter     - text to use as the starting delimiter
# * exprs       - true if the region is an interpolated string
# * delim       - closing delimiter text; if nil, the next character is read
#                 and used as the delimiter (brackets are mirrored)
# * heredoc     - false, or :flush (delimiter must be flush left), or :float
#                 (delimiter may be indented)
#
# Raises RuntimeError ("unexpected match on ...") if the scanner stops on
# text that is neither a backslash, the delimiter, nor an interpolation start.
def scan_delimited_region( delim_group, inner_group, starter, exprs,
  delim=nil, heredoc=false )
  unless delim
    start_group delim_group, starter
    delim = scan( /./ )
    append delim

    # An opening bracket is closed by its counterpart; any other character
    # closes itself.
    delim = case delim
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else delim
    end
  end

  start_region inner_group

  # Build the pattern of "interesting" stops: a backslash, the closing
  # delimiter and, for interpolated regions, the start of #$x, #@x or #{...}.
  items = "\\\\|"
  if heredoc
    items << "(^"
    items << '\s*' if heredoc == :float
    # NOTE(review): inside double quotes "\s" is a literal space, so only
    # spaces (not tabs) are accepted after the delimiter - confirm intent.
    items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
    items << "#{Regexp.escape(delim)}"
  end
  items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )

  loop do
    start = pos
    match = scan_until( items )
    if match.nil?
      # Unterminated region: everything left belongs to it.
      start_group inner_group, scan_until( /\Z/ )
      break
    else
      # pre_match is measured from the beginning of the input, so slice off
      # what had already been consumed before this scan.
      text = pre_match[start..-1]
      start_group inner_group, text if text.length > 0
      case matched.strip
        when "\\"
          if exprs
            start_group :escape, "\\"
            c = getch
            append c
            case c
              when 'x'
                # NOTE(review): scan returns nil if no hex digit follows.
                append scan( /[a-fA-F0-9]{1,2}/ )
              when /[0-7]/
                append scan( /[0-7]{0,2}/ )
            end
          else
            # Non-interpolated regions only know \' and \\ as escapes.
            case peek(1)
              when "'"
                scan(/./)
                start_group :escape, "\\'"
              when "\\"
                scan(/./)
                start_group :escape, "\\\\"
              else
                start_group inner_group, "\\"
            end
          end
        when delim
          end_region inner_group
          start_group delim_group, matched
          break
        when /^#/
          do_highlight = (option(:expressions) == :highlight)
          start_region :expr if do_highlight
          start_group :expr, matched
          # matched is one of "#{", "#$", "#@" or "#@@"; its last character
          # tells which kind of interpolation this is.
          case matched[-1, 1]
            when "{"
              # Collect up to the brace that balances the opening "#{".
              depth = 1
              content = ""
              while depth > 0
                start = pos
                c = scan_until( /[\{}]/ )
                if c.nil?
                  content << scan_until( /\Z/ )
                  break
                else
                  depth += ( matched == "{" ? 1 : -1 )
                  content << pre_match[start..-1]
                  # The final closing brace is emitted separately below.
                  content << matched if depth > 0
                end
              end
              if do_highlight
                subtokenize "ruby", content
                start_group :expr, "}"
              else
                append content + "}"
              end
            when "$", "@"
              # NOTE(review): scan returns nil if no word character follows.
              append scan( /\w+/ )
          end
          end_region :expr if do_highlight
        else raise "unexpected match on #{matched}"
      end
    end
  end
end
scan_heredoc(float, type, delim) click to toggle source

Scan a heredoc beginning at the current position.

  • float indicates whether the delimiter may be floated to the right

  • type is nil, a single quote, or a double quote

  • delim is the delimiter to look for

     # File lib/syntax/lang/ruby.rb, line 309
# Scan a heredoc beginning at the current position.
#
# * float - truthy if the closing delimiter may be indented
# * type  - nil, a single quote, or a double quote
# * delim - the delimiter text to look for
def scan_heredoc(float, type, delim)
  # Only a single-quoted heredoc suppresses interpolation.
  interpolated = (type != "'")
  placement = float ? :float : :flush
  scan_delimited_region( :constant, :string, "", interpolated,
    delim, placement )
end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.