Choose!
# Matches the remainder of a bracketed reference id once the opening "[" and
# any leading whitespace have been consumed (see read_ref_id):
#   group 1 - the id itself (no whitespace, no "]"); may be empty
#   group 2 - optional trailing whitespace plus the closing "]"
# NOTE(review): the listing showed this statement twice on one line with the
# character class and backslashes stripped; the pattern below is reconstructed
# from how read_ref_id uses the two groups -- confirm against the original file.
R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
Note: the input array is consumed (its elements are shifted off) while the rule is applied.
# File lib/maruku/input/rubypants.rb, line 192
#
# Applies one substitution rule to a list of span elements.
#
# Every String element is matched against +reg+. On a match, the text before
# the match goes to the output, while the replacement items followed by the
# text after the match are put back on the front of +input+, so they are
# scanned again. Non-String elements and non-matching strings are copied to
# the output unchanged.
#
# reg::   Regexp to look for.
# subst:: Array of replacement items. +:one+ stands for capture group 1 of
#         the match; any other item is passed, as a String, to +md_entity+.
# input:: Array of elements. It is consumed: elements are shifted off until
#         it is exhausted (or a nil/false element is reached).
#
# Returns a new Array holding the rewritten elements.
#
# Because replacements are rescanned, a rule whose +:one+ capture can itself
# match +reg+ again would never terminate.
def apply_one_rule(reg, subst, input)
  output = []
  while (first = input.shift)
    if first.kind_of?(String) && (m = reg.match(first))
      output.push(m.pre_match) unless m.pre_match.empty?
      input.unshift(m.post_match) unless m.post_match.empty?
      # Unshift back-to-front so the replacements end up in +subst+ order.
      subst.reverse_each do |x|
        input.unshift(x == :one ? m[1] : md_entity(x.to_s))
      end
    else
      output.push(first)
    end
  end
  output
end
# File lib/maruku/input/charsource.rb, line 154
#
# Builds a multi-line, human-readable description of position +buffer_index+
# inside +buffer+: a window of at most 75 characters around the position
# (newlines shown as "N", tabs as "T", "EOF" appended when the window reaches
# the end of the buffer), a marker line pointing at the position, the byte
# offset, and finally the whole buffer as formatted by +add_tabs+.
#
# buffer::       String being parsed.
# buffer_index:: Integer offset of the position to describe.
#
# Returns the description as a String.
def describe_pos(buffer, buffer_index)
  len = 75
  room_before = buffer_index
  room_after = buffer.size - buffer_index

  # Start from half a window after the position, then let each side grow
  # into whatever space the other side does not need.
  num_after = [len / 2, room_after].min
  num_before = [room_before, len - num_after].min
  num_after = [room_after, len - num_before].min

  index_start = [buffer_index - num_before, 0].max
  index_end = [buffer_index + num_after, buffer.size].min
  size = index_end - index_start

  str = buffer[index_start, size].tr("\n\t", "NT")
  str += "EOF" if index_end == buffer.size

  pre_s = [buffer_index - index_start, 0].max
  pre_s2 = [len - pre_s, 0].max
  pre = " " * pre_s

  # The range shown is [index_start, index_end]; the original message printed
  # the window length in place of the end index.
  "-" * len + "\n" +
    str + "\n" +
    "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
    pre + "+--- Byte #{buffer_index}\n" +
    "Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n" +
    add_tabs(buffer, 1, ">")
end
# File lib/maruku/input/rubypants.rb, line 207
#
# Runs every rule in +Rules+ over +elements+ (via +apply_one_rule+), removes
# empty strings and merges runs of consecutive strings into one string.
#
# elements:: Array of Strings and other span elements.
#
# Returns a new Array. Merged strings are freshly built, so the String
# objects passed in are never modified (and frozen strings are accepted).
def educate(elements)
  Rules.each do |reg, subst|
    elements = apply_one_rule(reg, subst, elements)
  end

  # strips empty strings
  elements.delete_if { |x| x.kind_of?(String) && x.empty? }

  # join consecutive strings
  final = []
  elements.each do |x|
    if x.kind_of?(String) && final.last.kind_of?(String)
      # Build a new string instead of appending in place with <<.
      final[-1] = final.last + x
    else
      final << x
    end
  end
  final
end
# File lib/maruku/input/parse_span_better.rb, line 301
#
# Reads the meta-data part of an extension.
#
# If the source starts with "name:", the attribute list that follows is
# stored in the document under that name (doc.ald[name]) and an md_ald
# element is pushed on +con+. Otherwise the attribute list is anonymous and
# only an md_ial element is pushed.
#
# src::            character source, positioned at the meta-data.
# con::            span context receiving the resulting element.
# break_on_chars:: passed through to read_attribute_list.
def extension_meta(src, con, break_on_chars)
  if (m = src.read_regexp(/([^\s\:\"\']+):/))
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    # No name was read on this path, so nothing is registered in doc.ald
    # (the original stored the list under a nil key here).
    con.push md_ial(al)
  end
end
Start: cursor on the character *after* '{'. End: cursor on '}' or EOF.
# Interprets the content of a "{...}" extension, dispatching on src.cur_char.
# The first `when` branch skips one character and then calls extension_meta;
# the second calls extension_meta directly. Otherwise the text is read with
# read_simple, a recovery message is reported through maruku_recover, and
# extension_meta is still called. The `if false else` construct means the
# "known extension" branch is empty and never taken.
# NOTE(review): the character literals after `when` and inside `escaped=[...]`
# look mangled by text extraction (presumably ':' then '#', '.' and '}') --
# confirm against the original file.
# File lib/maruku/input/parse_span_better.rb, line 277 def interpret_extension(src, con, break_on_chars) case src.cur_char when :: src.ignore_char # : extension_meta(src, con, break_on_chars) when ##, .. extension_meta(src, con, break_on_chars) else stuff = read_simple(src, escaped=[}}], break_on_chars, []) if stuff =~ /^(\w+\s|[^\w])/ extension_id = $1.strip if false else maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+ "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con extension_meta(src, con, break_on_chars) end else maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con extension_meta(src, con, break_on_chars) end end end
# File lib/maruku/input/parse_span_better.rb, line 35
#
# Joins +lines+ with newlines and parses the resulting text as a span.
#
# lines::  Array of Strings.
# parent:: passed through unchanged to parse_span_better.
def parse_lines_as_span(lines, parent=nil)
  text = lines.join("\n")
  parse_span_better(text, parent)
end
# File lib/maruku/input/parse_span_better.rb, line 39
#
# Parses +string+ as span-level content.
#
# A frozen copy of the string is wrapped in a CharSource and handed to
# read_span, which reads until the end of the input (exit char nil).
# Anything that is not a String is reported through +error+.
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)

  # `+ ""` makes a copy, so the caller's string is not the one being frozen.
  frozen_copy = (string + "").freeze
  read_span(CharSource.new(frozen_copy, parent), EscapedCharInText, [nil])
end
# File lib/maruku/input/parse_span_better.rb, line 421
#
# Reads an emphasis span: skips one character, reads the inner span up to
# the +delim+ string, skips one more character and wraps the children in
# md_em.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
# Reads an "<address>" e-mail element: skips the leading "<", reads the text
# up to the closing delimiter with read_simple, skips that delimiter, removes
# a leading "mailto:" and pushes md_email(address) onto con.
# NOTE(review): `[>>]` looks mangled by text extraction (presumably a
# one-character literal for '>') -- confirm against the original file.
# File lib/maruku/input/parse_span_better.rb, line 323 def read_email_el(src,con) src.ignore_char # leading < mail = read_simple(src, [], [>>]) src.ignore_char # closing > address = mail.gsub(/^mailto:/,'') con.push_element md_email(address) end
# File lib/maruku/input/parse_span_better.rb, line 435
#
# Reads a combined emphasis+strong span: skips three characters, reads the
# inner span up to the +delim+ string, skips three more characters and wraps
# the children in md_emstrong.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
# File lib/maruku/input/parse_span_better.rb, line 462
#
# Reads a bracketed reference id with read_ref_id and pushes the matching
# footnote-reference element (md_foot_ref) onto +con+.
def read_footnote_ref(src,con)
  con.push_element(md_foot_ref(read_ref_id(src, con)))
end
# Reads an image starting at "![": skips the two opening characters, reads the
# alt text as a span, skips the closing bracket and an optional single space,
# then dispatches on the next character:
#  * inline form: reads the url (reported via `error` when missing) and an
#    optional quoted title, checks the closing character and pushes
#    md_im_image(alt_text, url, title);
#  * reference form: reads the ref id with read_ref_id (an empty id falls back
#    to the alt text), sanitizes it and pushes md_image(alt_text, ref_id);
#  * otherwise: uses the sanitized alt text as ref id and pushes md_image.
# NOTE(review): the character literals ("]]", "[[", "((", "))", "\t\") look
# mangled by text extraction (presumably ']', '[', '(', ')' and tab) --
# confirm against the original file.
# File lib/maruku/input/parse_span_better.rb, line 615 def read_image(src, con) src.ignore_chars(2) # opening "![" alt_text = read_span(src, EscapedCharInText, []]]) src.ignore_char # closing bracket # ignore space if src.cur_char == SPACE and (src.next_char == [[ or src.next_char == (( ) src.ignore_char end case src.cur_char when (( src.ignore_char # opening ( src.consume_whitespace url = read_url(src, [SPACE,\t\,))]) if not url error "Could not read url from #{src.cur_chars(10).inspect}", src,con end src.consume_whitespace title = nil if src.cur_char != )) # we have a title quote_char = src.cur_char title = read_quoted(src,con) if not title maruku_error 'Must quote title',src,con else # Tries to read a title with quotes:  # this is the most ugly thing in Markdown if not src.next_matches(/\s*\)/) # if there is not a closing par ), then read # the rest and guess it's title with quotes rest = read_simple(src, escaped=[], break_on_chars=[))], break_on_strings=[]) # chop the closing char rest.chop! title << quote_char << rest end end end src.consume_whitespace closing = src.shift_char # closing ) if closing != )) error( ("Unclosed link: '"<<closing<<"'")+ " Read url=#{url.inspect} title=#{title.inspect}",src,con) end con.push_element md_im_image(alt_text, url, title) when [[ # link ref ref_id = read_ref_id(src,con) if not ref_id # TODO: check around error('Reference not closed.', src, con) ref_id = "" end if ref_id.size == 0 ref_id = alt_text.to_s end ref_id = sanitize_ref_id(ref_id) con.push_element md_image(alt_text, ref_id) else # no stuff ref_id = sanitize_ref_id(alt_text.to_s) con.push_element md_image(alt_text, ref_id) end end
# Reads an inline code span: counts and skips the opening backticks, reads
# the text up to a run of the same number of backticks, skips that run, and
# -- only when more than one backtick was used -- drops one leading and one
# trailing space. The result is pushed as md_code(code).
# NOTE(review): read_simple returns nil for empty text, so `code[0]` would
# fail for an empty multi-tick span -- verify whether that input can occur.
# NOTE(review): the literal compared with src.cur_char looks mangled by text
# extraction (presumably a backtick character literal) -- confirm against the
# original file.
# File lib/maruku/input/parse_span_better.rb, line 510 def read_inline_code(src, con) # Count the number of ticks num_ticks = 0 while src.cur_char == `` num_ticks += 1 src.ignore_char end # We will read until this string end_string = "`"*num_ticks code = read_simple(src, escaped=[], break_on_chars=[], break_on_strings=[end_string]) # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}" src.ignore_chars num_ticks # Ignore at most one space if num_ticks > 1 && code[0] == SPACE code = code[1, code.size-1] end # drop last space if num_ticks > 1 && code[-1] == SPACE code = code[0,code.size-1] end # puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} " con.push_element md_code(code) end
# File lib/maruku/input/parse_span_better.rb, line 467
#
# Reads an inline HTML fragment from +src+.
#
# The remaining buffer is fed to an HTMLHelper one character at a time until
# the helper reports it is finished; the consumed characters are then skipped
# in +src+ and md_html(...) is pushed on +con+. If the buffer runs out first,
# a "Malformed HTML" error is reported and whatever was read is still pushed.
#
# Errors raised while reading are reported through maruku_error /
# maruku_recover, and one character is pushed as plain text so parsing can
# go on. Only StandardError is rescued: Interrupt, SystemExit and similar
# must not be swallowed by the recovery path.
def read_inline_html(src, con)
  h = HTMLHelper.new
  begin
    # This is our current buffer in the context
    next_stuff = src.current_remaining_buffer

    consumed = 0
    while true
      if consumed >= next_stuff.size
        maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
        break
      end

      h.eat_this next_stuff[consumed].chr
      consumed += 1
      break if h.is_finished?
    end
    src.ignore_chars(consumed)
    con.push_element md_html(h.stuff_you_read)

    #start = src.current_remaining_buffer
    # h.eat_this start
    # if not h.is_finished?
    #   error "inline_html: Malformed:\n "+
    #     "#{start.inspect}\n #{h.inspect}",src,con
    # end
    #
    # consumed = start.size - h.rest.size
    # if consumed > 0
    #   con.push_element md_html(h.stuff_you_read)
    #   src.ignore_chars(consumed)
    # else
    #   puts "HTML helper did not work on #{start.inspect}"
    #   con.push_char src.shift_char
    # end
  rescue StandardError => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
# Reads a link starting at its opening bracket: skips it, reads the link text
# as a span, skips the closing bracket and an optional single space, then
# dispatches on the next character:
#  * inline form: reads the url (an absent url becomes '') and an optional
#    quoted title; if the closing character is missing, reports the error and
#    pushes the children as plain elements instead of a link; otherwise pushes
#    md_im_link(children, url, title);
#  * reference form: reads the ref id with read_ref_id (an empty id falls back
#    to the link text), sanitizes it and pushes md_link; if no id can be read,
#    reports the error and pushes the children instead;
#  * otherwise: "[link]" alone -- the sanitized link text is used as id.
# NOTE(review): the character literals ("]]", "[[", "((", "))", "\t\") look
# mangled by text extraction (presumably ']', '[', '(', ')' and tab) --
# confirm against the original file.
# File lib/maruku/input/parse_span_better.rb, line 541 def read_link(src, con) # we read the string and see what happens src.ignore_char # opening bracket children = read_span(src, EscapedCharInText, []]]) src.ignore_char # closing bracket # ignore space if src.cur_char == SPACE and (src.next_char == [[ or src.next_char == (( ) src.shift_char end case src.cur_char when (( src.ignore_char # opening ( src.consume_whitespace url = read_url(src, [SPACE,\t\,))]) if not url url = '' # no url is ok end src.consume_whitespace title = nil if src.cur_char != )) # we have a title quote_char = src.cur_char title = read_quoted(src,con) if not title maruku_error 'Must quote title',src,con else # Tries to read a title with quotes:  # this is the most ugly thing in Markdown if not src.next_matches(/\s*\)/) # if there is not a closing par ), then read # the rest and guess it's title with quotes rest = read_simple(src, escaped=[], break_on_chars=[))], break_on_strings=[]) # chop the closing char rest.chop! title << quote_char << rest end end end src.consume_whitespace closing = src.shift_char # closing ) if closing != )) maruku_error 'Unclosed link',src,con maruku_recover "No closing ): I will not create"+ " the link for #{children.inspect}", src, con con.push_elements children return end con.push_element md_im_link(children,url, title) when [[ # link ref ref_id = read_ref_id(src,con) if ref_id if ref_id.size == 0 ref_id = sanitize_ref_id(children.to_s) else ref_id = sanitize_ref_id(ref_id) end con.push_element md_link(children, ref_id) else maruku_error "Could not read ref_id", src, con maruku_recover "I will not create the link for "+ "#{children.inspect}", src, con con.push_elements children return end else # empty [link] id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_') con.push_element md_link(children, id) end end
Tries to read a quoted value. If stream does not start with ' or ", returns nil.
# Reads a quoted value. When the current character is a quote character, it
# is shifted off and remembered, the text up to the same quote character is
# read with read_simple (using EscapedCharInQuotes), the closing quote is
# skipped and the text is returned. For any other character nil is returned
# and nothing is consumed.
# NOTE(review): `when '', ""` looks mangled by text extraction (presumably
# character literals for the single and double quote) -- confirm against the
# original file.
# File lib/maruku/input/parse_span_better.rb, line 365 def read_quoted(src, con) case src.cur_char when '', "" quote_char = src.shift_char # opening quote string = read_simple(src, EscapedCharInQuotes, [quote_char]) src.ignore_char # closing quote return string else # puts "Asked to read quote from: #{src.cur_chars(10).inspect}" return nil end end
# Reads a value that may or may not be quoted: delegates to read_quoted when
# the current character is a quote character, otherwise to
# read_simple(src, escaped, exit_on_chars).
# NOTE(review): `when '', ""` looks mangled by text extraction (presumably
# character literals for the single and double quote) -- confirm against the
# original file.
# File lib/maruku/input/parse_span_better.rb, line 354 def read_quoted_or_unquoted(src, con, escaped, exit_on_chars) case src.cur_char when '', "" read_quoted(src, con) else read_simple(src, escaped, exit_on_chars) end end
Reads a bracketed id "[refid]". Consumes also both brackets.
# File lib/maruku/input/parse_span_better.rb, line 449
#
# Reads a bracketed reference id: skips the opening bracket and any
# whitespace, then matches R_REF_ID against the source.
#
# Returns the first capture group of the match, or nil when nothing matched.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  match = src.read_regexp(R_REF_ID)
  match ? match[1] : nil
end
Reads a simple string (no formatting) until one of break_on_chars, while escaping the escaped. If the string is empty, it returns nil. Raises on error if the string terminates unexpectedly.
# If eat_delim is true, and if the delim is not the EOF, then the delim # gets eaten from the stream.
# Accumulates plain text from src until the current character is one of
# exit_on_chars or the source is positioned at one of exit_on_strings
# (checked with src.cur_chars_are); the terminator itself is not consumed.
# A backslash followed by a character listed in `escaped` yields just that
# character; otherwise the backslash is kept as-is. Reaching the end of the
# source (cur_char nil) is reported through maruku_error / maruku_recover
# and ends the loop.
# Returns the text read, or nil when it is empty.
# NOTE(review): the error message calls exit_on_chars.map, which would fail
# if exit_on_chars is nil -- verify that callers always pass an array.
# NOTE(review): `when \\\` looks mangled by text extraction (presumably the
# backslash character literal) -- confirm against the original file.
# File lib/maruku/input/parse_span_better.rb, line 384 def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil) text = "" while true # puts "Reading simple #{text.inspect}" c = src.cur_char if exit_on_chars && exit_on_chars.include?(c) # src.ignore_char if eat_delim break end break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x} case c when nil s= "String finished while reading (break on "+ "#{exit_on_chars.map{|x|""<<x}.inspect})"+ " already read: #{text.inspect}" maruku_error s, src maruku_recover "I boldly continue", src break when \\\ d = src.next_char if escaped.include? d src.ignore_chars(2) text << d else text << src.shift_char end else text << src.shift_char end end # puts "Read simple #{text.inspect}" text.empty? ? nil : text end
This is the main loop for reading span elements
It's long, but not complex or difficult to understand.
# Main loop for reading span-level elements from src into a new SpanContext.
# Reading stops when the current character is in exit_on_chars, when the
# source is positioned at one of exit_on_strings, or at the end of the input
# (reported as "Unclosed span" when the nil branch of the case is reached).
# ASCII letters take a fast path straight into the current string, which is
# why letters cannot be used in exit_on_chars. Otherwise span extensions are
# tried first (check_span_extensions), then the current character selects a
# reader: whitespace and line breaks, inline code, "<"-introduced constructs
# (HTML, XML instructions, urls, e-mails, guillemets), backslash escapes,
# links / footnote references, images, entities, "*" and "_" emphasis,
# "{...}" extensions, or plain text.
# After the loop: the pending string is flushed, IALs are merged
# (merge_ial), one leading and one trailing space are removed from the first
# and last string elements, and the result of educate(con.elements) is
# returned.
# NOTE(review): the character literals throughout this listing look mangled
# by text extraction (e.g. "aa"/"zz", "[[", "<<", "**", "\ \" presumably
# stand for one-character literals) -- confirm against the original file.
# File lib/maruku/input/parse_span_better.rb, line 54 def read_span(src, escaped, exit_on_chars, exit_on_strings=nil) con = SpanContext.new c = d = nil while true c = src.cur_char # This is only an optimization which cuts 50% of the time used. # (but you can't use a-zA-z in exit_on_chars) if c && ((c>=aa && c<=zz) || ((c>=AA && c<=ZZ))) con.cur_string << src.shift_char next end break if exit_on_chars && exit_on_chars.include?(c) break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x} # check if there are extensions if check_span_extensions(src, con) next end case c = src.cur_char when \ \# it's space (32) if src.cur_chars_are " \n" src.ignore_chars(3) con.push_element md_br() next else src.ignore_char con.push_space end when \n\, \t\ src.ignore_char con.push_space when `` read_inline_code(src,con) when << # It could be: # 1) HTML "<div ..." # 2) HTML "<!-- ..." # 3) url "<http:// ", "<ftp:// ..." # 4) email "<andrea@... ", "<mailto:andrea@..." # 5) on itself! "a < b " # 6) Start of <<guillemettes>> case d = src.next_char when <<; # guillemettes src.ignore_chars(2) con.push_char << con.push_char << when !!; if src.cur_chars_are '<!--' read_inline_html(src, con) else con.push_char src.shift_char end when ?? read_xml_instr_span(src, con) when \ \, \t\ con.push_char src.shift_char else if src.next_matches(/<mailto:/) or src.next_matches(/<[\w\.]+\@/) read_email_el(src, con) elsif src.next_matches(/<\w+:/) read_url_el(src, con) elsif src.next_matches(/<\w/) #puts "This is HTML: #{src.cur_chars(20)}" read_inline_html(src, con) else #puts "This is NOT HTML: #{src.cur_chars(20)}" con.push_char src.shift_char end end when \\\ d = src.next_char if d == '' src.ignore_chars(2) con.push_element md_entity('apos') elsif d == "" src.ignore_chars(2) con.push_element md_entity('quot') elsif escaped.include? d src.ignore_chars(2) con.push_char d else con.push_char src.shift_char end when [[ if markdown_extra? 
&& src.next_char == ^^ read_footnote_ref(src,con) else read_link(src, con) end when !! if src.next_char == [[ read_image(src, con) else con.push_char src.shift_char end when && # named references if m = src.read_regexp(/\&([\w\d]+);/) con.push_element md_entity(m[1]) # numeric elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/) num = m[1] ? m[2].hex : m[2].to_i con.push_element md_entity(num) else con.push_char src.shift_char end when ** if not src.next_char maruku_error "Opening * as last char.", src, con maruku_recover "Threating as literal" con.push_char src.shift_char else follows = src.cur_chars(4) if follows =~ /^\*\*\*[^\s\*]/ con.push_element read_emstrong(src,'***') elsif follows =~ /^\*\*[^\s\*]/ con.push_element read_strong(src,'**') elsif follows =~ /^\*[^\s\*]/ con.push_element read_em(src,'*') else # * is just a normal char con.push_char src.shift_char end end when __ if not src.next_char maruku_error "Opening _ as last char", src, con maruku_recover "Threating as literal", src, con con.push_char src.shift_char else # we don't want "mod_ruby" to start an emphasis # so we start one only if # 1) there's nothing else in the span (first char) # or 2) the last char was a space # or 3) the current string is empty #if con.elements.empty? || if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0) # also, we check the next characters follows = src.cur_chars(4) if follows =~ /^\_\_\_[^\s\_]/ con.push_element read_emstrong(src,'___') elsif follows =~ /^\_\_[^\s\_]/ con.push_element read_strong(src,'__') elsif follows =~ /^\_[^\s\_]/ con.push_element read_em(src,'_') else # _ is just a normal char con.push_char src.shift_char end else # _ is just a normal char con.push_char src.shift_char end end when {{ # extension if [##, .., ::].include? 
src.next_char src.ignore_char # { interpret_extension(src, con, [}}]) src.ignore_char # } else con.push_char src.shift_char end when nil maruku_error( ("Unclosed span (waiting for %s"+ "#{exit_on_strings.inspect})") % [ exit_on_chars ? "#{exit_on_chars.inspect} or" : ""], src,con) break else # normal text con.push_char src.shift_char end # end case end # end while true con.push_string_if_present # Assign IAL to elements merge_ial(con.elements, src, con) # Remove leading space if (s = con.elements.first).kind_of? String if s[0] == \ \then con.elements[0] = s[1, s.size-1] end con.elements.shift if s.size == 0 end # Remove final spaces if (s = con.elements.last).kind_of? String s.chop! if s[-1] == \ \ con.elements.pop if s.size == 0 end educated = educate(con.elements) educated end
# File lib/maruku/input/parse_span_better.rb, line 428
#
# Reads a strong-emphasis span: skips two characters, reads the inner span
# up to the +delim+ string, skips two more characters and wraps the children
# in md_strong.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
# Reads a url with read_simple up to one of the break_on characters. A url
# starting with a quote character is reported through `error`. Surrounding
# angle brackets are stripped when both are present.
# Returns the url String, or nil when it is empty.
# NOTE(review): `['',""]`, `<<` and `>>` look mangled by text extraction
# (presumably character literals for the quotes, '<' and '>') -- confirm
# against the original file.
# File lib/maruku/input/parse_span_better.rb, line 332 def read_url(src, break_on) if ['',""].include? src.cur_char error 'Invalid char for url', src end url = read_simple(src, [], break_on) if not url # empty url url = "" end if url[0] == << && url[-1] == >> url = url[1, url.size-2] end if url.size == 0 return nil end url end
# Reads a "<url>" element: skips the leading "<", reads the text up to the
# closing delimiter with read_simple, skips that delimiter and pushes
# md_url(url) onto con.
# NOTE(review): `[>>]` looks mangled by text extraction (presumably a
# one-character literal for '>') -- confirm against the original file.
# File lib/maruku/input/parse_span_better.rb, line 315 def read_url_el(src,con) src.ignore_char # leading < url = read_simple(src, [], [>>]) src.ignore_char # closing > con.push_element md_url(url) end
# File lib/maruku/input/parse_span_better.rb, line 253
#
# Reads an XML processing instruction "<?target code ?>": skips the two
# opening characters, reads the target word (empty string when absent),
# reads the code up to "?>", skips that terminator and pushes
# md_xml_instr(target, code) with the code stripped of surrounding
# whitespace.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  target_match = src.read_regexp(/(\w+)/)
  target = target_match ? target_match[1] : ''

  terminator = "?>"
  body = read_simple(src, [], [], [terminator])
  src.ignore_chars(terminator.size)

  # read_simple returns nil for empty text, hence the fallback.
  con.push_element(md_xml_instr(target, (body || "").strip))
end
Generated with the Darkfish Rdoc Generator 2.