class Net::HTTPResponse

Attributes

no_cache[RW]

Public Instance Methods

body_charset(str=self.raw_body) click to toggle source
# File lib/rbot/core/utils/httputil.rb, line 32
# Collect the candidate character sets for a textual response body.
#
# str:: the body text to scan for an in-document charset declaration
#       (defaults to the raw body)
#
# Returns +nil+ when the Content-Type is neither text/* nor an (X)HTML/XML
# type; a missing Content-Type header is treated as 'text/html'. Otherwise
# returns an Array of charset names in increasing order of specificity:
# the 'latin1' fallback first, then the charset from the Content-Type
# header, then the one declared in the document itself (XML processing
# instruction or HTML meta http-equiv tag). Duplicates are removed.
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  # Header values are matched case-insensitively
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['latin1'] # should be in config

  if ctype.match(/charset=["']?([^\s"']+)["']?/i)
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  # Only the first matching declaration style is used: an XML processing
  # instruction takes precedence over an HTML meta tag
  case str
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    # The first capture holds the inside of the meta tag; the charset is
    # looked up in there so that unrelated "charset=" text is not picked up
    meta = $1
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  end
  return charsets.uniq
end
body_to_utf(str) click to toggle source
# File lib/rbot/core/utils/httputil.rb, line 57
# Convert +str+ to UTF-8 using the charsets reported by body_charset.
#
# The candidate charsets are tried from last to first. For each one the
# conversion is attempted on the whole string and then, on failure, on
# the string with up to 5 trailing bytes chopped off. The first successful
# conversion is returned. When body_charset returns nothing, or when no
# attempt succeeds, +str+ is returned unchanged.
def body_to_utf(str)
  candidates = self.body_charset(str)
  return str unless candidates

  candidates.reverse_each { |encoding|
    # XXX: ugly retry loop kept from the original author (jsn), who saw
    # no better way of doing this
    (0..5).each { |trim|
      begin
        debug "trying #{encoding} / offset #{trim}"
        converted = Iconv.iconv('utf-8//ignore',
                                encoding,
                                str.slice(0 .. (-1 - trim)))
        return converted.first
      rescue
        debug "conversion failed for #{encoding} / offset #{trim}"
      end
    }
  }

  str
end
cooked_body() click to toggle source
# File lib/rbot/core/utils/httputil.rb, line 126
# Return the raw body after passing it through decompress_body and then
# body_to_utf.
def cooked_body
  unpacked = self.decompress_body(self.raw_body)
  self.body_to_utf(unpacked)
end
decompress_body(str) click to toggle source
# File lib/rbot/core/utils/httputil.rb, line 78
# Undo the coding announced by the Content-Encoding header.
#
# str:: the body as received, or a prefix of it
#
# Returns +str+ untouched when there is no Content-Encoding header, the
# gunzipped or inflated data for gzip/deflate, and +str+ again when the
# header actually holds a charset name (see below).
#
# Raises Zlib::Error when a 'deflate' body cannot be inflated, and a
# RuntimeError for any content encoding that is not handled here.
def decompress_body(str)
  method = self['content-encoding']
  case method
  when nil
    return str
  when /gzip/i # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # We can't unpack the whole stream (e.g. because we're doing a
      # partial read), so collect whatever can be decoded byte by byte
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      # The buffer must be binary: appending an Integer >= 128 to a UTF-8
      # string would add a multi-byte character instead of the raw byte
      ret = String.new
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
          ret << byte
        }
      rescue
        # Expected: the reader fails once the usable data runs out.
        # Everything decoded up to that point is kept.
      end
      return ret
    end
  when 'deflate'
    debug "inflating body"
    # From http://www.koders.com/ruby/fid927B4382397E5115AC0ABE21181AB5C1CBDD5C17.aspx?s=thread:
    # -MAX_WBITS stops zlib from looking for a zlib header
    inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    begin
      return inflater.inflate(str)
    rescue Zlib::Error => e
      raise e
      # TODO
      # debug "full inflation failed (#{e}), trying to recover as much as possible"
    end
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding and move the charset into the content-type
    old_hsh = self.to_hash
    # A missing Content-Type is treated as 'text/html', as body_charset does
    self['content-type'] = (self['content-type'] || 'text/html') + "; charset=" + method.downcase
    warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{method}"
  end
end
partial_body(size=0) { |body_to_utf(decompress_body)| ... } click to toggle source

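Reads chunks from the body until at least size bytes have been accumulated (a size of 0 reads the whole body), yielding the decompressed, UTF-8-converted partial text after each chunk. Returns the partial body, decompressed and converted in the same way.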

# File lib/rbot/core/utils/httputil.rb, line 132
# Collect the body piece by piece, handing the decoded text gathered so
# far to the optional block after every piece.
#
# size:: stop reading once this many characters have been accumulated;
#        +nil+ or a non-positive value means no limit
#
# When the body has already been read (@read is set), body() is used as
# the partial text and the block is called once. Otherwise no_cache is
# set and the body is consumed through read_body.
#
# Returns the accumulated data after decompress_body and body_to_utf.
def partial_body(size=0, &block)
  collected = String.new

  if @read
    debug "using body() as partial"
    collected = self.body
    if block_given?
      yield self.body_to_utf(self.decompress_body(collected))
    end
  else
    debug "disabling cache"
    self.no_cache = true
    self.read_body do |piece|
      collected << piece
      if block_given?
        yield self.body_to_utf(self.decompress_body(collected))
      end
      # Stop as soon as the requested amount has been gathered
      break if size && size > 0 && collected.length >= size
    end
  end

  self.body_to_utf(self.decompress_body(collected))
end