Class Index [+]

Quicksearch

Nokogiri::HTML::Document

Public Class Methods

new click to toggle source

Create a new document

/*
 * call-seq:
 *  new(uri = nil, external_id = nil)
 *
 * Create a new document.
 *
 * All arguments are optional.  The first is used as the URI and the second
 * as the external ID handed to htmlNewDoc; a missing, nil or false value is
 * passed to libxml2 as NULL.  The complete argument list is then forwarded
 * to the wrapped object's initialize.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE args, rb_uri, rb_external_id, wrapped;
  const xmlChar *c_uri         = NULL;
  const xmlChar *c_external_id = NULL;
  htmlDocPtr html_doc;

  /* "0*": no required arguments, everything lands in the splat array. */
  rb_scan_args(argc, argv, "0*", &args);
  rb_uri         = rb_ary_entry(args, (long)0);
  rb_external_id = rb_ary_entry(args, (long)1);

  if(RTEST(rb_uri)) {
    c_uri = (const xmlChar *)StringValuePtr(rb_uri);
  }
  if(RTEST(rb_external_id)) {
    c_external_id = (const xmlChar *)StringValuePtr(rb_external_id);
  }

  html_doc = htmlNewDoc(c_uri, c_external_id);
  wrapped  = Nokogiri_wrap_xml_document(klass, html_doc);

  /* Run the Ruby-level initialize with the caller's original arguments. */
  rb_obj_call_init(wrapped, argc, argv);
  return wrapped;
}
parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) click to toggle source
 

Parse HTML. string_or_io may be a String, or any object that responds to read and close, such as an IO or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.

    # File lib/nokogiri/html/document.rb, line 64
    ##
    # Parse HTML.
    #
    # string_or_io:: a String, or an object that responds to +read+ (it is
    #                then handed to read_io); nil or an empty String yields
    #                a new, empty document.
    # url::          location of the document; when nil and the input
    #                responds to +path+, that path is used for IO input.
    # encoding::     encoding name; when nil it is taken from the input's
    #                own encoding unless that encoding is ASCII-8BIT.
    # options::      an Integer bit mask or an options object responding to
    #                +to_i+.  An Integer is wrapped in
    #                Nokogiri::XML::ParseOptions before being yielded.
    #
    # If a block is given it receives the options object before parsing.
    # Returns whatever read_io, read_memory or new returns.
    def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
      # Integer covers what used to be Fixnum; the Fixnum constant no longer
      # exists on current Ruby versions.
      options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
      # Give the options to the user
      yield options if block_given?

      if string_or_io.respond_to?(:encoding)
        unless string_or_io.encoding.name == "ASCII-8BIT"
          encoding ||= string_or_io.encoding.name
        end
      end

      if string_or_io.respond_to?(:read)
        url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
        return read_io(string_or_io, url, encoding, options.to_i)
      end

      # read_memory pukes on empty docs
      return new if string_or_io.nil? || string_or_io.empty?

      read_memory(string_or_io, url, encoding, options.to_i)
    end
read_io(io, url, encoding, options) click to toggle source

Read the HTML document from io with given url, encoding, and options. See Nokogiri::HTML.parse

/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Read the HTML document from +io+ with given +url+, +encoding+, and
 * +options+.  See Nokogiri::HTML.parse
 *
 * +url+ and +encoding+ may be nil, in which case NULL is handed to
 * htmlReadIO.  Errors reported through the structured error callback while
 * reading are collected in an Array that is stored in the returned
 * document's @errors instance variable.
 *
 * If htmlReadIO returns no document, the last libxml2 error is raised as a
 * wrapped syntax error, or RuntimeError when libxml2 recorded none.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* Convert up front: NUM2INT raises on a non-numeric argument, and that
   * must happen before the process-wide structured error handler below is
   * installed, otherwise the handler would stay pointed at error_list. */
  int c_options         = (int)NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadIO(
      io_read_callback,
      io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      c_options
  );
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* Nothing to free here: there is no document. */
    xmlErrorPtr error = xmlGetLastError();

    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil; /* not reached: both branches above raise */
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
read_memory(string, url, encoding, options) click to toggle source

Read the HTML document contained in string with given url, encoding, and options. See Nokogiri::HTML.parse

/*
 * call-seq:
 *  read_memory(string, url, encoding, options)
 *
 * Read the HTML document contained in +string+ with given +url+,
 * +encoding+, and +options+.  See Nokogiri::HTML.parse
 *
 * +url+ and +encoding+ may be nil, in which case NULL is handed to
 * htmlReadMemory.  Errors reported through the structured error callback
 * while parsing are collected in an Array that is stored in the returned
 * document's @errors instance variable.
 *
 * If htmlReadMemory returns no document, the last libxml2 error is raised
 * as a wrapped syntax error, or RuntimeError when libxml2 recorded none.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * c_buffer = StringValuePtr(string);
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* NOTE(review): the length is narrowed to int because htmlReadMemory
   * takes an int size; strings longer than INT_MAX are not guarded here. */
  int len               = (int)RSTRING_LEN(string);
  /* Convert up front: NUM2INT raises on a non-numeric argument, and that
   * must happen before the process-wide structured error handler below is
   * installed, otherwise the handler would stay pointed at error_list. */
  int c_options         = (int)NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadMemory(c_buffer, len, c_url, c_enc, c_options);
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* Nothing to free here: there is no document. */
    xmlErrorPtr error = xmlGetLastError();

    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil; /* not reached: both branches above raise */
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}

Public Instance Methods

fragment(tags = nil) click to toggle source
  

Create a Nokogiri::XML::DocumentFragment from tags

    # File lib/nokogiri/html/document.rb, line 51
    ##
    # Create a DocumentFragment from +tags+ for this document.
    #
    # tags:: markup for the fragment; defaults to nil.
    #
    # The document itself and its root node are passed along to
    # DocumentFragment.new together with +tags+.
    def fragment(tags = nil)
      context_node = self.root
      DocumentFragment.new(self, tags, context_node)
    end
meta_encoding() click to toggle source
 

Get the meta tag encoding for this document. If there is no meta tag, then nil is returned

    # File lib/nokogiri/html/document.rb, line 7
    ##
    # Get the meta tag encoding for this document.
    #
    # Uses the first <meta> element whose http-equiv attribute contains
    # "Content-Type" (matched case-insensitively) and extracts the value
    # following "charset=" from its content attribute.
    #
    # Returns the charset name as a String, or nil when there is no such
    # meta tag, the tag has no content attribute, or the content carries no
    # charset.
    def meta_encoding
      meta = css('meta').find { |node| node['http-equiv'] =~ /Content-Type/i }
      return nil unless meta

      content = meta['content']
      return nil unless content

      # A Content-Type without a charset parameter simply has no encoding.
      match = /charset\s*=\s*([\w-]+)/i.match(content)
      match && match[1]
    end
meta_encoding=(encoding) click to toggle source
 

Set the meta tag encoding for this document. If there is no meta content tag, nil is returned and the encoding is not set.

    # File lib/nokogiri/html/document.rb, line 18
    ##
    # Set the meta tag encoding for this document.
    #
    # Uses the first <meta> element whose http-equiv attribute contains
    # "Content-Type" (matched case-insensitively) and rewrites its content
    # attribute to "text/html; charset=<encoding>".
    #
    # Returns +encoding+, or nil (leaving the document untouched) when no
    # such meta tag exists.
    def meta_encoding=(encoding)
      meta = css('meta').find { |node| node['http-equiv'] =~ /Content-Type/i }
      return nil unless meta

      meta['content'] = "text/html; charset=%s" % encoding
      encoding
    end
serialize(options = {}) click to toggle source
  

Serialize Node using options. Save options can also be set using a block. See SaveOptions.

These two statements are equivalent:

 node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)

or

  node.serialize(:encoding => 'UTF-8') do |config|
    config.format.as_xml
  end
    # File lib/nokogiri/html/document.rb, line 41
    ##
    # Serialize this document using +options+.
    #
    # When +options+ has no truthy :save_with entry, it defaults to
    # FORMAT | AS_HTML | NO_DECLARATION | NO_EMPTY_TAGS from
    # XML::Node::SaveOptions.  The options and any block are then forwarded
    # to the superclass implementation, whose result is returned.
    def serialize(options = {}, &block)
      # Work on a copy so the caller's hash is not modified (and a frozen
      # hash is accepted).  The bare +super+ below forwards this copy.
      options = options.dup
      options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
          XML::Node::SaveOptions::AS_HTML |
          XML::Node::SaveOptions::NO_DECLARATION |
          XML::Node::SaveOptions::NO_EMPTY_TAGS
      super
    end
type click to toggle source

The type for this document

/*
 * call-seq:
 *  type
 *
 * The type for this document.
 *
 * Reads the +type+ field of the wrapped libxml2 document struct and returns
 * it as a Ruby number.
 */
static VALUE type(VALUE self)
{
  htmlDocPtr html_doc;
  long doc_type;

  Data_Get_Struct(self, xmlDoc, html_doc);
  doc_type = (long)html_doc->type;
  return INT2NUM(doc_type);
}

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.