Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document is created by parsing an XML document. See Nokogiri.XML()
For searching a Document, see Nokogiri::XML::Node#css and Nokogiri::XML::Node#xpath
Create a new document with version (defaults to "1.0").
/*
 * Create a new, empty document.
 *
 * The first optional argument is the XML version string; "1.0" is used when
 * it is absent or nil.  All original arguments are then forwarded to the
 * Ruby-level initialize of the wrapped object.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE extra_args;
  VALUE xml_version;
  VALUE wrapped;
  xmlDocPtr c_doc;

  /* Every argument is optional; collect them all into one array. */
  rb_scan_args(argc, argv, "0*", &extra_args);

  xml_version = rb_ary_entry(extra_args, (long)0);
  if (NIL_P(xml_version)) {
    xml_version = rb_str_new2("1.0");
  }

  c_doc = xmlNewDoc((xmlChar *)StringValuePtr(xml_version));
  wrapped = Nokogiri_wrap_xml_document(klass, c_doc);

  rb_obj_call_init(wrapped, argc, argv);

  return wrapped;
}
Parse an XML file. string_or_io may be a String, or any object that responds to read and close, such as an IO or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.
# File lib/nokogiri/xml/document.rb, line 19
#
# Parse XML from +string_or_io+ and return the resulting document.
#
# string_or_io:: a String, or an object responding to +read+
# url::          location of the document; when nil and the input responds
#                to +path+, that path is used
# encoding::     encoding handed to the reader, or nil
# options::      an Integer bit mask or a ParseOptions object; when a block
#                is given it is yielded the ParseOptions before parsing
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
  # Match on Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4
  # and removed in Ruby 3.2, while Integer exists in every Ruby version.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory cannot handle empty input, so hand back an empty document.
  return new if string_or_io.nil? or string_or_io.empty?

  read_memory(string_or_io, url, encoding, options.to_i)
end
Create a new document from an IO object
/*
 * Create a new document by reading from the IO-like object +io+.
 *
 * klass    - class handed to Nokogiri_wrap_xml_document for the result
 * io       - passed as the context pointer to io_read_callback and
 *            io_close_callback
 * url      - String given to xmlReadIO as the document URL, or nil
 * encoding - String given to xmlReadIO as the encoding name, or nil
 * options  - Integer parser option flags
 *
 * Returns the wrapped document; the array filled by
 * Nokogiri_error_array_pusher during the parse is stored in @errors.
 * When xmlReadIO yields no document, raises the wrapped libxml2 error, or a
 * RuntimeError if libxml2 recorded none.
 */
static VALUE read_io(VALUE klass, VALUE io, VALUE url, VALUE encoding, VALUE options)
{
  const char *c_url = NIL_P(url) ? NULL : StringValuePtr(url);
  const char *c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* Convert before the error handler is installed: NUM2INT raises on a
   * non-numeric value, and raising after installation would leave libxml2's
   * structured error handler pointing at error_list. */
  int c_options = (int)NUM2INT(options);
  VALUE error_list = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = xmlReadIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      c_options
  );
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    /* Nothing to free here: there is no document. */
    xmlErrorPtr error = xmlGetLastError();
    if (error) {
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    }
    rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil; /* not reached */
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
Create a new document from a String
/*
 * Create a new document by parsing the contents of the String +string+.
 *
 * klass    - class handed to Nokogiri_wrap_xml_document for the result
 * string   - String holding the XML source
 * url      - String given to xmlReadMemory as the document URL, or nil
 * encoding - String given to xmlReadMemory as the encoding name, or nil
 * options  - Integer parser option flags
 *
 * Returns the wrapped document; the array filled by
 * Nokogiri_error_array_pusher during the parse is stored in @errors.
 * Raises ArgumentError when the string length does not fit in an int.
 * When xmlReadMemory yields no document, raises the wrapped libxml2 error,
 * or a RuntimeError if libxml2 recorded none.
 */
static VALUE read_memory(VALUE klass, VALUE string, VALUE url, VALUE encoding, VALUE options)
{
  const char *c_buffer = StringValuePtr(string);
  const char *c_url = NIL_P(url) ? NULL : StringValuePtr(url);
  const char *c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  int len = (int)RSTRING_LEN(string);
  /* Convert before the error handler is installed: NUM2INT raises on a
   * non-numeric value, and raising after installation would leave libxml2's
   * structured error handler pointing at error_list. */
  int c_options = (int)NUM2INT(options);
  VALUE error_list;
  VALUE document;
  xmlDocPtr doc;

  /* xmlReadMemory takes an int size; refuse lengths that were truncated by
   * the cast instead of parsing only part of the buffer. */
  if ((long)len != (long)RSTRING_LEN(string)) {
    rb_raise(rb_eArgError, "string is too large to parse");
  }

  error_list = rb_ary_new();

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, c_options);
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    /* Nothing to free here: there is no document. */
    xmlErrorPtr error = xmlGetLastError();
    if (error) {
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    }
    rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil; /* not reached */
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
# File lib/nokogiri/xml/document.rb, line 176
#
# Add +child+ to this document.  Raises when the document already has a root
# node.  A document fragment contributes its first child, and is rejected
# when it holds more than one child.
def add_child child
  if root
    raise "Document already has a root node"
  end

  # Anything that is not a fragment goes straight to the parent implementation.
  unless child.type == Node::DOCUMENT_FRAG_NODE
    return super
  end

  if child.children.size > 1
    raise "Document cannot have multiple root nodes"
  end
  super(child.children.first)
end
Recursively get all namespaces from this node and its subtree and return them as a hash.
For example, given this document:
<root xmlns:foo="bar"> <bar xmlns:hello="world" /> </root>
This method will return:
{ 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
WARNING: this method will clobber duplicate names in the keys. For example, given this document:
<root xmlns:foo="bar"> <bar xmlns:foo="baz" /> </root>
The hash returned will look like this: { 'xmlns:foo' => 'bar' }
# File lib/nokogiri/xml/document.rb, line 116
#
# Merge the +namespaces+ hash of every node yielded by +traverse+ into a
# single Hash and return it.  Keys that occur more than once are overwritten
# by the node yielded last.
def collect_namespaces
  collected = {}
  traverse do |node|
    collected.merge!(node.namespaces)
  end
  collected
end
Create a CDATA element containing text
# File lib/nokogiri/xml/document.rb, line 80
#
# Build a CDATA node owned by this document whose content is +text+
# converted with +to_s+.
def create_cdata text
  Nokogiri::XML::CDATA.new(
    self,
    text.to_s
  )
end
Create an element with name, and optionally setting the content and attributes.
doc.create_element "div" # <div></div> doc.create_element "div", :class => "container" # <div class='container'></div> doc.create_element "div", "contents" # <div>contents</div> doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div> doc.create_element("div") { |node| node['class'] = "container" } # <div class='container'></div>
# File lib/nokogiri/xml/document.rb, line 53
#
# Create an element called +name+ in this document.
#
# Each extra argument is applied in order: a Hash sets attributes (keys of
# the form "xmlns" or "xmlns:prefix" become namespace definitions instead),
# anything else becomes the element's content.  The block, if any, is passed
# on to Element.new.
def create_element name, *args, &block
  element = Nokogiri::XML::Element.new(name, self, &block)

  args.each do |arg|
    unless Hash === arg
      element.content = arg
      next
    end

    arg.each do |attr_name, attr_value|
      attr_key = attr_name.to_s
      if attr_key =~ /^xmlns(:\w+)?$/
        # "xmlns:foo" declares prefix "foo"; a bare "xmlns" gives a nil prefix.
        prefix = attr_key.split(":", 2)[1]
        element.add_namespace_definition prefix, attr_value
      else
        element[attr_key] = attr_value.to_s
      end
    end
  end

  element
end
Create a new entity named name.
type is an integer representing the type of entity to be created, and it defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See the constants on Nokogiri::XML::EntityDecl for more information.
external_id, system_id, and content set the External ID, System ID, and content respectively. All of these parameters are optional.
/*
 * Create a new entity declaration in this document.
 *
 * Arguments (one required, four optional): name, type, external_id,
 * system_id, content.  A nil type selects XML_INTERNAL_GENERAL_ENTITY; nil
 * strings are passed to libxml2 as NULL.
 *
 * Returns the entity wrapped as cNokogiriXmlEntityDecl.  If xmlAddDocEntity
 * fails, raises the wrapped libxml2 error, or a RuntimeError if libxml2
 * recorded none.
 */
static VALUE create_entity(int argc, VALUE *argv, VALUE self)
{
  VALUE name, type, external_id, system_id, content;
  xmlDocPtr doc;
  xmlEntityPtr entity;
  xmlErrorPtr error;
  xmlChar *c_name;
  xmlChar *c_external_id;
  xmlChar *c_system_id;
  xmlChar *c_content;
  int c_type;

  Data_Get_Struct(self, xmlDoc, doc);

  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
               &content);

  xmlResetLastError();

  c_name = (xmlChar *)(NIL_P(name) ? NULL : StringValuePtr(name));
  c_type = (int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type));
  c_external_id =
      (xmlChar *)(NIL_P(external_id) ? NULL : StringValuePtr(external_id));
  c_system_id =
      (xmlChar *)(NIL_P(system_id) ? NULL : StringValuePtr(system_id));
  c_content = (xmlChar *)(NIL_P(content) ? NULL : StringValuePtr(content));

  entity = xmlAddDocEntity(doc, c_name, c_type, c_external_id, c_system_id,
                           c_content);

  if (entity != NULL) {
    return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)entity);
  }

  error = xmlGetLastError();
  if (error) {
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
  }
  rb_raise(rb_eRuntimeError, "Could not create entity");

  return Qnil;
}
Create a text node with text
# File lib/nokogiri/xml/document.rb, line 75
#
# Build a Text node owned by this document whose content is +text+
# converted with +to_s+.  The block, if any, is passed on to Text.new.
def create_text_node text, &block
  Nokogiri::XML::Text.new(
    text.to_s,
    self,
    &block
  )
end
Apply any decorators to node
# File lib/nokogiri/xml/document.rb, line 149
#
# Extend +node+ with every module registered in @decorators under a class
# that +node+ is an instance of.  Does nothing when no decorators have been
# registered.
def decorate node
  return unless @decorators

  @decorators.each do |klass, extensions|
    if node.is_a?(klass)
      extensions.each do |extension|
        node.extend(extension)
      end
    end
  end
end
Get the list of decorators given key
# File lib/nokogiri/xml/document.rb, line 123
#
# Return the list of decorators registered under +key+, creating the
# registry and an empty list on first use.  The returned Array is the stored
# one, so appending to it registers a decorator.
def decorators key
  @decorators = Hash.new unless @decorators
  registered = @decorators[key]
  registered = @decorators[key] = [] unless registered
  registered
end
A reference to self
# File lib/nokogiri/xml/document.rb, line 90
#
# A document is its own owning document: returns the receiver.
def document
  return self
end
Copy this Document. An optional depth may be passed in, but it defaults to a deep copy. 0 is a shallow copy, 1 is a deep copy.
/*
 * Copy this document.
 *
 * Takes one optional argument, the copy level handed to xmlCopyDoc; it is 1
 * when no argument is given.  Returns nil if xmlCopyDoc fails, otherwise the
 * copy wrapped in the same Ruby class as the receiver.
 */
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
  VALUE level;
  xmlDocPtr original;
  xmlDocPtr copy;
  int given;

  given = rb_scan_args(argc, argv, "01", &level);
  if (given == 0) {
    level = INT2NUM((long)1);
  }

  Data_Get_Struct(self, xmlDoc, original);

  copy = xmlCopyDoc(original, (int)NUM2INT(level));
  if (copy == NULL) {
    return Qnil;
  }

  /* Carry the node type of the original over to the copy. */
  copy->type = original->type;

  return Nokogiri_wrap_xml_document(rb_obj_class(self), copy);
}
Get the encoding for this Document
/*
 * Get the encoding of this document as a Ruby string, or nil when the
 * underlying xmlDoc has no encoding set.
 */
static VALUE encoding(VALUE self)
{
  xmlDocPtr c_document;

  Data_Get_Struct(self, xmlDoc, c_document);

  if (c_document->encoding) {
    return NOKOGIRI_STR_NEW2(c_document->encoding);
  }

  return Qnil;
}
Set the encoding string for this Document
/*
 * Set the encoding string for this document.
 *
 * encoding - String holding the new encoding name; a copy is stored in the
 *            xmlDoc.
 *
 * Returns +encoding+.
 */
static VALUE set_encoding(VALUE self, VALUE encoding)
{
  xmlDocPtr doc;
  xmlChar *new_encoding;

  Data_Get_Struct(self, xmlDoc, doc);

  /* Make the copy first: StringValuePtr raises for a non-String argument,
   * and the current encoding must stay untouched in that case. */
  new_encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));

  /* Release the previous encoding string; overwriting the pointer without
   * freeing it leaked the old buffer on every call. */
  if (doc->encoding) {
    xmlFree((xmlChar *)doc->encoding);
  }

  doc->encoding = new_encoding;

  return encoding;
}
Create a Nokogiri::XML::DocumentFragment from tags. Returns an empty fragment if tags is nil.
# File lib/nokogiri/xml/document.rb, line 168
#
# Build a DocumentFragment for this document from +tags+ (nil by default),
# passing the document's root node as the third constructor argument.
def fragment tags = nil
  context_node = root
  DocumentFragment.new(self, tags, context_node)
end
The name of this document. Always returns "document".
# File lib/nokogiri/xml/document.rb, line 85
#
# The node name of a document, which is the fixed string 'document'.
def name
  return 'document'
end
Get the hash of namespaces on the root Nokogiri::XML::Node
# File lib/nokogiri/xml/document.rb, line 161
#
# The namespaces hash of the root node, or an empty Hash when the document
# has no root.
def namespaces
  return {} unless root

  root.namespaces
end
Remove all namespaces from all nodes in the document.
This could be useful for developers who either don’t understand namespaces or don’t care about them.
The following example shows a use case, and you can decide for yourself whether this is a good thing or not:
doc = Nokogiri::XML <<-EOXML <root> <car xmlns:part="http://general-motors.com/"> <part:tire>Michelin Model XGV</part:tire> </car> <bicycle xmlns:part="http://schwinn.com/"> <part:tire>I'm a bicycle tire!</part:tire> </bicycle> </root> EOXML doc.xpath("//tire").to_s # => "" doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>" doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>" doc.remove_namespaces! doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>" doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "" doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
For more information on why this probably is not a good thing in general, please direct your browser to tenderlovemaking.com/2009/04/23/namespaces-in-xml/
/*
 * Remove namespaces from this document by handing the document node to
 * recursively_remove_namespaces_from_node.  Returns the receiver.
 */
VALUE remove_namespaces_bang(VALUE self)
{
  xmlDocPtr c_document;

  Data_Get_Struct(self, xmlDoc, c_document);

  /* The document itself is passed as the starting node. */
  recursively_remove_namespaces_from_node((xmlNodePtr)c_document);

  return self;
}
Get the root node for this document.
/*
 * Get the root element of this document as a wrapped node, or nil when
 * xmlDocGetRootElement reports none.
 */
static VALUE root(VALUE self)
{
  xmlDocPtr c_document;
  xmlNodePtr root_element;

  Data_Get_Struct(self, xmlDoc, c_document);

  root_element = xmlDocGetRootElement(c_document);
  if (root_element) {
    return Nokogiri_wrap_xml_node(Qnil, root_element);
  }

  return Qnil;
}
Set the root element on this document
/*
 * Set the root element of this document.
 *
 * root - the wrapped node to install as root, or nil to detach the current
 *        root element.
 *
 * Returns +root+ exactly as passed in (also when the node had to be copied
 * into this document).  Raises RuntimeError if xmlDocCopyNode fails.
 */
static VALUE set_root(VALUE self, VALUE root)
{
  xmlDocPtr doc;
  xmlNodePtr new_root;
  xmlNodePtr old_root;

  Data_Get_Struct(self, xmlDoc, doc);

  old_root = NULL;

  /* nil: unlink the current root, if there is one, and stop. */
  if(NIL_P(root)) {
    old_root = xmlDocGetRootElement(doc);

    if(old_root) {
      xmlUnlinkNode(old_root);
      /* presumably hands the now-detached node to Nokogiri's bookkeeping so
       * it is not lost -- confirm against the NOKOGIRI_ROOT_NODE macro */
      NOKOGIRI_ROOT_NODE(old_root);
    }

    return root;
  }

  Data_Get_Struct(root, xmlNode, new_root);

  /* If the new root's document is not the same as the current document,
   * then we need to dup the node in to this document. */
  if(new_root->doc != doc) {
    /* The previous root is only remembered on this cross-document path. */
    old_root = xmlDocGetRootElement(doc);
    if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
    }
  }

  xmlDocSetRootElement(doc, new_root);
  /* NOTE(review): when new_root already belongs to doc, old_root is still
   * NULL here, so a replaced root is not passed to NOKOGIRI_ROOT_NODE --
   * confirm this is intended. */
  if(old_root) NOKOGIRI_ROOT_NODE(old_root);
  return root;
}
Explore a document with shortcut methods.
# File lib/nokogiri/xml/document.rb, line 138
#
# Register Nokogiri::Decorators::Slop as a decorator for XML::Node (unless
# it is already registered) and call +decorate!+.  Returns the receiver.
def slop!
  node_decorators = decorators(XML::Node)

  unless node_decorators.include?(Nokogiri::Decorators::Slop)
    node_decorators << Nokogiri::Decorators::Slop
    decorate!
  end

  self
end
Get the url name for this document.
/*
 * Get the URL of this document as a Ruby string, or nil when the underlying
 * xmlDoc has no URL set.
 */
static VALUE url(VALUE self)
{
  xmlDocPtr c_document;

  Data_Get_Struct(self, xmlDoc, c_document);

  if (!c_document->URL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(c_document->URL);
}
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.