#include <cassert> // assert()
#include <cstring> // memcpy(), memcmp(), strcmp(), strlen()
// Variadic debug-output hook; defined elsewhere.
// NOTE(review): `t` is presumably a printf-style format string - confirm against the definition.
void debug(const char*t,...);
// Compares the character range [s,e) against the NUL-terminated string v; defined elsewhere.
// Xml::Stringbuf::operator== treats a `true` result as "equal".
// NOTE(review): presumably v has to match the whole range, not just a prefix - confirm.
bool memstrcmp(const char*s, const char*e, const char*v);
// Minimal XML document: a tree of Node objects plus the buffered reader state
// used by parse() and the output helpers used by serialize().
// NOTE(review): ~Xml() deletes `root`, but copying is not disabled; copying an
// Xml would leave two objects deleting the same root node.
struct Xml {
  // One node of the document tree (element, attribute, text, ...).
  struct Node {
    Xml*xml;            // back pointer up to the owning "document"
    // Node kinds. Table columns: scope | allowed sub nodes | name | value
    enum type_t {
      root,                 // 0..len          | PI,Element,Comment      | 0   | 0
      Element,              // <tag..>         | Attr;Element,Text,Comm. | tag | 0
      Attr,                 // tag="value"     | 0                       | tag | value
      Text,                 // text            | Entity                  | 0   | text
      CDATASection,         // <![CDATA[var]]> | 0                       | 0   | var
      EntityReference,      // ?
      Entity,               // &tag;           | 0                       | tag | 0
      ProcessingInstruction,// <?tag ?>        | Attr                    | tag | between tag and ?>
      Comment,              // <!-- -->        | 0                       | 0   | between <!-- and -->
      Document,             // HTML only
      DocumentType,         // <!DOCTYPE >
      DocumentFragment,     // HTML only
      Notation              // ?
    } type;
    const char*name;
    const char*value;
    /* Node linkage, example: 3 nodes, the middle one has no child element.
       While "next" is 0 at the end of the list, "prev" of the first node
       points to the last element.
       From any node (except root), "parent->sub->prev" is the last element.
                  ^
                  +---<-------+---<-------+
       +--------- | -->------ | -->------ | -+
       |          |           |           |  |
       | +------+ |  +------+ |  +------+ |  |
       | | Node | |  | Node | |  | Node | |  |
       | |parent|-+  |parent|-+  |parent|-+  |
       | | next |--->| next |--->| next |>0  |
       +-| prev |<---| prev |<---| prev |<---+
         | sub  |-+  | sub  |>0  | sub  |-+
         +------+ |  +------+    +------+ |
                  v                       v
    */
    Node*next,*prev,*parent,*sub;

    // Creates an unlinked node of kind `t` belonging to document `ml`.
    // All pointers start out as 0, except `prev`, which points to the node
    // itself (a single node is its own "last sibling").
    Node(Xml*ml,type_t t=root)
      : xml(ml), type(t), name(0), value(0), next(0), prev(0), parent(0), sub(0)
    {
      prev=this;
    }

    // Appends the (freshly created, unlinked) node `n` as last child of this node.
    void add_child_back(Node*n) {
      if (sub) {
        sub->add_sibling_back(n);
      } else {
        sub=n;
        n->parent=this;
      }
    }

    // Appends `n` at the end of the sibling list.
    // Must be called on the first child, because only its `prev` addresses the tail.
    void add_sibling_back(Node*n) {
      assert(isFirstChild());
      n->parent=parent;
      prev->next=n;     // old tail -> n
      n->prev=prev;
      prev=n;           // n becomes the new tail
    }

    // Inserts `n` in front of this node, making `n` the new first child.
    // Must be called on the current first child.
    void add_sibling_front(Node*n) {
      assert(isFirstChild());
      n->parent=parent;
      parent->sub=n;
      n->next=this;
      n->prev=prev;     // the new head takes over the tail pointer
      prev=n;
    }

    // True if `n` is a direct child of this node.
    bool isChild(Node*n) const {return n->parent==this;}
    // True if no sibling follows this node.
    bool isLastChild() const {return !next;}
    // True if this node is the first child of its parent.
    // A node without parent (root or not yet linked) is never a first child;
    // the null check avoids dereferencing a null `parent`.
    bool isFirstChild() const {return parent && this==parent->sub;}

    // Shorthand for findNode(Node::Element,tag).
    const Node*findElement(const char*tag=0) const {return findNode(Node::Element,tag);}
    // Search/enumeration helpers; defined elsewhere.
    const Node*findNode(Node::type_t t,const char*tag=0) const;
    bool enumNodes(Node::type_t t,const char*tag,bool(*)(const Node*,void*),void*) const;
    const Node*findChildNode(Node::type_t t,const char*tag) const;
    const Node*findChildNode(Node::type_t t,bool(*)(const Node*,void*),void*) const;
    ~Node();            // unlink; defined elsewhere
  //private:
  // static bool stop_and_save(const Node*n,void*param) {*(const Node**)param=n; return false;}
  };

  Node*root;
  unsigned line,pos;    // location of the error after parse(), 0-based
  int hStream;          // NOTE(review): presumably the handle used by readNext()/out() - confirm

  // Zero-initialises all state (including the parser scratch buffer) and
  // creates the root node via newNode().
  Xml() : root(0), line(0), pos(0), hStream(0), buf(0), idx(0), len(0) {
    memset(sb.s,0,sizeof sb.s);
    root=newNode();
  }
  ~Xml() {delete root;}

  bool parse();         // linearised (non-recursive) version
  bool serialize() {return serialize(root);}
  enum{BUFSIZE=1024*1024};

private:
  char*buf;
  unsigned idx,len;     // read position in buf / number of valid bytes in buf

  // Fixed-capacity character buffer (not NUL-terminated) with at most MAX characters.
  template<int MAX> struct Stringbuf {
    unsigned slen;      // number of characters currently stored
    char s[MAX];
    Stringbuf():slen(0) {}
    void clear() {slen=0;}
    // Appends `c`; returns false (and stores nothing) if the buffer is full.
    bool push(char c) {
      if (slen>=sizeof s) return false;
      s[slen++]=c;
      return true;
    }
    bool operator==(const char*v) const {return memstrcmp(s,s+slen,v);}
    bool operator!=(const char*v) const {return !operator==(v);}
    // True if the stored characters end with the C string `v`.
    // `v` must not be longer than the content; otherwise it cannot be a suffix
    // and comparing would read before the start of `s`.
    bool endsWith(const char*v) const {
      size_t lv=strlen(v);
      return lv<=slen && !memcmp(s+slen-lv,v,lv);
    }
  };

  // Returns the next input byte (0..255), or -1 at end of input.
  int get() {
    if (idx==len) readNext();   // buffer exhausted? read the next block
    if (idx==len) return -1;    // still exhausted? report EOF
    int ret=(unsigned char)buf[idx++];
    // keep track of line and column
    if (ret=='\n') {
      ++line;
      pos=0;
    } else {
      ++pos;
    }
    return ret;
  }

  bool readNext();
  bool innerParse();
  int parseAttr(Node*el,int c);
  bool serialize(const Node*);
  char*newString();
  bool out(const char*t,...);
  Node*newNode(Node::type_t t=Node::root);
  Stringbuf<512>sb;     // for the parser only
};
// Detected encoding: UTF-8