#include "xml.h"
// Binds the object to the stream handle <h> and sets up the document tree.
// Nodes and strings are allocated from a private heap created here; the
// tree starts out with a single node of type Node::root.
Xml::Xml(HANDLE h):hStream(h) {
  // Same flag combination the allocation calls in this file use
  const DWORD heapFlags=HEAP_GENERATE_EXCEPTIONS|HEAP_NO_SERIALIZE;
  hHeap=HeapCreate(heapFlags,0,0);
  root=newNode(Node::root);
}
// Destroys the private heap, and with it every node and string that
// newNode() / newString() allocated from it.
Xml::~Xml()
{
  ::HeapDestroy(hHeap);
}
// Allocates a zero-filled node of type <t> on the private heap.
// The node remembers its owning Xml object and its prev link refers to
// the node itself; all remaining fields stay zero.
Xml::Node*Xml::newNode(Xml::Node::type_t t) {
  // MSVC6 apparently cannot do placement new, so the node is set up by hand
  const DWORD allocFlags=HEAP_GENERATE_EXCEPTIONS|HEAP_NO_SERIALIZE|HEAP_ZERO_MEMORY;
  Node*const node=static_cast<Node*>(HeapAlloc(hHeap,allocFlags,sizeof(Node)));
  node->xml=this;
  node->type=t;
  node->prev=node;
  return node;
}
// In-place replacement of the predefined XML entities
// (&gt; &lt; &amp; &quot; &apos;) by the characters they stand for.
// s = buffer holding the text (not NUL-terminated), modified in place
// l = number of valid bytes in <s>
// Returns the new length, which is never larger than <l>.
// Unknown entities and a '&' without a following ';' are copied unchanged.
// TODO: also replace the numeric forms (decimal and hexadecimal character references)!
static int memunescape(char*s,int l) {
  static const char such[]="gt\0lt\0amp\0quot\0apos\0";  // entity names, list ends with an empty name
  static const char ersetz[]="><&\"'";                    // replacement characters, same order
  char*const start=s;
  char*d=s;
  while (l>0) {
    char c=*s++;
    --l;  // from here on <l> is exactly the number of bytes left at <s>
    if (c=='&') {
      // Search only inside the remaining range; looking one byte further could
      // pick up a stale ';' behind the data and drive <l> negative.
      char*q=(char*)memchr(s,';',l);
      if (q) {
        for (const char*su=such,*er=ersetz;*su;++er) {
          size_t lsu=strlen(su);
          if (s+lsu==q && !memcmp(s,su,lsu)) {
            s=q+1;             // continue behind the ';'
            l-=int(lsu)+1;     // entity name plus ';' consumed
            c=*er;
            break;
          }
          su+=lsu+1;
        }
      }
    }
    *d++=c;
  }
  return int(d-start);
}
char*Xml::newString() {
sb.slen=memunescape(sb.s,sb.slen);
char*p=(char*)HeapAlloc(hHeap,HEAP_GENERATE_EXCEPTIONS|HEAP_NO_SERIALIZE,sb.slen+1);
memcpy(p,sb.s,sb.slen);
p[sb.slen]=0;
return p;
}
// Speicherblock lesen (kann bei advance() passieren)
bool Xml::readNext() {
idx=0; len=0;
extern volatile bool wantStop;
if (wantStop) return false;
DWORD br;
if (!ReadFile(hStream,buf,BUFSIZE,&br,0)) return false; // Fehler
if (!br) return false; //End-Of-File
len=br;
return true;
}
// Parses the attribute list of a tag.
// el = node newly created by the caller; every attribute found is appended
//      to it as a child of type Node::Attr (name, and value when present)
// c  = current character: whitespace, or the character that ended the tag name
// Returns '>', '/' or '?' when the end of the attribute list is reached.
// A negative value from get() is passed through. On malformed input the
// character at which parsing gave up is returned instead ('=' when the value
// is not enclosed in single or double quotes, or the character that no longer
// fitted into the string buffer).
int Xml::parseAttr(Node*el, int c) {
  for(;;) {
    // skip whitespace in front of the next attribute or the end marker
    while (c<=' ') {
      c=get();
      if (c<0) return c;
    }
    if (c=='>' || c=='?' || c=='/') return c;  // done

    Node*attr=newNode(Node::Attr);

    // attribute name: everything up to whitespace, '=', '>' or '?'
    sb.clear();
    do{
      if (!sb.push(c)) return c;
      c=get();
      if (c<0) return c;
    }while (c>' ' && c!='=' && c!='>' && c!='?');
    attr->name=newString();

    if (c=='=') {
      // must be in single or double quotes for XML, not for HTML
      const int quote=get();
      if (quote!='"' && quote!='\'') return c;
      sb.clear();
      for (c=get();c!=quote;c=get()) {
        if (c<0) return c;
        if (!sb.push(c)) return c;
      }
      attr->value=newString();
      c=get();  // character behind the closing quote
      if (c<0) return c;
    }
    el->add_child_back(attr);
  }
}
// Parses the complete input stream into the node tree.
// A read buffer of BUFSIZE bytes is allocated for the duration of the call.
// Returns true when the first block could be read and innerParse() reported
// no error. The result code of innerParse() is stored in <ec>; <ec> is left
// untouched when the very first read already fails.
bool Xml::parse() {
  line=pos=0;
  buf=new char[len=BUFSIZE];
  bool ret=readNext() && !(ec=innerParse());
  delete[] buf;  // allocated with new[], so it must be released with delete[]
  buf=0;
  return ret;
}
// Error codes returned by Xml::innerParse(); 0 means success.
// Most codes serve as a base value to which innerParse() adds a small offset
// that identifies the place where the error was detected.
enum{
eUnexpectedEOF=0x10, // get() returned a negative value in the middle of a construct (+0..+10)
eIncomplete=0x20, // input ended while the current node was not the root
eNotInsideElement=0xF0, // "</" found while the current node is not an element
eBufferOverflow=0xE0, // sb.push() failed (+0..+7)
eNoMatchingTag=0xD0, // name in the end tag differs from the name of the current element
eWrongEnd=0xC0, // a construct was terminated in an unexpected way (+0..+5)
eWrongChar=0xB0, // element name does not start with an ASCII letter
};
// Parses the input character by character and builds the node tree below <root>.
// Returns 0 on success, otherwise one of the e... error codes (base value plus
// an offset that identifies the failing location).
// <cur> is the node that receives new children; it descends into an element on
// its start tag and ascends to the parent on the matching end tag.
// <c> always holds the next unprocessed character; a negative value from get()
// ends the main loop.
// NOTE(review): every string is created through newString(), which resolves
// entities; this also applies to comment, CDATA and DOCTYPE content.
int Xml::innerParse() {
Node*cur=root;
for (int c=get();c>=0;) if (c=='<') switch (c=get()) {
case '/': { // end of element
if (cur->type!=Node::Element) return eNotInsideElement;
sb.clear();
// collect the tag name up to whitespace or '>'
for(;;) {
if ((c=get())<0) return eUnexpectedEOF;
if (c<=' ') break;
if (c=='>') break; // locate end of tag
if (!sb.push(c)) return eBufferOverflow;
}
if (sb.slen // Not only "</>"
&& sb!=cur->name) return eNoMatchingTag; // Error! Tag must be the same
while(c!='>') { // locate end marker (Bug: skipping false attributes)
if ((c=get())<0) return eUnexpectedEOF+1;
}
cur=cur->parent; // ascend
c=get(); // the character after '>'
}break;
case '?': {
// processing instruction: a name followed by an attribute list, terminated by "?>"
Node*n=newNode(Node::ProcessingInstruction);
sb.clear();
for(;;) {
if ((c=get())<0) return eUnexpectedEOF+2;
if (c<=' ' || c=='>' || c=='/' || c=='?') break;
if (!sb.push(c)) return eBufferOverflow+1;
}
n->name=newString();
if ((c=parseAttr(n,c))<0) return eUnexpectedEOF+3;
if (c!='?') return eWrongEnd; // wrong termination
if ((c=get())!='>') return eWrongEnd+1;
cur->add_child_back(n);
c=get(); // the character after '>'
}break;
case '!': {
// "<!": the characters that follow are collected in sb until they spell
// "DOCTYPE" (followed by whitespace), "--" (comment) or "[CDATA["
sb.clear();
for(;;) {
if ((c=get())<0) return eUnexpectedEOF+4;
if (c=='>') return eWrongEnd+2;
if (sb=="DOCTYPE" && (unsigned char)c<=' ') {
Node*n=newNode(Node::DocumentType);
// skip the whitespace behind the keyword
do{
if ((c=get())<0) return eUnexpectedEOF+5;
}while (c<=' ');
sb.clear();
// the value is everything up to the first '>'
// NOTE(review): a '>' inside the declaration (e.g. in an internal subset) ends it early
while (c!='>') {
if (!sb.push(c)) return eBufferOverflow+2;
if ((c=get())<0) return eUnexpectedEOF+6;
}
n->value=newString();
cur->add_child_back(n);
break;
}
if (!sb.push(c)) return eBufferOverflow+3;
if (sb=="--") {
Node*n=newNode(Node::Comment);
sb.clear();
// read up to and including the terminator, then cut the terminator off again
do{
if ((c=get())<0) return eUnexpectedEOF+7;
if (!sb.push(c)) return eBufferOverflow+4;
}while (!sb.endsWith("-->"));
sb.slen-=3;
n->value=newString();
cur->add_child_back(n);
break;
}
if (sb=="[CDATA[") {
Node*n=newNode(Node::CDATASection);
sb.clear();
// read up to and including the terminator, then cut the terminator off again
do{
if ((c=get())<0) return eUnexpectedEOF+8;
if (!sb.push(c)) return eBufferOverflow+5;
}while (!sb.endsWith("]]>"));
sb.slen-=3;
n->value=newString();
cur->add_child_back(n);
break;
}
// whitespace behind any other keyword is an error
if (c<=' ') return eWrongEnd+3;
}//for
c=get(); // the character after '>'
}break;
default:{
// start tag of an element
Node*n=newNode(Node::Element);
sb.clear();
// skip whitespace between '<' and the name
while ((unsigned char)c<=' ') if ((c=get())<0) return eUnexpectedEOF+9;
if (!('A'<=c && c<='Z') && !('a'<=c && c<='z')) return eWrongChar;
// NOTE(review): only ASCII letters and digits are taken as name characters;
// anything else (e.g. '-', '_', ':') ends the name and is handed to parseAttr()
do{
if (!sb.push(c)) return eBufferOverflow+6;
if ((c=get())<0) return eUnexpectedEOF+10;
}while ('0'<=c && c<='9' || 'A'<=c && c<='Z' || 'a'<=c && c<='z');
n->name=newString();
cur->add_child_back(n);
switch (c=parseAttr(n,c)) {
case '/': if ((c=get())!='>') return eWrongEnd+4; break;
case '>': cur=n; break; // descend
default: return eWrongEnd+5; // not allowed end marker
}
c=get(); // the character after '>'
}
}else{
// character data: everything up to the next '<' or the end of input becomes a Text node
Node*n=newNode(Node::Text);
sb.clear();
do{
if (!sb.push(c)) return eBufferOverflow+7;
if ((c=get())<0) break;
}while (c!='<');
n->value=newString();
cur->add_child_back(n);
}
if (cur->type!=Node::root) return eIncomplete; // input ended before the current node got back to the root
return 0;
}
// Callback for enumNodes(): stores the node in the "const Xml::Node*" that
// <param> points to and returns false, which ends the enumeration.
static bool stop_and_save(const Xml::Node*n,void*param) {
  const Xml::Node**const slot=(const Xml::Node**)param;
  *slot=n;
  return false;
}
// Returns the first node that enumNodes() reports for type <t> and name <tag>,
// or 0 when the enumeration finishes without a match.
const Xml::Node*Xml::Node::findNode(Node::type_t t,const char*tag) const{
  const Node*hit=0;
  enumNodes(t,tag,stop_and_save,&hit);  // the callback stores the match and stops
  return hit;
}
// Walks this node, its following siblings and all their sub-trees, and calls
// <cb> for every node that matches.
// t     = node type to match, or -1 for any type
// tag   = node name to match, or 0 for any name; nodes without a name
//         never match a given tag
// cb    = callback; returning false stops the enumeration
// param = passed through to <cb> unchanged
// Returns false when the callback stopped the enumeration, true otherwise.
bool Xml::Node::enumNodes(Node::type_t t,const char*tag,bool(*cb)(const Node*,void*),void*param) const{
  for(const Node*n=this;n;n=n->next) {
    const bool typeOk=t==-1 || n->type==t;
    // nodes created with only a value have a zero name pointer
    const bool nameOk=!tag || (n->name && !strcmp(n->name,tag));
    if (typeOk && nameOk && !cb(n,param)) return false;
    // recurse through the sub-tree, but never call through a null pointer
    if (n->sub && !n->sub->enumNodes(t,tag,cb,param)) return false;
  }
  return true;
}
// Returns the first direct child that matches, or 0 when there is none.
// t   = node type to match, or -1 for any type
// tag = node name to match, or 0 for any name (same convention as enumNodes());
//       children without a name never match a given tag
const Xml::Node*Xml::Node::findChildNode(Node::type_t t,const char*tag) const{
  for(const Node*n=sub;n;n=n->next) {
    if (t!=-1 && n->type!=t) continue;
    // guard against the zero name pointer of value-only nodes
    if (!tag || (n->name && !strcmp(n->name,tag))) return n;
  }
  return 0;
}
// Returns the first direct child of type <t> (-1 = any type) for which
// the predicate <test> returns true, or 0 when no child qualifies.
// <p> is handed to the predicate unchanged.
const Xml::Node*Xml::Node::findChildNode(Node::type_t t,bool(*test)(const Node*,void*),void*p) const{
  const Node*child=sub;
  while (child) {
    const bool typeOk=(t==-1) || (child->type==t);
    if (typeOk && test(child,p)) return child;
    child=child->next;
  }
  return 0;
}
// Unlinks the node from its sibling list and from its parent.
// NOTE(review): newNode() lets prev refer to the node itself; if the list keeps
// the first child's prev pointing at the last sibling, removing the first or the
// last child here may leave a stale link - confirm against add_child_back().
Xml::Node::~Node() {
  Node*const before=prev;
  Node*const after=next;
  before->next=after;
  if (after) after->prev=before;
  // the parent must not keep pointing at a removed first child
  if (parent && parent->sub==this) parent->sub=after;
  parent=0;
}
bool Xml::serialize(const Node*p) {
switch (p->type) {
case Node::Element: out("<%s",p->name); break;
// TODO: Attribute dahinter!
case Node::Text: out("%s",p->value); break;
case Node::ProcessingInstruction: out("<?%s",p->value); break;
case Node::Comment: out("<!--%s-->",p->value); break;
}
for (const Node*n=p->sub;n;n=n->next) serialize(n);
switch (p->type) {
case Node::Element: out("</%s>",p->name); break;
}
return true;
}
// printf-style output to the stream.
// t = format string, followed by its arguments
// The text is formatted into a local buffer of 4000 bytes.
// Returns true when the complete text was written; false when formatting
// failed or the text did not fit into the buffer, when WriteFile failed, or
// when fewer bytes than requested were written.
bool _cdecl Xml::out(const char*t,...) {
  char buf[4000];
  va_list va;
  va_start(va,t);
  int l=_vsnprintf(buf,sizeof buf,t,va);
  va_end(va);
  // _vsnprintf reports truncation or an error with a negative value; passing
  // that on would reach WriteFile as a huge unsigned byte count
  if (l<0) return false;
  DWORD bw;
  if (!WriteFile(hStream,buf,l,&bw,0)) return false;
  return int(bw)==l;
}
// Detected encoding: UTF-8