Source file: /~heha/hs/cdcat1plugin.zip/src/xml.cpp

#include "xml.h"

// Binds the parser to the stream handle <h>, creates the private heap
// that backs all nodes and strings, and allocates the root node from it.
Xml::Xml(HANDLE h):hStream(h) {
 // Failed allocations raise an exception instead of returning 0;
 // the heap is created without serialization (HEAP_NO_SERIALIZE).
 const DWORD heapFlags=HEAP_GENERATE_EXCEPTIONS|HEAP_NO_SERIALIZE;
 hHeap=HeapCreate(heapFlags,0,0);
 // The heap must exist before the first node can be allocated.
 root=newNode(Node::root);
}

// Destroys the private heap created in the constructor; everything that
// newNode() and newString() allocated from it goes away with this one call.
Xml::~Xml() {
 HeapDestroy(hHeap);
}

// Allocates one zero-filled Node of type <t> from the private heap and
// sets the fields that must not stay zero. No constructor is run
// (MSVC6 apparently cannot do placement new).
Xml::Node*Xml::newNode(Xml::Node::type_t t) {
 const DWORD allocFlags=HEAP_GENERATE_EXCEPTIONS|HEAP_NO_SERIALIZE|HEAP_ZERO_MEMORY;
 Node*fresh=static_cast<Node*>(HeapAlloc(hHeap,allocFlags,sizeof(Node)));
 fresh->type=t;
 fresh->xml=this;
 fresh->prev=fresh;	// a node that is not linked yet is its own predecessor
 return fresh;
}

// In-place replacement of the five predefined XML entities
// (&gt; &lt; &amp; &quot; &apos;) by the character they stand for.
// s = buffer to process (need not be NUL-terminated), l = number of bytes in s
// Returns the new length (<s> can only get shorter).
// Unknown or unterminated entities are copied unchanged. The search for
// the closing ';' never looks beyond the <l> bytes handed in.
// TODO: Replace the numeric forms "&#123;" and "&#x12;" as well!
static int memunescape(char*s,int l) {
 static const char such[]="gt\0lt\0amp\0quot\0apos\0";	// entity names, list ends with an empty string
 static const char ersetz[]="><&\"'";			// replacement characters, same order
 char*const start=s;
 char*d=s;			// write position, always <= read position
 while (l>0) {
  char c=*s++; --l;		// from here on <l> counts the bytes still unread at <s>
  if (c=='&' && l>0) {
   const char*q=(const char*)memchr(s,';',l);
   if (q) {
    const size_t lname=q-s;	// length of the text between '&' and ';'
    const char*er=ersetz;
    for (const char*su=such;*su;++er) {
     const size_t lsu=strlen(su);
     if (lsu==lname && !memcmp(s,su,lsu)) {
      s+=lsu+1; l-=(int)(lsu+1); c=*er;	// skip name and ';', emit the replacement
      break;
     }
     su+=lsu+1;
    }
   }
  }
  *d++=c;
 }
 return (int)(d-start);
}

char*Xml::newString() {
 sb.slen=memunescape(sb.s,sb.slen);
 char*p=(char*)HeapAlloc(hHeap,HEAP_GENERATE_EXCEPTIONS|HEAP_NO_SERIALIZE,sb.slen+1);
 memcpy(p,sb.s,sb.slen);
 p[sb.slen]=0;
 return p;
}


// Speicherblock lesen (kann bei advance() passieren)
bool Xml::readNext() {
 idx=0; len=0;
 extern volatile bool wantStop;
 if (wantStop) return false;
 DWORD br;
 if (!ReadFile(hStream,buf,BUFSIZE,&br,0)) return false;	// Fehler
 if (!br) return false; //End-Of-File
 len=br;
 return true;
}

// Parses the attribute list of a start tag or processing instruction.
// el = Element node newly created
// c = character read last by the caller
// idx = position in buf (onto whitespace or ">" or "/>" or "?>")
// On return: idx onto ">" or "/>" or "?>" or erroneous character
// attribute nodes added to el->sub
// Returns '>', '/' or '?' when the list ended regularly, a negative value
// handed through from get(), or the character at which parsing gave up
// (sb.push() failed or the attribute value was not quoted).
int Xml::parseAttr(Node*el, int c) {
 for(;;) {
  if (c<=' ') {
   if ((c=get())<0) return c;
   continue;	// skip whitespace
  }
  switch (c) {
   case '>': 
   case '?':
   case '/': return c;	// done
  }
  // Anything else starts an attribute name
  Node*k=newNode(Node::Attr);
  sb.clear();
  for(;;) {
   if (!sb.push(c)) return c;
   if ((c=get())<0) return c;
   // name ends at whitespace, '=', '>' or '?' (note: not at '/')
   if (c<=' ' || c=='=' || c=='>' || c=='?') break;
  }
  k->name=newString();
  if (c=='=') {
   int q=get();	// opening quote, also used as the terminator below
   if (q!='"' && q!='\'') return c;	// must be in single or double quotes for XML, not for HTML (c is still '=')
   sb.clear();
   for(;;) {
    if ((c=get())<0) return c;
    if (c==q) break;
    if (!sb.push(c)) return c;
   }
   k->value=newString();
   if ((c=get())<0) return c;		// character after the closing quote
  }
  // Only completely parsed attributes are linked to the element
  el->add_child_back(k);
 }
}

// Parses the complete stream into the node tree.
// The read buffer exists only for the duration of this call.
// Returns true when the first block could be read and innerParse()
// returned 0. <ec> receives the innerParse() result; it is left
// untouched when the very first read fails.
bool Xml::parse() {
 line=pos=0;
 buf=new char[len=BUFSIZE];
 bool ret=readNext() && !(ec=innerParse());
 delete[] buf; buf=0;	// array form: buf was allocated with new[]
 return ret;
}

// Error codes returned by Xml::innerParse().
// Each value is a base code; innerParse() adds a small offset to it
// so that the failing place can be told apart.
enum{
 eUnexpectedEOF   =0x10,	// input ended inside a construct
 eIncomplete      =0x20,	// input ended while elements were still open
 eWrongChar       =0xB0,	// element name does not start with a letter
 eWrongEnd        =0xC0,	// construct is terminated the wrong way
 eNoMatchingTag   =0xD0,	// name of end tag differs from the open element
 eBufferOverflow  =0xE0,	// scratch buffer did not accept another character
 eNotInsideElement=0xF0,	// end tag although no element is open
};

// Parses the whole input into the node tree below <root>.
// Characters are fetched one by one with get(); a negative result is
// treated as end of input. <cur> is the node that receives new children.
// Returns 0 on success, otherwise one of the e... base codes plus a small
// offset that identifies the place where parsing gave up.
int Xml::innerParse() {
 Node*cur=root;
 // Invariant at the top of the loop: c is the next unprocessed character
 for (int c=get();c>=0;) if (c=='<') switch (c=get()) {
  case '/': {		// end of element
   if (cur->type!=Node::Element) return eNotInsideElement;
   sb.clear();
   for(;;) {		// collect the tag name
    if ((c=get())<0) return eUnexpectedEOF;
    if (c<=' ') break;
    if (c=='>') break;			// locate end of tag
    if (!sb.push(c)) return eBufferOverflow;
   }
   if (sb.slen	// Not only "</>"
   && sb!=cur->name) return eNoMatchingTag; // Error! Tag must be the same
   while(c!='>') {	// locate end marker (Bug: skipping false attributes)
    if ((c=get())<0) return eUnexpectedEOF+1;
   }
   cur=cur->parent;	// ascend
   c=get();	// the character after '>'
  }break;
  case '?': {		// processing instruction: name, attributes, "?>"
   Node*n=newNode(Node::ProcessingInstruction);
   sb.clear();
   for(;;) {
    if ((c=get())<0) return eUnexpectedEOF+2;
    if (c<=' ' || c=='>' || c=='/' || c=='?') break;
    if (!sb.push(c)) return eBufferOverflow+1;
   }
   n->name=newString();
   if ((c=parseAttr(n,c))<0) return eUnexpectedEOF+3;
   if (c!='?') return eWrongEnd;	// wrong termination
   if ((c=get())!='>') return eWrongEnd+1;
   cur->add_child_back(n);
   c=get();	// the character after '>'
  }break;
  case '!': {		// "<!DOCTYPE ...>", "<!-- ... -->" or "<![CDATA[ ... ]]>"
   sb.clear();
   // Collect characters until <sb> spells one of the three known introducers
   for(;;) {
    if ((c=get())<0) return eUnexpectedEOF+4;
    if (c=='>') return eWrongEnd+2;
    if (sb=="DOCTYPE" && (unsigned char)c<=' ') {
     Node*n=newNode(Node::DocumentType);
     do{		// skip whitespace after the keyword
      if ((c=get())<0) return eUnexpectedEOF+5;
     }while (c<=' ');
     sb.clear();
     while (c!='>') {	// everything up to the first '>' becomes the value
      if (!sb.push(c)) return eBufferOverflow+2;
      if ((c=get())<0) return eUnexpectedEOF+6;
     }
     n->value=newString();
     cur->add_child_back(n);
     break;
    }
    if (!sb.push(c)) return eBufferOverflow+3;
    if (sb=="--") {
     Node*n=newNode(Node::Comment);
     sb.clear();
     do{		// read up to and including the terminator
      if ((c=get())<0) return eUnexpectedEOF+7;
      if (!sb.push(c)) return eBufferOverflow+4;
     }while (!sb.endsWith("-->"));
     sb.slen-=3;	// drop the terminator from the stored text
     n->value=newString();
     cur->add_child_back(n);
     break;
    }
    if (sb=="[CDATA[") {
     Node*n=newNode(Node::CDATASection);
     sb.clear();
     do{		// read up to and including the terminator
      if ((c=get())<0) return eUnexpectedEOF+8;
      if (!sb.push(c)) return eBufferOverflow+5;
     }while (!sb.endsWith("]]>"));
     sb.slen-=3;	// drop the terminator from the stored text
     n->value=newString();
     cur->add_child_back(n);
     break;
    }
    if (c<=' ') return eWrongEnd+3;	// whitespace inside an unknown introducer
   }//for
   c=get();	// the character after '>'
  }break;
  default:{		// start tag of an element
   Node*n=newNode(Node::Element);
   sb.clear();
   while ((unsigned char)c<=' ') if ((c=get())<0) return eUnexpectedEOF+9;
   // The name must start with an ASCII letter ...
   if (!('A'<=c && c<='Z') && !('a'<=c && c<='z')) return eWrongChar;
   // ... and continues with ASCII letters and digits only
   do{
    if (!sb.push(c)) return eBufferOverflow+6;
    if ((c=get())<0) return eUnexpectedEOF+10;
   }while ('0'<=c && c<='9' || 'A'<=c && c<='Z' || 'a'<=c && c<='z');
   n->name=newString();
   cur->add_child_back(n);
   switch (c=parseAttr(n,c)) {
    case '/': if ((c=get())!='>') return eWrongEnd+4; break;	// empty element, stay on this level
    case '>': cur=n; break;	// descend
    default: return eWrongEnd+5;	// not allowed end marker
   }
   c=get();	// the character after '>'
  }
 }else{
  // Character data up to the next '<' or the end of input becomes a text node
  Node*n=newNode(Node::Text);
  sb.clear();
  do{
   if (!sb.push(c)) return eBufferOverflow+7;
   if ((c=get())<0) break;
  }while (c!='<');
  n->value=newString();
  cur->add_child_back(n);
 }
 if (cur->type!=Node::root) return eIncomplete;	// true when root at end of input
 return 0;
}

// Enumeration callback for findNode(): stores the visited node in the
// pointer variable that <param> points to and returns false, which makes
// enumNodes() stop at this first hit.
static bool stop_and_save(const Xml::Node*n,void*param) {
 const Xml::Node**slot=static_cast<const Xml::Node**>(param);
 *slot=n;
 return false;
}

// Returns the first node that enumNodes() reports for type <t> and
// name <tag>, or 0 when the enumeration ends without a hit.
const Xml::Node*Xml::Node::findNode(Node::type_t t,const char*tag) const{
 const Node*found=0;
 enumNodes(t,tag,stop_and_save,&found);	// the callback fills <found> and stops
 return found;
}

// Visits this node, all following siblings (via next) and, below each of
// them, the complete sub-tree (via sub). A node is reported before its
// sub-tree.
// t     = node type to report, or -1 for any type
// tag   = node name to report, or 0 for any name; nodes without a name
//         never match a non-null tag
// cb    = called for every matching node; returning false stops the walk
// param = handed to cb unchanged
// Returns false when cb stopped the walk, true when all nodes were visited.
bool Xml::Node::enumNodes(Node::type_t t,const char*tag,bool(*cb)(const Node*,void*),void*param) const{
 for(const Node*n=this;n;n=n->next) {
  const bool typeOk=t==-1 || n->type==t;
  // name is 0 for nodes that only carry a value (text, comment, ...)
  const bool nameOk=!tag || (n->name && !strcmp(n->name,tag));
  if (typeOk && nameOk && !cb(n,param)) return false;
  // recurse through sub-tree, but never call a member through a null pointer
  if (n->sub && !n->sub->enumNodes(t,tag,cb,param)) return false;
 }
 return true;
}

// Returns the first direct child (no recursion) of type <t> whose name
// equals <tag>, or 0 when there is none.
// t   = node type to accept, or -1 for any type
// tag = name to look for; 0 accepts any name (same convention as
//       enumNodes()). Children without a name never match a non-null tag.
const Xml::Node*Xml::Node::findChildNode(Node::type_t t,const char*tag) const{
 for(const Node*n=sub;n;n=n->next) {
  if (t!=-1 && n->type!=t) continue;
  if (!tag || (n->name && !strcmp(n->name,tag))) return n;
 }
 return 0;
}

// Returns the first direct child (no recursion) of type <t> that the
// predicate <test> accepts, or 0 when there is none.
// t = node type to accept, or -1 for any type
// p = handed to <test> unchanged; <test> is only called for children
//     whose type already matched
const Xml::Node*Xml::Node::findChildNode(Node::type_t t,bool(*test)(const Node*,void*),void*p) const{
 const Node*child=sub;
 while (child) {
  const bool typeOk=t==-1 || child->type==t;
  if (typeOk && test(child,p)) return child;
  child=child->next;
 }
 return 0;
}

// Unlinks this node from its sibling chain and from its parent.
// NOTE(review): how prev is maintained for the first/last sibling depends on
// add_child_back(), which is not part of this file -- confirm there.
Xml::Node::~Node() {
 Node*const before=prev;
 Node*const after=next;
 before->next=after;			// predecessor skips this node
 if (after) after->prev=before;		// successor points back past this node
 if (parent) {
  if (parent->sub==this) parent->sub=after;	// this was the parent's first child
 }
 parent=0;
}

bool Xml::serialize(const Node*p) {
 switch (p->type) {
  case Node::Element: out("<%s",p->name); break;
  // TODO: Attribute dahinter!
  case Node::Text: out("%s",p->value); break;
  case Node::ProcessingInstruction: out("<?%s",p->value); break;
  case Node::Comment: out("<!--%s-->",p->value); break;
 }
 for (const Node*n=p->sub;n;n=n->next) serialize(n);
 switch (p->type) {
  case Node::Element: out("</%s>",p->name); break;
 }
 return true;
}

// Formats like printf() and writes the result to hStream.
// t = format string, followed by its arguments
// Returns false when the formatted text does not fit into the local
// 4000-byte buffer (or formatting fails), when WriteFile() fails, or when
// fewer bytes than requested were written.
bool _cdecl Xml::out(const char*t,...) {
 char buf[4000];
 va_list va;
 va_start(va,t);
 int l=_vsnprintf(buf,sizeof buf,t,va);
 va_end(va);
 // _vsnprintf() reports truncation with a negative value; passing that on
 // to WriteFile() would be taken as a huge unsigned byte count.
 if (l<0) return false;
 DWORD bw;
 if (!WriteFile(hStream,buf,l,&bw,0)) return false;
 return int(bw)==l;
}
Detected encoding: ASCII (7 bit)2