Source file: /~heha/hs/cdcat1plugin.zip/src-linux/xml.cpp

#include "xml.h"

#include <unistd.h>	  // read
#include <cstdio>	  // vsnprintf
#include <cstdarg>	  // va_list

// Allocates a node of type <t> that refers back to this Xml object.
// The node is not linked into the tree here; callers do that with add_child_back().
Xml::Node*Xml::newNode(Xml::Node::type_t t) {
 Node*const fresh=new Node(this,t);
 return fresh;
}

// In-place replacement of the five predefined XML entities
// (&gt; &lt; &amp; &quot; &apos;) in the <l> bytes starting at <s>.
// The text can only get shorter. Unknown or unterminated references
// (no ';' inside the given range) are copied unchanged.
// Returns the new length in bytes; no terminating NUL is written.
static int memunescape(char*s,int l) {
 static const char such[]="gt\0lt\0amp\0quot\0apos\0";	// entity names; an empty string ends the list
 static const char ersetz[]="><&\"'";			// replacement characters, same order as <such>
 int r=0;
 for(char*d=s;l>0;--l) {
  char c=*s++;
  if (c=='&') {
   // <s> is already behind the '&', so only l-1 bytes are left.
   // Searching <l> bytes would read one byte past the data and could
   // accept a stale ';' there, which drives <l> negative.
   char*q=(char*)memchr(s,';',l-1);
   if (q) {
    // TODO: also replace the numeric forms "&#123;" and "&#x12;"
    for (const char*su=such,*er=ersetz;*su;++er) {
     size_t lsu=strlen(su);
     if (s+lsu==q && !memcmp(s,su,lsu)) {
      // skip name and ';' (the '&' itself is counted by the loop's --l)
      s=q+1; l-=(int)lsu+1; c=*er; break;
     }
     su+=lsu+1;
    }
   }
  }
  *d++=c;
  r++;
 }
 return r;
}

// Returns a freshly allocated (new char[]), NUL-terminated copy of the text
// collected in <sb>. Entity references are unescaped in <sb> first, so
// sb.slen is updated to the shortened length as a side effect.
char*Xml::newString() {
 sb.slen=memunescape(sb.s,sb.slen);
 const auto count=sb.slen;
 char*const copy=new char[count+1];
 memcpy(copy,sb.s,count);
 copy[count]=0;
 return copy;
}


// Reads the next block of input into <buf> (may be triggered by advance()).
// Resets idx and len first; on success len holds the number of bytes read.
// Returns false when a stop was requested (wantStop) or when read()
// delivers nothing (end of input or error).
bool Xml::readNext() {
 extern volatile bool wantStop;
 debug("%s(), hStream=%d",__FUNCTION__,hStream);
 len=0;
 idx=0;
 if (!wantStop) {
  const ssize_t got=read(hStream,buf,BUFSIZE);
  if (got>0) {
   len=got;
   return true;
  }
 }
 return false;
}

// Parses the attribute list of a tag.
// el = node the attributes belong to; every attribute is appended to it
//      as a child of type Node::Attr
// c  = current character (whitespace, or the character that ended the tag name)
// Returns the character that ended the list ('>', '/' or '?'),
// a negative get() result, or the character at which parsing gave up.
int Xml::parseAttr(Node*el, int c) {
 debug("%s(\"%s\",'%c')",__FUNCTION__,el->name,c);
 for(;;) {
  while (c<=' ') {	// skip whitespace
   c=get();
   if (c<0) return c;
  }
  if (c=='>' || c=='?' || c=='/') return c;	// done
  Node*attr=newNode(Node::Attr);
  // collect the attribute name
  sb.clear();
  do{
   if (!sb.push(c)) return c;
   c=get();
   if (c<0) return c;
  }while (!(c<=' ' || c=='=' || c=='>' || c=='?'));
  attr->name=newString();
  if (c=='=') {
   // the value must be in single or double quotes for XML (not for HTML)
   const int quote=get();
   if (quote!='"' && quote!='\'') return c;
   sb.clear();
   while ((c=get())!=quote) {
    if (c<0) return c;
    if (!sb.push(c)) return c;
   }
   attr->value=newString();
   c=get();	// character after the closing quote
   if (c<0) return c;
  }
  el->add_child_back(attr);
  debug("Node::Attr(%s=\"%s\")",attr->name,attr->value);
 }
}

// Parses the whole input stream into the node tree.
// Allocates the read buffer for the duration of the parse, reads the first
// block and runs innerParse(). Returns true only if both succeed.
bool Xml::parse() {
 line=pos=0;
 buf=new char[len=BUFSIZE];
 bool ret= readNext() && innerParse();
 delete[] buf; buf=0;	// array form: buf comes from new char[]
 return ret;
}

// Parses the input character by character (get()) into nodes below <root>.
// After a '<' the next character selects the kind of markup:
//   '/'   end tag: the name is compared with the current element, then ascend
//   '?'   processing instruction with attributes, must end with "?>"
//   '!'   DOCTYPE, comment ("--") or CDATA section ("[CDATA[")
//   else  start tag: creates an Element node and parses its attributes
// Everything else becomes a Text node.
// Returns false on any syntax error, on a negative get() result inside
// markup, or when sb.push() fails. Returns true when the input ends
// (negative get() result between items) while <cur> is of type Node::root.
bool Xml::innerParse() {
 debug("%s Start",__FUNCTION__);
 Node*cur=root;		// node that receives new children
 for (int c=get();c>=0;) if (c=='<') switch (c=get()) {
  case '/': {		// end of element
   if (cur->type!=Node::Element) return false;	// nothing open to close
   sb.clear();
   for(;;) {		// collect the tag name
    if ((c=get())<0) return false;
    if (c<=' ') break;
    if (c=='>') break;			// locate end of tag
    if (!sb.push(c)) return false;
   }
   if (sb.slen	// Not only "</>"
   && sb!=cur->name) return false; // Error! Tag must be the same
   while(c!='>') {	// locate end marker (Bug: skipping false attributes)
    if ((c=get())<0) return false;
   }
   cur=cur->parent;	// ascend
   c=get();	// the character after '>'
  }break;
  case '?': {		// processing instruction
   Node*n=newNode(Node::ProcessingInstruction);
   sb.clear();
   for(;;) {		// collect the target name
    if ((c=get())<0) return false;
    if (c<=' ' || c=='>' || c=='/' || c=='?') break;
    if (!sb.push(c)) return false;
   }
   n->name=newString();
   if ((c=parseAttr(n,c))<0) return false;
   if (c!='?') return false;	// wrong termination
   if ((c=get())!='>') return false;
   cur->add_child_back(n);
   c=get();	// the character after '>'
  }break;
  case '!': {		// DOCTYPE, comment or CDATA section
   // <sb> collects the keyword behind "<!" until one of the three forms is
   // recognized. Each branch leaves the loop with c=='>'.
   // NOTE(review): newString() also unescapes entities inside comment and
   // CDATA content - confirm that this is intended.
   sb.clear();
   for(;;) {
    if ((c=get())<0) return false;
    if (c=='>') return false;
    if (sb=="DOCTYPE" && (unsigned char)c<=' ') {
     Node*n=newNode(Node::DocumentType);
     do{		// skip whitespace after the keyword
      if ((c=get())<0) return false;
     }while (c<=' ');
     sb.clear();
     while (c!='>') {	// the value is everything up to the next '>'
      if (!sb.push(c)) return false;
      if ((c=get())<0) return false;
     }
     n->value=newString();
     cur->add_child_back(n);
     break;
    }
    if (!sb.push(c)) return false;
    if (sb=="--") {
     Node*n=newNode(Node::Comment);
     sb.clear();
     do{
      if ((c=get())<0) return false;
      if (!sb.push(c)) return false;
     }while (!sb.endsWith("-->"));
     sb.slen-=3;	// drop the "-->" terminator
     n->value=newString();
     cur->add_child_back(n);
     break;
    }
    if (sb=="[CDATA[") {
     Node*n=newNode(Node::CDATASection);
     sb.clear();
     do{
      if ((c=get())<0) return false;
      if (!sb.push(c)) return false;
     }while (!sb.endsWith("]]>"));
     sb.slen-=3;	// drop the "]]>" terminator
     n->value=newString();
     cur->add_child_back(n);
     break;
    }
    if (c<=' ') return false;	// whitespace inside an unknown keyword
   }//for
   c=get();	// the character after '>'
  }break;
  default:{		// start tag
   Node*n=newNode(Node::Element);
   sb.clear();
   while ((unsigned char)c<=' ') if ((c=get())<0) return false;	// skip whitespace after '<'
   // The name must start with an ASCII letter and continues with ASCII
   // letters and digits only; the first other character is handed to parseAttr().
   if (!('A'<=c && c<='Z') && !('a'<=c && c<='z')) return false;
   do{
    if (!sb.push(c)) return false;
    if ((c=get())<0) return false;
   }while ('0'<=c && c<='9' || 'A'<=c && c<='Z' || 'a'<=c && c<='z');
   n->name=newString();
   cur->add_child_back(n);
   switch (c=parseAttr(n,c)) {
    case '/': if ((c=get())!='>') return false; break;	// empty element "<x/>": stay on this level
    case '>': cur=n; break;	// descend
    default: return false;	// not allowed end marker
   }
   debug("Node::Element(\"%s\")",n->name);
   c=get();	// the character after '>'
  }
 }else{			// character data up to the next '<' or the end of input
  Node*n=newNode(Node::Text);
  sb.clear();
  do{
   if (!sb.push(c)) return false;
   if ((c=get())<0) break;
  }while (c!='<');
  n->value=newString();
  cur->add_child_back(n);
//  debug("Node::Text(\"%s\")",n->value);
 }
 debug("fertig");
 return cur->type==Node::root;	// true when root at end of input
}

// Returns the first node matching type <t> and name <tag> in the order
// enumNodes() visits them (this node, its sub-tree, then the following
// siblings and their sub-trees), or null when nothing matches.
const Xml::Node*Xml::Node::findNode(Node::type_t t,const char*tag) const{
 const Node*found=nullptr;
 // The callback stores the hit and returns false to stop the enumeration.
 bool(*const keepFirst)(const Node*,void*)=[](const Node*hit,void*slot)->bool {
  *static_cast<const Node**>(slot)=hit;
  return false;
 };
 enumNodes(t,tag,keepFirst,&found);
 return found;
}

// Calls <cb>(node,p) for this node, all following siblings and all of their
// sub-trees (depth first), restricted to nodes that match:
//  t   = node type, or -1 for any type
//  tag = node name, or null for any name; nodes without a name never match a tag
// Enumeration stops as soon as <cb> returns false; that false is returned.
// Returns true when all nodes were visited.
// The original note asked for this to work with this==nullptr (returning true).
// A member call on a null pointer is undefined behaviour and optimizing
// compilers drop such checks, so callers must test for null themselves;
// the recursion below does so for <sub>.
bool Xml::Node::enumNodes(Node::type_t t,const char*tag,bool(*cb)(const Node*,void*),void*p) const{
 for(const Node*n=this;n;n=n->next) {
  const bool typeOk= t==-1 || n->type==t;
  // Text, comment, CDATA and DOCTYPE nodes get no name from the parser
  // (assumes the Node constructor zeroes <name> - TODO confirm).
  const bool tagOk= !tag || (n->name && !strcmp(n->name,tag));
  if (typeOk && tagOk && !cb(n,p)) return false;
  if (!n->sub) continue;	// never call a member function on a null pointer
  if (!n->sub->enumNodes(t,tag,cb,p)) return false;	// recurse through sub-tree
 }
 return true;
}

// Returns the first direct child with type <t> (-1 = any type) and name <tag>,
// or null when there is none. Only direct children are checked, no recursion.
// A null <tag> matches any name, as in enumNodes(); children without a name
// never match a non-null <tag>.
const Xml::Node*Xml::Node::findChildNode(Node::type_t t,const char*tag) const{
 for(const Node*n=sub;n;n=n->next) {
  if (t!=-1 && n->type!=t) continue;
  // The parser sets no name on text, comment, CDATA and DOCTYPE nodes
  // (assumes the Node constructor zeroes <name> - TODO confirm).
  if (!tag || (n->name && !strcmp(n->name,tag))) return n;
 }
 return 0;
}

// Returns the first direct child of type <t> (-1 = any type) for which
// test(child,p) returns true, or null when there is none.
// <test> is only called for children whose type matches.
const Xml::Node*Xml::Node::findChildNode(Node::type_t t,bool(*test)(const Node*,void*),void*p) const{
 const Node*child=sub;
 while (child) {
  const bool typeOk= t==-1 || child->type==t;
  if (typeOk && test(child,p)) break;
  child=child->next;
 }
 return child;
}

// Unlinks the node from its sibling list and from its parent.
// <prev> is dereferenced without a check, so it must never be null here.
// Children and the name/value strings are not touched.
Xml::Node::~Node() {
 Node*const before=prev;
 Node*const after=next;
 before->next=after;
 if (after) {
  after->prev=before;
 }
 // If this was the parent's first child, the next sibling takes that place.
 if (parent && parent->sub==this) {
  parent->sub=after;
 }
 parent=nullptr;
}

bool Xml::serialize(const Node*p) {
 switch (p->type) {
  case Node::Element: out("<%s",p->name); break;
  // TODO: Attribute dahinter!
  case Node::Text: out("%s",p->value); break;
  case Node::ProcessingInstruction: out("<?%s",p->value); break;
  case Node::Comment: out("<!--%s-->",p->value); break;
 }
 for (const Node*n=p->sub;n;n=n->next) serialize(n);
 switch (p->type) {
  case Node::Element: out("</%s>",p->name); break;
 }
 return true;
}

// printf-style output to hStream.
// t = format string, followed by its arguments
// Returns true when the complete formatted text was written, false on a
// formatting error or when write() fails.
bool Xml::out(const char*t,...) {
 char buf[4000];
 va_list va,va2;
 va_start(va,t);
 va_copy(va2,va);	// a va_list can be consumed only once
 int l=vsnprintf(buf,sizeof buf,t,va);
 va_end(va);
 char*p=buf;
 if (l>=(int)sizeof buf) {
  // vsnprintf() returns the untruncated length. Writing <l> bytes out of
  // <buf> would read past its end, so format again into a buffer that fits.
  p=new char[l+1];
  l=vsnprintf(p,l+1,t,va2);
 }
 va_end(va2);
 bool ok= l>=0;		// negative: formatting error
 for (const char*q=p;ok && l>0;) {
  ssize_t bw=write(hStream,q,l);
  if (bw<=0) ok=false;
  else {q+=bw; l-=(int)bw;}	// continue after a short write
 }
 if (p!=buf) delete[] p;
 return ok;
}
Detected encoding: UTF-80