Commit 1988b0b6 authored by Daniel Burke

ttxml: covering my shame

parent 4cb3b393
<xmlthisisaverylongtagnameIhopeitmesseswithyourstuff> <xmlthisisaverylongtagnameIhopeitmesseswithyourstuff>
foobar foobar
<one foobar barfoo="Hello \"World\"!" foo=bar> <one foobar barfoo="Hello \"World\"!" foo=bar>
<two/> <two/>
</one> </one>
<one></one> <one></one>
<one></one> <one></one>
</xml> </xml>
<xml/><one barfoo3></one></xml> <xml/><one barfoo3></one></xml>
/* Licensed under GPL - see LICENSE file for details */ /* Licensed under GPL - see LICENSE file for details */
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include "ttxml.h" #include "ttxml.h"
/* Size in bytes of the file read buffer; can be overridden at build time. */
#ifndef BUFFER
#define BUFFER 3264
#endif

/* Character-class flags used together with is_special().  Every class is a
 * separate bit, so several classes can be OR-ed together to form a mask. */
#define XML_LETTER	1
#define XML_NUMBER	2
#define XML_SPACE	4
#define XML_SLASH	8
#define XML_OPEN	16
#define XML_EQUALS	32
#define XML_CLOSE	64
#define XML_QUOTE	128
#define XML_OTHER	256

/* Mask with every bit set, i.e. matching all of the classes above. */
#define XML_ALL		0xFFFFFFFF
/* Read state for one XML file: the stream and the buffered window over it. */
typedef struct XMLBUF
{
	FILE * fptr;		/* stream the buffer is refilled from */
	char * buf;		/* buffer holding the current chunk of the file */
	int len;		/* number of valid bytes currently in buf */
	int read_index;		/* offset in buf of the next byte to read */
	int eof;		/* non-zero once the stream has been read to its end */
} XMLBUF;
/* Allocate a new XmlNode */ /* Allocate a new XmlNode */
static XmlNode* xml_new(char * name) static XmlNode* xml_new(char * name)
{ {
XmlNode * ret = malloc(sizeof(XmlNode)); XmlNode * ret = malloc(sizeof(XmlNode));
if(!ret)return NULL; if(!ret)return NULL;
ret->attrib = NULL; ret->attrib = NULL;
ret->nattrib = 0; ret->nattrib = 0;
ret->child = ret->next = NULL; ret->child = ret->next = NULL;
ret->name = name; ret->name = name;
return ret; return ret;
} }
/* Free a previously allocated XmlNode and everything that hangs off it:
 * its name, its attribute strings, its children and all following siblings.
 *
 * target: first node to release; NULL is accepted and does nothing.
 */
void xml_free(XmlNode *target)
{
	XmlNode *next;
	int i;

	/* siblings are walked in a loop so that a long run of sibling nodes
	 * does not cost one stack frame each; only children recurse */
	while(target)
	{
		next = target->next;

		if(target->attrib)
		{
			/* attrib holds nattrib name/value pairs */
			for(i=0; i<target->nattrib*2; i++)
				free(target->attrib[i]);
			free(target->attrib);
		}
		xml_free(target->child);
		free(target->name);
		free(target);

		target = next;
	}
}
/* Raise flags if we have a character of special meaning. /* Raise flags if we have a character of special meaning.
* This is where I've hidden the switch statements :-p * This is where I've hidden the switch statements :-p
*/ */
static int is_special(char item) static int is_special(char item)
{ {
if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z')) if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))
return XML_LETTER; return XML_LETTER;
if( item >= '0' && item <='9' ) if( item >= '0' && item <='9' )
return XML_NUMBER; return XML_NUMBER;
if( item == 0x20 || item == '\t' || item == 0x0D || item == 0x0A ) if( item == 0x20 || item == '\t' || item == 0x0D || item == 0x0A )
return XML_SPACE; return XML_SPACE;
if( item == '/' ) if( item == '/' )
return XML_SLASH; return XML_SLASH;
if( item == '<' ) if( item == '<' )
return XML_OPEN; return XML_OPEN;
if( item == '=' ) if( item == '=' )
return XML_EQUALS; return XML_EQUALS;
if( item == '>' ) if( item == '>' )
return XML_CLOSE; return XML_CLOSE;
if( item == '"' || item == '\'' ) if( item == '"' || item == '\'' )
return XML_QUOTE; return XML_QUOTE;
return 128; return 128;
} }
/* Refresh the buffer from the file, if possible.
 *
 * Tries to read xml->len bytes into xml->buf.  When fewer bytes arrive
 * (end of file, or a read error, which is handled the same way) the
 * buffer length is shrunk to what was read, the data is NUL terminated
 * and xml->eof is raised.  Does nothing once xml->eof is set.
 *
 * The caller is responsible for resetting xml->read_index.
 */
static void xml_read_file(XMLBUF *xml)
{
	size_t size;

	if(xml->eof)
		return;

	size = fread(xml->buf, 1, xml->len, xml->fptr);
	if(size != (size_t)xml->len)
	{
		xml->len = (int)size;
		xml->buf[size] = 0;
		xml->eof = 1;
	}
}
/* Byte-at-a-time access to the XML buffer goes through xml_peek() and
 * xml_read_byte(). */

/*** Read the byte at the current offset without advancing the offset. */
static char xml_peek(XMLBUF *xml)
{
	return xml->buf[xml->read_index];
}
/*** Read the byte at the current offset and advance the offset.
 *
 * When the offset runs off the end of the buffer the buffer is refilled
 * from the file and the offset restarts at 0.  Once the file is exhausted
 * the offset is pinned at xml->len instead.
 *
 * Returns the byte that was at the offset before it was advanced.
 */
static char xml_read_byte(XMLBUF *xml)
{
	char ret = xml_peek(xml);

	xml->read_index++;
	if(xml->read_index >= xml->len)
	{
		if(xml->eof)
		{
			/* nothing more to load, stay parked at the end */
			xml->read_index = xml->len;
			return ret;
		}
		xml->read_index = 0;
		xml_read_file(xml);
	}
	return ret;
}
/* Skip over bytes whose is_special() class is part of mask.
 *
 * mask: OR-ed XML_* class flags to skip.
 *
 * Stops at the first byte whose class is not in mask, or when the end of
 * the input has been reached.
 */
static void xml_skip( XMLBUF *xml, int mask)
{
	while( (is_special(xml_peek(xml)) & mask)
	       && !(xml->eof && xml->read_index >= xml->len) )
		xml_read_byte(xml);
}
/* character matching tests for the feed functions */

/* The quote character that ends the value test_quote() is matching. */
static char quotechar = 0;

/* Test for the inside of a quoted value.
 *
 * Returns 1 while x belongs to the quoted value and 0 when x is the
 * unescaped closing quotechar.  A backslash, and the single character that
 * follows it, are always accepted; this is tracked in a static flag, so the
 * function has to be fed the characters in order.
 */
static int test_quote(const char x)
{
	static int escaped = 0;

	if( escaped || '\\' == x )
	{
		/* a backslash arms the flag, the next character disarms it */
		escaped = !escaped;
		return 1;
	}
	return x != quotechar;
}
/* OR-ed XML_* classes that test_mask() rejects; set it before feeding. */
static int feed_mask = 0;

/* Test for a run of characters outside feed_mask.
 *
 * Returns non-zero while the class of x is not part of feed_mask, and 0
 * for a character whose class is in feed_mask.
 */
static int test_mask(const char x)
{
	return !(is_special(x) & feed_mask);
}
/* /*
* char* xml_feed(x, test) * char* xml_feed(x, test)
* *
* Reads as many contiguous chars that pass test() into a newly allocated * Reads as many contiguous chars that pass test() into a newly allocated
* string. * string.
* *
* Instead of calling xml_read_byte and flogging realloc() for each byte, * Instead of calling xml_read_byte and flogging realloc() for each byte,
* it checks the buffer itself. * it checks the buffer itself.
*/ */
static char* xml_feed( XMLBUF *xml, int (*test)(char) ) static char* xml_feed( XMLBUF *xml, int (*test)(char) )
{ {
int offset = xml->read_index; int offset = xml->read_index;
int delta; int delta;
char *ret = NULL; char *ret = NULL;
int size = 0; int size = 0;
/* perform first and N middle realloc()'s */ /* perform first and N middle realloc()'s */
while( test(xml->buf[offset]) ) while( test(xml->buf[offset]) )
{ {
offset ++; offset ++;
if(offset >= xml->len) if(offset >= xml->len)
{ {
delta = offset - xml->read_index; delta = offset - xml->read_index;
ret = realloc(ret, size + delta + 1); ret = realloc(ret, size + delta + 1);
memcpy(ret+size, xml->buf + xml->read_index, delta); memcpy(ret+size, xml->buf + xml->read_index, delta);
size += delta; size += delta;
ret[size]=0; ret[size]=0;
if(xml->eof)return ret; if(xml->eof)return ret;
xml_read_file(xml); xml_read_file(xml);
xml->read_index = 0; xml->read_index = 0;
offset = 0; offset = 0;
} }
} }
/* perform final realloc() if needed */ /* perform final realloc() if needed */
if(offset > xml->read_index) if(offset > xml->read_index)
{ {
delta = offset - xml->read_index; delta = offset - xml->read_index;
ret = realloc(ret, size + delta + 1); ret = realloc(ret, size + delta + 1);
memcpy(ret+size, xml->buf + xml->read_index, delta); memcpy(ret+size, xml->buf + xml->read_index, delta);
xml->read_index = offset; xml->read_index = offset;
size += delta; size += delta;
ret[size]=0; ret[size]=0;
} }
return ret; return ret;
} }
/* this reads attributes from tags, of the form... /* this reads attributes from tags, of the form...
* *
* <tag attr1="some arguments" attr2=argument> * <tag attr1="some arguments" attr2=argument>
* *
* It is aware of quotes, and will allow anything inside quoted arguments * It is aware of quotes, and will allow anything inside quoted arguments
*/ */
static void xml_read_attr(struct XMLBUF *xml, XmlNode *node) static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
{ {
int n=0; int n=0;
// how does this tag finish? // how does this tag finish?
while(xml->len) while(xml->len)
{ {
if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) ) if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )
return; return;
n = ++node->nattrib; n = ++node->nattrib;
node->attrib = realloc(node->attrib, n * 2 * sizeof(char*) ); node->attrib = realloc(node->attrib, n * 2 * sizeof(char*) );
node->attrib[--n*2+1] = 0; node->attrib[--n*2+1] = 0;
feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH; feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;
node->attrib[n*2] = xml_feed(xml, test_mask ); node->attrib[n*2] = xml_feed(xml, test_mask );
if( xml_peek(xml) == '=' ) if( xml_peek(xml) == '=' )
{ {
xml_read_byte(xml); xml_read_byte(xml);
if( is_special(xml_peek(xml)) & XML_QUOTE ) if( is_special(xml_peek(xml)) & XML_QUOTE )
{ {
quotechar = xml_read_byte(xml); quotechar = xml_read_byte(xml);
node->attrib[n*2+1] = xml_feed(xml, test_quote); node->attrib[n*2+1] = xml_feed(xml, test_quote);
xml_read_byte(xml); xml_read_byte(xml);
} }
else else
{ {
feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH; feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;
node->attrib[n*2+1] = xml_feed(xml, test_mask); node->attrib[n*2+1] = xml_feed(xml, test_mask);
} }
} }
xml_skip(xml, XML_SPACE); xml_skip(xml, XML_SPACE);
} }
} }
/* The big decision maker, is it a regular node, or a text node. /* The big decision maker, is it a regular node, or a text node.
* If it's a node, it will check if it should have children, and if so * If it's a node, it will check if it should have children, and if so
* will recurse over them. * will recurse over them.
* Text nodes don't have children, so no recursing. * Text nodes don't have children, so no recursing.
*/ */
static XmlNode* xml_parse(struct XMLBUF *xml) static XmlNode* xml_parse(struct XMLBUF *xml)
{ {
int offset; int offset;
int toff; int toff;
char *tmp; char *tmp;
XmlNode **this, *ret = NULL; XmlNode **this, *ret = NULL;
this = &ret; this = &ret;
xml_skip(xml, XML_SPACE); // skip whitespace xml_skip(xml, XML_SPACE); // skip whitespace
offset=0; offset=0;
while( (xml->read_index < xml->len) || !xml->eof ) while( (xml->read_index < xml->len) || !xml->eof )
{ {
switch(is_special(xml_peek(xml))) switch(is_special(xml_peek(xml)))
{ {
case XML_OPEN: case XML_OPEN:
xml_read_byte(xml); xml_read_byte(xml);
if(xml_peek(xml) == '/') if(xml_peek(xml) == '/')
return ret; // parents close tag return ret; // parents close tag
// read the tag name // read the tag name
feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE; feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;
*this = xml_new( xml_feed(xml, test_mask)); *this = xml_new( xml_feed(xml, test_mask));
xml_skip(xml, XML_SPACE); // skip any whitespace xml_skip(xml, XML_SPACE); // skip any whitespace
xml_read_attr(xml, *this); // read attributes xml_read_attr(xml, *this); // read attributes
// how does this tag finish? // how does this tag finish?
switch(is_special(xml_peek(xml))) switch(is_special(xml_peek(xml)))
{ {
case XML_CLOSE: // child-nodes ahead case XML_CLOSE: // child-nodes ahead
xml_read_byte(xml); xml_read_byte(xml);
(*this)->child = xml_parse(xml); (*this)->child = xml_parse(xml);
xml_skip(xml, XML_ALL ^ XML_CLOSE); xml_skip(xml, XML_ALL ^ XML_CLOSE);
xml_read_byte(xml); xml_read_byte(xml);
break; break;
case XML_SLASH: // self closing tag case XML_SLASH: // self closing tag
xml_read_byte(xml); xml_read_byte(xml);
xml_read_byte(xml); xml_read_byte(xml);
break; break;
} }
break; break;
default: // text node default: // text node
*this = xml_new(0); *this = xml_new(0);
xml_skip(xml, XML_SPACE); // skip any whitespace xml_skip(xml, XML_SPACE); // skip any whitespace
feed_mask = XML_OPEN; feed_mask = XML_OPEN;
(*this)->nattrib=1; (*this)->nattrib=1;
(*this)->attrib = malloc(sizeof(char*)*2); (*this)->attrib = malloc(sizeof(char*)*2);
(*this)->attrib[1] = NULL; (*this)->attrib[1] = NULL;
tmp = (*this)->attrib[0] = xml_feed(xml, test_mask); tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);
/* trim the whitespace off the end of text nodes, /* trim the whitespace off the end of text nodes,
* by overwriting the spaces will null termination. */ * by overwriting the spaces will null termination. */
toff = strlen(tmp)-1; toff = strlen(tmp)-1;
while( ( is_special(tmp[toff]) & XML_SPACE ) ) while( ( is_special(tmp[toff]) & XML_SPACE ) )
{ {
tmp[toff] = 0; tmp[toff] = 0;
toff --; toff --;
} }
break; break;
} }
this = &(*this)->next; this = &(*this)->next;
xml_skip(xml, XML_SPACE); // skip whitespace xml_skip(xml, XML_SPACE); // skip whitespace
} }
return ret; return ret;
} }
/* bootstrap the structures for xml_parse() to be able to get started */ /* bootstrap the structures for xml_parse() to be able to get started */
XmlNode* xml_load(const char * filename) XmlNode* xml_load(const char * filename)
{ {
struct XMLBUF xml; struct XMLBUF xml;
XmlNode *ret = NULL; XmlNode *ret = NULL;
// printf("xml_load(\"%s\");\n", filename); // printf("xml_load(\"%s\");\n", filename);
xml.eof = 0; xml.eof = 0;
xml.read_index = 0; xml.read_index = 0;
xml.fptr = fopen(filename, "rb"); xml.fptr = fopen(filename, "rb");
if(!xml.fptr) if(!xml.fptr)
return NULL; return NULL;
xml.buf = malloc(BUFFER+1); xml.buf = malloc(BUFFER+1);
xml.buf[BUFFER]=0; xml.buf[BUFFER]=0;
xml.len = BUFFER; xml.len = BUFFER;
if(!xml.buf) if(!xml.buf)
goto xml_load_fail_malloc_buf; goto xml_load_fail_malloc_buf;
xml_read_file(&xml); xml_read_file(&xml);
ret = xml_parse(&xml); ret = xml_parse(&xml);
free(xml.buf); free(xml.buf);
xml_load_fail_malloc_buf: xml_load_fail_malloc_buf:
fclose(xml.fptr); fclose(xml.fptr);
return ret; return ret;
} }
/* very basic function that will get you the first node with a given name
 *
 * xml:  node to start from; the node itself, its descendants and its
 *       following siblings (with their descendants) are searched.
 * name: node name to look for.
 *
 * Each node is checked before its children, and its children before its
 * next sibling.  Nodes without a name never match.
 *
 * Returns the first matching node, or NULL if there is none or if xml or
 * name is NULL.
 */
XmlNode * xml_find(XmlNode *xml, const char *name)
{
	XmlNode * ret;

	if(!name)
		return NULL;

	/* siblings are walked in a loop, only children recurse */
	for(; xml; xml = xml->next)
	{
		if(xml->name && !strcmp(xml->name, name))
			return xml;

		ret = xml_find(xml->child, name);
		if(ret)
			return ret;
	}
	return NULL;
}
/* very basic attribute lookup function
 *
 * x:    node whose attributes are searched.
 * name: attribute name to look for.
 *
 * Returns the value stored for the first attribute called name.  The
 * string still belongs to the node and must not be freed by the caller.
 * Returns NULL if there is no such attribute, if the attribute has no
 * value, or if x or name is NULL.
 */
char* xml_attr(XmlNode *x, const char *name)
{
	int i;

	if(!x || !name)
		return NULL;

	/* attrib holds pairs: name at 2*i, value at 2*i+1 */
	for(i=0; i<x->nattrib; i++)
		if(x->attrib[i*2] && !strcmp(x->attrib[i*2], name))
			return x->attrib[i*2+1];
	return NULL;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment