00001 #include <eggdrop/eggdrop.h>
00002 #include <errno.h>
00003
00004 static const char *name_terminators = "= \t\n\r?/>";
00005
00006 int xml_parse_children(xml_node_t *parent, char **bufptr);
00007
00008 static xml_entity_t entities[] = {
00009 {"lt", "<"},
00010 {"gt", ">"},
00011 {"amp", "&"},
00012 {"apos", "'"},
00013 {"quot", "\""}
00014 };
00015 static int nentities = 5;
00016
/*
 * Advance *bufptr past any leading whitespace.
 *
 * bufptr: in/out cursor into a NUL-terminated buffer; on return it points at
 *         the first non-whitespace character (possibly the terminator).
 *
 * Returns the number of characters skipped.
 */
static int skip_space(char **bufptr)
{
	char *start, *ptr;

	start = ptr = *bufptr;
	/* The <ctype.h> functions require an unsigned char value (or EOF);
	 * passing a plain char is undefined for bytes >= 0x80 when char is signed. */
	while (isspace((unsigned char) *ptr)) ptr++;
	*bufptr = ptr;
	return((int) (ptr - start));
}
00026
/*
 * Append len bytes from str to a growable byte buffer.
 *
 * ptr:    in/out buffer pointer; may be replaced when the buffer grows.
 * ptrlen: in/out number of bytes currently stored in the buffer.
 * ptrmax: in/out allocated size of the buffer.
 * str:    bytes to append (need not be NUL-terminated).
 * len:    number of bytes to append; values <= 0 append nothing.
 *
 * The buffer is not NUL-terminated here, but one spare byte is always kept
 * beyond the stored data. If the buffer cannot be grown, it is left exactly
 * as it was and the new bytes are dropped.
 */
static void copy_text(char **ptr, int *ptrlen, int *ptrmax, const char *str, int len)
{
	if (len <= 0) return;

	if (*ptrlen + len + 1 > *ptrmax) {
		int newmax = *ptrlen + len + 64;
		char *grown = realloc(*ptr, newmax);

		/* On failure realloc() leaves the old block valid; keep using it
		 * rather than losing the pointer and writing through NULL. */
		if (!grown) return;
		*ptr = grown;
		*ptrmax = newmax;
	}
	memcpy(*ptr + *ptrlen, str, len);
	*ptrlen += len;
}
00036
00037 char *xml_entity_lookup(const char *name)
00038 {
00039 int i, num;
00040 static char numcode[2] = {0, 0};
00041
00042
00043 if (*name == '#') {
00044 name++;
00045 if (*name == 'x') num = strtol(name+1, NULL, 16);
00046 else num = strtol(name, NULL, 10);
00047 numcode[0] = num;
00048 return(numcode);
00049 }
00050
00051 for (i = 0; i < nentities; i++) {
00052 if (!strcasecmp(entities[i].name, name)) return(entities[i].value);
00053 }
00054 return(NULL);
00055 }
00056
/*
 * Decode a span of XML character data: trim leading and trailing whitespace
 * and replace "&name;" references with the text from xml_entity_lookup().
 * References that xml_entity_lookup() does not recognise are removed.
 *
 * text:    start of the span; must be writable, because each ';' is
 *          temporarily overwritten while its entity is looked up (it is
 *          restored before returning).
 * len:     number of bytes in the span.
 * outtext: receives a malloc()ed buffer holding the decoded bytes, followed
 *          by a NUL terminator; the caller frees it. Set to NULL on failure.
 * outlen:  receives the number of decoded bytes (terminator not counted).
 *
 * Returns 0 on success, -1 if the output buffer could not be allocated.
 */
int xml_decode_text(char *text, int len, char **outtext, int *outlen)
{
	char *amp, *colon, *entity;
	int outmax, amplen;

	*outlen = 0;
	if (len < 0) len = 0;
	outmax = len+1;
	*outtext = malloc(outmax);
	if (!*outtext) return(-1);

	/* Trim leading whitespace without ever looking beyond the len bytes
	 * we were given. */
	while (len > 0 && isspace((unsigned char) *text)) {
		text++;
		len--;
	}

	while (len > 0 && (amp = memchr(text, '&', len))) {
		amplen = amp - text;
		colon = memchr(amp, ';', len - amplen);
		if (!colon) break;

		/* Literal text in front of the reference. */
		copy_text(outtext, outlen, &outmax, text, amplen);
		len -= (colon+1 - text);
		text = colon+1;

		/* Terminate the entity name in place for the lookup, then put
		 * the ';' back so the caller's buffer is left intact. */
		*colon = 0;
		entity = xml_entity_lookup(amp+1);
		*colon = ';';
		if (entity) copy_text(outtext, outlen, &outmax, entity, strlen(entity));
	}

	/* Trim trailing whitespace from whatever is left, then copy it. */
	while (len > 0 && isspace((unsigned char) text[len-1])) len--;
	copy_text(outtext, outlen, &outmax, text, len);

	/* copy_text() always leaves one spare byte after the stored data. */
	(*outtext)[*outlen] = 0;
	return(0);
}
00083
00084 static void append_text(xml_node_t *node, char *text, int len, int decode)
00085 {
00086 char *finaltext;
00087 int finallen;
00088
00089 if (decode) xml_decode_text(text, len, &finaltext, &finallen);
00090 else {
00091 finaltext = text;
00092 finallen = len;
00093 }
00094
00095 node->text = realloc(node->text, node->len + finallen + 1);
00096 memcpy(node->text + node->len, finaltext, finallen);
00097 node->len += finallen;
00098 node->text[node->len] = 0;
00099
00100 if (finaltext != text) free(finaltext);
00101 }
00102
00103 static char *read_name(char **bufptr)
00104 {
00105 int n;
00106 char *name;
00107
00108 n = strcspn(*bufptr, name_terminators);
00109 if (!n) return NULL;
00110 name = malloc(n+1);
00111 memcpy(name, *bufptr, n);
00112 name[n] = 0;
00113 *bufptr += n;
00114 return(name);
00115 }
00116
00117 static char *read_value(char **bufptr)
00118 {
00119 const char *term;
00120 char *value;
00121 int n;
00122
00123 if (**bufptr == '\'') term = "'";
00124 else if (**bufptr == '"') term = "\"";
00125 else {
00126 term = name_terminators;
00127 (*bufptr)--;
00128 }
00129
00130
00131 (*bufptr)++;
00132 n = strcspn(*bufptr, term);
00133 value = malloc(n+1);
00134 memcpy(value, *bufptr, n);
00135 value[n] = 0;
00136
00137 if (term != name_terminators) n++;
00138 *bufptr += n;
00139 return(value);
00140 }
00141
00142 static void read_attributes(xml_node_t *node, char **bufptr)
00143 {
00144 xml_attr_t *attr;
00145 char *name, *value;
00146
00147 while (1) {
00148 skip_space(bufptr);
00149 name = read_name(bufptr);
00150 if (!name || **bufptr != '=') {
00151 if (name) free(name);
00152 return;
00153 }
00154 (*bufptr)++;
00155 value = read_value(bufptr);
00156 attr = xml_attr_new(name, value);
00157 xml_node_append_attr(node, attr);
00158 }
00159 }
00160
00161
00162 int xml_parse_node(xml_node_t *parent, char **bufptr)
00163 {
00164 xml_node_t *node;
00165 char *ptr, *buf = *bufptr;
00166
00167 if (*buf++ != '<') return(-1);
00168 skip_space(&buf);
00169
00170
00171 if (*buf == '/') {
00172 char *name;
00173
00174 if (!parent->name) return(-1);
00175
00176 buf++;
00177 name = read_name(&buf);
00178 if (strcasecmp(name, parent->name)) {
00179 free(name);
00180 xml_set_error("closing tag doesn't match opening tag");
00181 return(-1);
00182 }
00183 free(name);
00184
00185 ptr = strchr(buf, '>');
00186 if (!ptr) return(-1);
00187 *bufptr = ptr+1;
00188 return(0);
00189 }
00190
00191
00192 if (!strncmp(buf, "!--", 3)) {
00193
00194 buf += 3;
00195 ptr = strstr(buf, "-->");
00196 if (!ptr) {
00197 xml_set_error("comment has no end");
00198 return(-1);
00199 }
00200
00201 node = xml_node_new();
00202 node->type = XML_COMMENT;
00203 append_text(node, buf, ptr-buf, 1);
00204 xml_node_append(parent, node);
00205 *bufptr = ptr+3;
00206 return(1);
00207 }
00208 else if (!strncasecmp(buf, "![CDATA[", 8)) {
00209
00210 buf += 7;
00211 ptr = strstr(buf, "]]>");
00212 if (!ptr) {
00213 xml_set_error("CDATA has no end");
00214 return(-1);
00215 }
00216
00217 append_text(parent, buf, ptr-buf, 0);
00218 *bufptr = ptr+3;
00219 return(1);
00220 }
00221 else if (!strncasecmp(buf, "!DOCTYPE", 8)) {
00222
00223 buf += 8;
00224 ptr = strchr(buf, '>');
00225 if (!ptr) return(-1);
00226 *bufptr = ptr+1;
00227 return(1);
00228 }
00229 else if (*buf == '?') {
00230
00231 buf++;
00232 skip_space(&buf);
00233 node = xml_node_new();
00234 node->type = XML_PROCESSING_INSTRUCTION;
00235 }
00236 else {
00237
00238 node = xml_node_new();
00239 node->type = XML_ELEMENT;
00240 }
00241
00242 node->name = read_name(&buf);
00243 read_attributes(node, &buf);
00244 if (node->type == XML_PROCESSING_INSTRUCTION) {
00245 if (strncmp(buf, "?>", 2)) {
00246 xml_node_free(node);
00247 xml_set_error("invalid processing instruction");
00248 return(-1);
00249 }
00250 xml_node_append(parent, node);
00251 *bufptr = buf+2;
00252 return(1);
00253 }
00254
00255
00256 if (!strncmp(buf, "/>", 2)) {
00257 xml_node_append(parent, node);
00258 *bufptr = buf+2;
00259 return(1);
00260 }
00261 else if (*buf != '>') {
00262 xml_set_error("invalid tag");
00263 return(-1);
00264 }
00265
00266
00267 buf++;
00268 if (xml_parse_children(node, &buf) != -1) {
00269 *bufptr = buf;
00270 xml_node_append(parent, node);
00271 return(1);
00272 }
00273
00274
00275 xml_node_free(node);
00276 return(-1);
00277 }
00278
00279
00280 int xml_parse_children(xml_node_t *parent, char **bufptr)
00281 {
00282 char *ptr, *buf = *bufptr;
00283 int ret;
00284
00285 do {
00286 ptr = strchr(buf, '<');
00287 if (!ptr) {
00288 append_text(parent, buf, strlen(buf), 1);
00289 return(0);
00290 }
00291
00292
00293 append_text(parent, buf, ptr - buf, 1);
00294 buf = ptr;
00295
00296 ret = xml_parse_node(parent, &buf);
00297 *bufptr = buf;
00298 } while (ret == 1);
00299 return(ret);
00300 }
00301
00302 xml_node_t *xml_parse_file(const char *fname)
00303 {
00304 FILE *fp;
00305 xml_node_t *root;
00306 char *buf, *ptr;
00307 int len;
00308
00309 fp = fopen(fname, "r");
00310 if (!fp) {
00311 xml_set_error(strerror(errno));
00312 return(NULL);
00313 }
00314 fseek(fp, 0l, SEEK_END);
00315 len = ftell(fp);
00316 fseek(fp, 0l, SEEK_SET);
00317 buf = malloc(len+1);
00318 if (!buf) {
00319 fclose(fp);
00320 xml_set_error("out of memory");
00321 return(NULL);
00322 }
00323 len = fread(buf, 1, len, fp);
00324 buf[len] = '\0';
00325 fclose(fp);
00326 root = xml_node_new();
00327 root->type = XML_DOCUMENT;
00328 ptr = buf;
00329 xml_parse_children(root, &ptr);
00330 free(buf);
00331 return xml_root_element(root);
00332 }