[xml/sgml-pkgs] Bug#692456: libxml2: Schema validation problem with [0-9]{0, n}|TEXT

Junior Polegato linux at juniorpolegato.com.br
Tue Nov 6 12:37:18 UTC 2012


Package: libxml2
Version: 2.8.0+dfsg1-6
Severity: important

Dear Maintainer,
Hi!
I have had problems with the schema validation pattern [0-9]{0,n}|TEXT.
When I run the code below, the output shows that validation does not work
correctly in cases 0, 1 and 2, but works fine in cases 3 and 4.
Why? What is wrong? As far as I can tell, these patterns are all the same kind of case.
Thanks,
Junior Polegato

-------------- Output --------------
0. [0-9]{0,4}|TEXT
0.0. "" => valid: True
0.1. "123" => valid: True
0.2. "12345" => valid: False
0.3. "TEXT" => valid: False
0.4. "TEST" => valid: False

1. TEXT|[0-9]{0,4}
1.0. "" => valid: False
1.1. "123" => valid: True
1.2. "12345" => valid: False
1.3. "TEXT" => valid: True
1.4. "TEST" => valid: False

2. [0-9]{0,4}|TEXT|
2.0. "" => valid: True
2.1. "123" => valid: True
2.2. "12345" => valid: False
2.3. "TEXT" => valid: False
2.4. "TEST" => valid: False

3. [0-9]{0,4}|TEXT|1234
3.0. "" => valid: True
3.1. "123" => valid: True
3.2. "12345" => valid: False
3.3. "TEXT" => valid: True
3.4. "TEST" => valid: False

4. TEXT|[0-9]{0,4}|
4.0. "" => valid: True
4.1. "123" => valid: True
4.2. "12345" => valid: False
4.3. "TEXT" => valid: True
4.4. "TEST" => valid: False

-------------- Code --------------
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/xmlschemas.h>

// gcc -Wall `xml2-config --cflags --libs` -o v v.c && ./v

#define DEBUG 0

/*
 * Verbose error/warning callback: formats the message described by fmt
 * and its variadic arguments and prints it to stdout.
 *
 * ctx: user data given at registration time (unused here).
 * fmt: printf-style format string.
 * ...: the arguments consumed by fmt.
 */
void Validity(void * ctx, const char *fmt, ...) {
    va_list ap;

    (void)ctx;
    va_start(ap, fmt);
    /*
     * Hand the whole argument list to vprintf.  Pulling char* values with
     * va_arg until a -1 sentinel shows up reads past the real arguments
     * (no caller passes such a sentinel) and truncates pointers to int.
     */
    vprintf(fmt, ap);
    va_end(ap);
}

/* Silent error/warning callback: accepts a message and discards it. */
void NoValidity(void * ctx, const char *fmt, ...)
{
    /* Intentionally empty; the casts only mark the parameters as unused. */
    (void)ctx;
    (void)fmt;
}

/*
 * Returns s unchanged, or the literal "(null)" when s is NULL, so the
 * result can always be passed to a "%s" conversion.
 */
static const char *djosPrintable(const char *s)
{
    return (s != NULL) ? s : "(null)";
}

/*
 * Structured error callback: dumps the fields of the reported error to
 * stdout, one per line, framed by separator lines.
 *
 * userData: user data given at registration time (unused here).
 * error:    the error to print; nothing is printed when it is NULL.
 */
void djosStructuredErrorFunc(void * userData, xmlErrorPtr error)
{
    (void)userData;

    if (error == NULL)
        return;

    printf("----------------------------------------\n");
    printf("domain      : %i\n", error->domain  );
    printf("code        : %i\n", error->code    );
    /* String fields may be unset; "%s" with a NULL pointer is undefined. */
    printf("message     : %s\n", djosPrintable(error->message));
    printf("level       : %i\n", error->level   );
    printf("file        : %s\n", djosPrintable(error->file));
    printf("line        : %i\n", error->line    );
    printf("str1        : %s\n", djosPrintable(error->str1));
    printf("str2        : %s\n", djosPrintable(error->str2));
    printf("str3        : %s\n", djosPrintable(error->str3));
    printf("int1        : %i\n", error->int1    );
    printf("int2        : %i\n", error->int2    );
    printf("ctxt        : %p\n", error->ctxt    );
    printf("node        : %p\n", error->node    );
    printf("----------------------------------------\n");
}

/*
 * Validates an XML document against an XML Schema, both given as
 * NUL-terminated in-memory strings.
 *
 * xml:        text of the instance document.
 * schema_xml: text of the schema document.
 *
 * Returns 1 when the document validates, 0 when it does not, and a
 * negative code when validation could not be attempted:
 *   -1  the schema or the instance document could not be parsed
 *   -2  no schema parser context could be created
 *   -3  the schema itself could not be compiled
 *   -4  no validation context could be created
 * Every object created here is released before returning.
 */
int is_valid(const char *xml, const char *schema_xml)
{
    xmlDocPtr schema_doc = NULL;
    xmlDocPtr doc = NULL;
    xmlSchemaParserCtxtPtr parser_ctxt = NULL;
    xmlSchemaPtr schema = NULL;
    xmlSchemaValidCtxtPtr valid_ctxt = NULL;
    int rc;

    schema_doc = xmlReadMemory(schema_xml, (int)strlen(schema_xml), NULL,
                               NULL, XML_PARSE_NONET);
    if (schema_doc == NULL) {
        /* the schema cannot be loaded or is not well-formed */
        rc = -1;
        goto out;
    }

    doc = xmlReadMemory(xml, (int)strlen(xml), NULL, NULL, 0);
    if (doc == NULL) {
        /* the instance document cannot be loaded or is not well-formed;
         * schema_doc already exists and must still be released */
        rc = -1;
        goto out_schema_doc;
    }

    parser_ctxt = xmlSchemaNewDocParserCtxt(schema_doc);
    if (parser_ctxt == NULL) {
        /* unable to create a parser context for the schema */
        rc = -2;
        goto out_doc;
    }

    schema = xmlSchemaParse(parser_ctxt);
    if (schema == NULL) {
        /* the schema itself is not valid */
        rc = -3;
        goto out_parser_ctxt;
    }

    valid_ctxt = xmlSchemaNewValidCtxt(schema);
    if (valid_ctxt == NULL) {
        /* unable to create a validation context for the schema */
        rc = -4;
        goto out_schema;
    }

    /* DEBUG selects how validation messages are reported:
     * 0 = silent, 1 = formatted text, 2 = structured field dump. */
    if (DEBUG == 0)
        xmlSchemaSetValidErrors(valid_ctxt, NoValidity, NoValidity, NULL);
    else if (DEBUG == 1)
        xmlSchemaSetValidErrors(valid_ctxt, Validity, Validity, NULL);
    else if (DEBUG == 2){
        xmlSchemaSetValidErrors(valid_ctxt, NoValidity, NoValidity, NULL);
        xmlSchemaSetValidStructuredErrors(valid_ctxt, djosStructuredErrorFunc,
                                          parser_ctxt);
    }

    /* force the return value to be non-negative on success */
    rc = (xmlSchemaValidateDoc(valid_ctxt, doc) == 0) ? 1 : 0;

    /* Release in reverse order of acquisition; each label frees exactly
     * what had been created when the jump to it was taken. */
    xmlSchemaFreeValidCtxt(valid_ctxt);
out_schema:
    xmlSchemaFree(schema);
out_parser_ctxt:
    xmlSchemaFreeParserCtxt(parser_ctxt);
out_doc:
    xmlFreeDoc(doc);
out_schema_doc:
    xmlFreeDoc(schema_doc);
out:
    return rc;
}

/*
 * Test driver: for each pattern, builds a schema whose "Test" element is
 * an xs:string restricted by that pattern, validates every sample text
 * against it with is_valid(), and prints one result line per combination.
 *
 * Returns 0 on completion, 1 if a document does not fit its buffer.
 */
int main(int argc, char* argv[]){
    int i, j;
    const char patterns[5][30] = {"[0-9]{0,4}|TEXT", "TEXT|[0-9]{0,4}",
                                  "[0-9]{0,4}|TEXT|", "[0-9]{0,4}|TEXT|1234",
                                  "TEXT|[0-9]{0,4}|"};
    const char texts[5][10] = {"", "123", "12345", "TEXT", "TEST"};
    /* Indexed by the 0/1 result of is_valid(). */
    const char verdict[2][6] = {"False", "True"};
    char xml[100];
    char schema_xml[1024];

    (void)argc;
    (void)argv;

    for (i=0; i< 5; i++){
        int len = snprintf(schema_xml, sizeof schema_xml,
                     "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
                     "    <xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\">"
                     "        <xs:simpleType name=\"TTest\">"
                     "            <xs:restriction base=\"xs:string\">"
                     "                <xs:whiteSpace value=\"preserve\"/>"
                     "                <xs:pattern value=\"%s\"/>"
                     "            </xs:restriction>"
                     "        </xs:simpleType>"
                     "        <xs:element name=\"Test\" type=\"TTest\"/>"
                     "    </xs:schema>", patterns[i]);
        if (len < 0 || (size_t)len >= sizeof schema_xml) {
            fprintf(stderr, "schema for pattern %i does not fit the buffer\n", i);
            return 1;
        }

        printf("\n%i. %s\n", i, patterns[i]);
        for (j=0; j< 5; j++){
            int result;

            len = snprintf(xml, sizeof xml,
                     "<?xml version=\"1.0\" encoding=\"utf-8\"?><Test>%s</Test>",
                     texts[j]);
            if (len < 0 || (size_t)len >= sizeof xml) {
                fprintf(stderr, "document for text %i does not fit the buffer\n", j);
                return 1;
            }

            result = is_valid(xml, schema_xml);
            if (result < 0) {
                /* A negative code means validation could not be run; it
                 * must not be used as an index into verdict[]. */
                printf("%i.%i. \"%s\" => error: %i\n", i, j, texts[j], result);
            } else {
                printf("%i.%i. \"%s\" => valid: %s\n", i, j, texts[j],
                       verdict[result]);
            }
        }
    }
    return 0;
}



-- System Information:
Debian Release: wheezy/sid
  APT prefers testing
  APT policy: (900, 'testing'), (65, 'unstable'), (60, 'stable')
Architecture: i386 (i686)

Kernel: Linux 3.2.0-3-686-pae (SMP w/2 CPU cores)
Locale: LANG=pt_BR.UTF-8, LC_CTYPE=pt_BR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash

Versions of packages libxml2 depends on:
ii  libc6              2.13-35
ii  liblzma5           5.1.1alpha+20120614-1
ii  multiarch-support  2.13-35
ii  zlib1g             1:1.2.7.dfsg-13

Versions of packages libxml2 recommends:
ii  xml-core  0.13+nmu2

libxml2 suggests no packages.

-- no debconf information



More information about the debian-xml-sgml-pkgs mailing list