[xml/sgml-pkgs] Bug#692456: libxml2: Schema validation problem with [0-9]{0, n}|TEXT
Junior Polegato
linux at juniorpolegato.com.br
Tue Nov 6 12:37:18 UTC 2012
Package: libxml2
Version: 2.8.0+dfsg1-6
Severity: important
Dear Maintainer,
Hi!
I have had problems with the schema validation pattern [0-9]{0,n}|TEXT.
I ran the code below and, as you can see in the output, validation doesn't work
in cases 0, 1 and 2, but in cases 3 and 4 it works fine.
Why? What's wrong? In my mind, these patterns are all the same case.
Thanks,
Junior Polegato
-------------- Output --------------
0. [0-9]{0,4}|TEXT
0.0. "" => valid: True
0.1. "123" => valid: True
0.2. "12345" => valid: False
0.3. "TEXT" => valid: False
0.4. "TEST" => valid: False
1. TEXT|[0-9]{0,4}
1.0. "" => valid: False
1.1. "123" => valid: True
1.2. "12345" => valid: False
1.3. "TEXT" => valid: True
1.4. "TEST" => valid: False
2. [0-9]{0,4}|TEXT|
2.0. "" => valid: True
2.1. "123" => valid: True
2.2. "12345" => valid: False
2.3. "TEXT" => valid: False
2.4. "TEST" => valid: False
3. [0-9]{0,4}|TEXT|1234
3.0. "" => valid: True
3.1. "123" => valid: True
3.2. "12345" => valid: False
3.3. "TEXT" => valid: True
3.4. "TEST" => valid: False
4. TEXT|[0-9]{0,4}|
4.0. "" => valid: True
4.1. "123" => valid: True
4.2. "12345" => valid: False
4.3. "TEXT" => valid: True
4.4. "TEST" => valid: False
-------------- Code --------------
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/xmlschemas.h>
// gcc -Wall `xml2-config --cflags --libs` -o v v.c && ./v
#define DEBUG 0
/*
 * printf-style error/warning callback that writes the formatted message to
 * stdout.
 *
 * ctx - opaque user pointer supplied by the caller; unused here.
 * fmt - printf-style format string.
 * ... - arguments consumed by fmt.
 *
 * The variadic arguments are forwarded to vprintf() as a whole. The argument
 * list has no terminating sentinel, so it must never be walked by hand.
 */
void Validity(void * ctx, const char *fmt, ...) {
    va_list ap;

    (void)ctx;
    if (fmt == NULL)
        return;

    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);
}
/*
 * Silent printf-style callback: accepts the same arguments as Validity()
 * and discards them, producing no output.
 */
void NoValidity(void * ctx, const char *fmt, ...)
{
    (void)ctx;
    (void)fmt;
}
/* Returns s, or the literal "(null)" when s is NULL, so the result is always
 * safe to pass to a %s conversion. */
static const char *djos_printable(const char *s)
{
    return (s != NULL) ? s : "(null)";
}

/*
 * Structured error callback: dumps every field of the error record to stdout
 * between two separator lines.
 *
 * userData - opaque pointer registered together with the callback; unused.
 * error    - the error record to print; a NULL record is ignored.
 *
 * String fields may be NULL, and passing NULL to %s is undefined behaviour,
 * so each one goes through djos_printable() first.
 */
void djosStructuredErrorFunc(void * userData, xmlErrorPtr error)
{
    (void)userData;
    if (error == NULL)
        return;

    printf("----------------------------------------\n");
    printf("domain : %i\n", error->domain );
    printf("code : %i\n", error->code );
    printf("message : %s\n", djos_printable(error->message) );
    printf("level : %i\n", error->level );
    printf("file : %s\n", djos_printable(error->file) );
    printf("line : %i\n", error->line );
    printf("str1 : %s\n", djos_printable(error->str1) );
    printf("str2 : %s\n", djos_printable(error->str2) );
    printf("str3 : %s\n", djos_printable(error->str3) );
    printf("int1 : %i\n", error->int1 );
    printf("int2 : %i\n", error->int2 );
    printf("ctxt : %p\n", error->ctxt );
    printf("node : %p\n", error->node );
    printf("----------------------------------------\n");
}
/*
 * Validates an in-memory XML document against an in-memory XML Schema.
 *
 * xml        - NUL-terminated text of the instance document.
 * schema_xml - NUL-terminated text of the schema document.
 *
 * Returns 1 when xmlSchemaValidateDoc() reports 0 (document valid), 0 when it
 * reports anything else, or a negative code when setup fails:
 *   -1  the schema text or the instance text could not be parsed
 *   -2  the schema parser context could not be created
 *   -3  the schema itself could not be compiled
 *   -4  the validation context could not be created
 *
 * Every exit goes through the single cleanup path below, so no object is
 * leaked on an early failure.
 */
int is_valid(const char *xml, const char *schema_xml)
{
    xmlDocPtr schema_doc = NULL;
    xmlDocPtr doc = NULL;
    xmlSchemaParserCtxtPtr parser_ctxt = NULL;
    xmlSchemaPtr schema = NULL;
    xmlSchemaValidCtxtPtr valid_ctxt = NULL;
    int result;

    /* the schema cannot be loaded or is not well-formed */
    result = -1;
    schema_doc = xmlReadMemory(schema_xml, (int)strlen(schema_xml), NULL,
                               NULL, XML_PARSE_NONET);
    if (schema_doc == NULL)
        goto cleanup;

    /* the instance document cannot be loaded or is not well-formed */
    doc = xmlReadMemory(xml, (int)strlen(xml), NULL, NULL, 0);
    if (doc == NULL)
        goto cleanup;

    /* unable to create a parser context for the schema */
    result = -2;
    parser_ctxt = xmlSchemaNewDocParserCtxt(schema_doc);
    if (parser_ctxt == NULL)
        goto cleanup;

    /* the schema itself is not valid */
    result = -3;
    schema = xmlSchemaParse(parser_ctxt);
    if (schema == NULL)
        goto cleanup;

    /* unable to create a validation context for the schema */
    result = -4;
    valid_ctxt = xmlSchemaNewValidCtxt(schema);
    if (valid_ctxt == NULL)
        goto cleanup;

    /* DEBUG selects how validation diagnostics are reported:
     * 0 = silent, 1 = printf-style callback, 2 = structured callback. */
    if (DEBUG == 0)
        xmlSchemaSetValidErrors(valid_ctxt, NoValidity, NoValidity, NULL);
    else if (DEBUG == 1)
        xmlSchemaSetValidErrors(valid_ctxt, Validity, Validity, NULL);
    else if (DEBUG == 2){
        xmlSchemaSetValidErrors(valid_ctxt, NoValidity, NoValidity, NULL);
        xmlSchemaSetValidStructuredErrors(valid_ctxt, djosStructuredErrorFunc,
                                          parser_ctxt);
    }

    /* force the return value to be non-negative on success */
    result = (xmlSchemaValidateDoc(valid_ctxt, doc) == 0) ? 1 : 0;

cleanup:
    /* Release in reverse order of acquisition; pointers never obtained are
     * still NULL and are skipped. */
    if (valid_ctxt != NULL)
        xmlSchemaFreeValidCtxt(valid_ctxt);
    if (schema != NULL)
        xmlSchemaFree(schema);
    if (parser_ctxt != NULL)
        xmlSchemaFreeParserCtxt(parser_ctxt);
    if (schema_doc != NULL)
        xmlFreeDoc(schema_doc);
    if (doc != NULL)
        xmlFreeDoc(doc);
    return result;
}
/*
 * Test driver: for each pattern, builds a schema whose simple type "TTest"
 * restricts xs:string with that pattern, then validates a <Test> element
 * holding each sample text and prints one result line per combination.
 *
 * is_valid() may return a negative code on setup failure; that is reported
 * as "Error" rather than being used as an array index.
 *
 * Returns 0 normally, 1 if a document would not fit in its buffer.
 */
int main(int argc, char* argv[]){
    const char patterns[5][30] = {"[0-9]{0,4}|TEXT", "TEXT|[0-9]{0,4}",
        "[0-9]{0,4}|TEXT|", "[0-9]{0,4}|TEXT|1234", "TEXT|[0-9]{0,4}|"};
    const char texts[5][10] = {"", "123", "12345", "TEXT", "TEST"};
    const int pattern_count = (int)(sizeof patterns / sizeof patterns[0]);
    const int text_count = (int)(sizeof texts / sizeof texts[0]);
    char xml[100];
    char schema_xml[1024];
    int i, j;

    (void)argc;
    (void)argv;

    for (i = 0; i < pattern_count; i++){
        /* Bounded formatting: truncation is detected instead of overflowing. */
        int len = snprintf(schema_xml, sizeof schema_xml,
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            " <xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\">"
            " <xs:simpleType name=\"TTest\">"
            " <xs:restriction base=\"xs:string\">"
            " <xs:whiteSpace value=\"preserve\"/>"
            " <xs:pattern value=\"%s\"/>"
            " </xs:restriction>"
            " </xs:simpleType>"
            " <xs:element name=\"Test\" type=\"TTest\"/>"
            " </xs:schema>",
            patterns[i]);
        if (len < 0 || (size_t)len >= sizeof schema_xml) {
            fprintf(stderr, "schema for pattern %i does not fit\n", i);
            return 1;
        }

        printf("\n%i. %s\n", i, patterns[i]);
        for (j = 0; j < text_count; j++){
            const char *verdict;
            int result;

            len = snprintf(xml, sizeof xml,
                "<?xml version=\"1.0\" encoding=\"utf-8\"?><Test>%s</Test>",
                texts[j]);
            if (len < 0 || (size_t)len >= sizeof xml) {
                fprintf(stderr, "document for text %i does not fit\n", j);
                return 1;
            }

            result = is_valid(xml, schema_xml);
            if (result < 0)
                verdict = "Error";
            else
                verdict = result ? "True" : "False";
            printf("%i.%i. \"%s\" => valid: %s\n", i, j, texts[j], verdict);
        }
    }
    return 0;
}
-- System Information:
Debian Release: wheezy/sid
APT prefers testing
APT policy: (900, 'testing'), (65, 'unstable'), (60, 'stable')
Architecture: i386 (i686)
Kernel: Linux 3.2.0-3-686-pae (SMP w/2 CPU cores)
Locale: LANG=pt_BR.UTF-8, LC_CTYPE=pt_BR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash
Versions of packages libxml2 depends on:
ii libc6 2.13-35
ii liblzma5 5.1.1alpha+20120614-1
ii multiarch-support 2.13-35
ii zlib1g 1:1.2.7.dfsg-13
Versions of packages libxml2 recommends:
ii xml-core 0.13+nmu2
libxml2 suggests no packages.
-- no debconf information
More information about the debian-xml-sgml-pkgs
mailing list