diff options
Diffstat (limited to 'libgcobol/xmlparse.cc')
| -rw-r--r-- | libgcobol/xmlparse.cc | 593 |
1 files changed, 390 insertions, 203 deletions
diff --git a/libgcobol/xmlparse.cc b/libgcobol/xmlparse.cc index 69849e3..54b9f02 100644 --- a/libgcobol/xmlparse.cc +++ b/libgcobol/xmlparse.cc @@ -46,6 +46,8 @@ #include <libxml/SAX2.h> #include <libxml/parser.h> +#include <syslog.h> + #include "config.h" #include "libgcobol-fp.h" #include "ec.h" @@ -76,118 +78,120 @@ void sayso( const char func[], int line, #define SAYSO_DATAZ(S) sayso(__func__, __LINE__, -1, S) #define SAYSO_DATA(N, S) sayso(__func__, __LINE__, N, S) +#define CTX ctx __attribute__ ((unused)) + struct xml_ec_value_t { int ibm_code; const char msg[80]; -} xml_ec_values[] = { +} xml_ec_values[] = { // Table 73. XML PARSE exceptions that allow continuation - { 1, "invalid character between elements" }, - { 2, "invalid start before element content" }, - { 3, "duplicate attribute" }, - { 4, "markup character '<' in an attribute value" }, - { 5, "start/end tag mismatch" }, - { 6, "invalid character in element" }, - { 7, "invalid start in element content. " }, - { 8, "CDATA closing character sequence ']]>' not opened" }, - { 10, "comment the character sequence '--' without '>'" }, - { 11, "invalid character in a processing instruction" }, - { 12, "XML declaration was not start of document" }, - { 13, "invalid digit in a hexadecimal character reference" }, - { 14, "invalid digit in a decimal character reference" }, - { 15, "encoding declaration value name must start with [a-zA-Z] character" }, - { 16, "character reference did not refer to a legal XML character" }, - { 17, "invalid character in an entity reference name" }, - { 70, "EBCDIC document, supported EBCDIC page, unsupported declaration" }, - { 71, "EBCDIC document, unsupported EBCDIC page " }, - { 72, "EBCDIC document, unsupported EBCDIC page, unsupported declaration" }, - { 73, "EBCDIC document, unsupported EBCDIC page and declaration " }, - { 80, "ASCII document, supported ASCII page, unsupported declaration" }, - { 81, "ASCII document, unsupported ASCII page " }, - { 82, "ASCII document, unsupported ASCII page, unsupported declaration" }, - { 83, "ASCII document, unsupported ASCII page and declaration " }, - { 84, "ASCII document, invalid UTF-8, external UTF-8, no declaration. " }, - { 85, "ASCII document, invalid UTF-8, external UTF-8, invalid declaration" }, - { 86, "ASCII document, invalid UTF-8, external ASCII" }, - { 87, "ASCII document, invalid UTF-8, external and document UTF-8" }, - { 88, "ASCII document, invalid UTF-8, unsupported ASCII/UTF-8, UTF-8 declaration" }, - { 89, "ASCII document, invalid UTF-8, external UTF-8, ASCII declaration" }, - { 92, "alphanumeric document expected, document is UTF-16. " }, - - // XML PARSE exceptions that allow continuation (continued) + { 1, "invalid character between elements" }, + { 2, "invalid start before element content" }, + { 3, "duplicate attribute" }, + { 4, "markup character '<' in an attribute value" }, + { 5, "start/end tag mismatch" }, + { 6, "invalid character in element" }, + { 7, "invalid start in element content. " }, + { 8, "CDATA closing character sequence ']]>' not opened" }, + { 10, "comment the character sequence '--' without '>'" }, + { 11, "invalid character in a processing instruction" }, + { 12, "XML declaration was not start of document" }, + { 13, "invalid digit in a hexadecimal character reference" }, + { 14, "invalid digit in a decimal character reference" }, + { 15, "encoding declaration value name must start with [a-zA-Z] character" }, + { 16, "character reference did not refer to a legal XML character" }, + { 17, "invalid character in an entity reference name" }, + { 70, "EBCDIC document, supported EBCDIC page, unsupported declaration" }, + { 71, "EBCDIC document, unsupported EBCDIC page " }, + { 72, "EBCDIC document, unsupported EBCDIC page, unsupported declaration" }, + { 73, "EBCDIC document, unsupported EBCDIC page and declaration " }, + { 80, "ASCII document, supported ASCII page, unsupported declaration" }, + { 81, "ASCII document, unsupported ASCII page " }, + { 82, "ASCII document, unsupported ASCII page, unsupported declaration" }, + { 83, "ASCII document, unsupported ASCII page and declaration " }, + { 84, "ASCII document, invalid UTF-8, external UTF-8, no declaration. " }, + { 85, "ASCII document, invalid UTF-8, external UTF-8, invalid declaration" }, + { 86, "ASCII document, invalid UTF-8, external ASCII" }, + { 87, "ASCII document, invalid UTF-8, external and document UTF-8" }, + { 88, "ASCII document, invalid UTF-8, unsupported ASCII/UTF-8, UTF-8 declaration" }, + { 89, "ASCII document, invalid UTF-8, external UTF-8, ASCII declaration" }, + { 92, "alphanumeric document expected, document is UTF-16. " }, + + // XML PARSE exceptions that allow continuation (continued) //// 100,001 - 165,535 EBCDIC document encoding does not match code page //// 200,001 - 265,535 ASCII document encoding does not match code page - // XML PARSE exceptions that do not allow continuation - { 100, "end of document before start of XML declaration" }, - { 101, "end of document before end of XML declaration" }, - { 102, "end of document before root element" }, - { 103, "end of document before version information in XML declaration" }, - { 104, "end of document before version information value in XML declaration" }, - { 106, "end of document before encoding declaration value in XML declaration" }, - { 108, "end of document before standalone declaration value in XML declaration" }, - { 109, "end of document before attribute name" }, - { 110, "end of document before attribute value" }, - { 111, "end of document before character/entity reference in attribute value" }, - { 112, "end of document before empty element tag" }, - { 113, "end of document before root element name" }, - { 114, "end of document before element name" }, - { 115, "end of document before character data in element content" }, - { 116, "end of document before processing instruction in element content" }, - { 117, "end of document before comment or CDATA section in element content" }, - { 118, "end of document before comment in element content" }, - { 119, "end of document before CDATA section in element content" }, - { 120, "end of document before character/entity reference in element content" }, - { 121, "end of document before after close of root element" }, - { 122, "possible invalid start of a document type" }, - { 123, "duplicate document type" }, - { 124, "root element name must start with [A-Za-z_:]" }, - { 125, "first attribute name must start with [A-Za-z_:]" }, - { 126, "invalid character in or after element name" }, - { 127, "attribute name not followed by '=' " }, - { 128, "invalid attribute value delimiter" }, - { 130, "attribute name must start with [A-Za-z_:]" }, - { 131, "invalid character in or after attribute name" }, - { 132, "empty element tag not terminated with '/>'" }, - { 133, "element end tag name name must start with [A-Za-z_:]" }, - { 134, "element end tag not terminated with '>'" }, - { 135, "element name must start with [A-Za-z_:]" }, - { 136, "invalid start of comment/CDATA in element" }, - { 137, "invalid start of comment" }, - { 138, "processing instruction target name must start with [A-Za-z_:]" }, - { 139, "invalid character in/afterprocessing instruction target name" }, - { 140, "processing instruction not terminated with '?>'" }, - { 141, "invalid character following '&' in a character/entity reference" }, - { 142, "missing version information in XML declaration" }, - { 143, "missing '=' after 'version' in XML declaration " }, - { 144, "missing XML version declaration " }, - { 145, "invalid character in XML version information" }, - { 146, "invalid character following XML version information value " }, - { 147, "invalid attribute in XML declaration" }, - { 148, "missing '=' after 'encoding' in XML declaration" }, - { 149, "missing XML encoding declaration value" }, - { 150, "invalid XML encoding declaration value" }, - { 151, "invalid character afer XML declaration" }, - { 152, "invalid attribute XML declaration" }, - { 153, "missing '=' after standalone XML declaration" }, - { 154, "missing standalone XML declaration value" }, - { 155, "standalone declaration must be 'yes' or 'no'" }, - { 156, "invalid standalone XML declaration value" }, - { 157, "invalid character following XML standalone declaration value" }, - { 158, "unterminated XML declaration " }, - { 159, "start of document type declaration after end of root element" }, - { 160, "start of element after end of root element" }, - { 161, "invalid UTF-8 byte sequence" }, - { 162, "UTF-8 character that has a Unicode code point above x'FFFF'" }, - { 315, "UTF-16 document little-endian unsupported" }, - { 316, "UCS4 document unsupported" }, - { 317, "unrecognized document encoding" }, - { 318, "UTF-8 document unsupported " }, - { 320, "mismatched national document data item to document encoding EBCDIC" }, - { 321, "mismatched national document data item to document encoding ASCII" }, - { 322, "mismatched native alphanumeric document data item to document encoding EBCDIC" }, - { 323, "mismatched host alphanumeric document data item to document encoding ASCII" }, - { 324, "mismatched national document data item to document encoding UTF-8" }, - { 325, "mismatched host alphanumeric document datat to document encoding UTF-8" }, + // XML PARSE exceptions that do not allow continuation + { 100, "end of document before start of XML declaration" }, + { 101, "end of document before end of XML declaration" }, + { 102, "end of document before root element" }, + { 103, "end of document before version information in XML declaration" }, + { 104, "end of document before version information value in XML declaration" }, + { 106, "end of document before encoding declaration value in XML declaration" }, + { 108, "end of document before standalone declaration value in XML declaration" }, + { 109, "end of document before attribute name" }, + { 110, "end of document before attribute value" }, + { 111, "end of document before character/entity reference in attribute value" }, + { 112, "end of document before empty element tag" }, + { 113, "end of document before root element name" }, + { 114, "end of document before element name" }, + { 115, "end of document before character data in element content" }, + { 116, "end of document before processing instruction in element content" }, + { 117, "end of document before comment or CDATA section in element content" }, + { 118, "end of document before comment in element content" }, + { 119, "end of document before CDATA section in element content" }, + { 120, "end of document before character/entity reference in element content" }, + { 121, "end of document before after close of root element" }, + { 122, "possible invalid start of a document type" }, + { 123, "duplicate document type" }, + { 124, "root element name must start with [A-Za-z_:]" }, + { 125, "first attribute name must start with [A-Za-z_:]" }, + { 126, "invalid character in or after element name" }, + { 127, "attribute name not followed by '=' " }, + { 128, "invalid attribute value delimiter" }, + { 130, "attribute name must start with [A-Za-z_:]" }, + { 131, "invalid character in or after attribute name" }, + { 132, "empty element tag not terminated with '/>'" }, + { 133, "element end tag name name must start with [A-Za-z_:]" }, + { 134, "element end tag not terminated with '>'" }, + { 135, "element name must start with [A-Za-z_:]" }, + { 136, "invalid start of comment/CDATA in element" }, + { 137, "invalid start of comment" }, + { 138, "processing instruction target name must start with [A-Za-z_:]" }, + { 139, "invalid character in/afterprocessing instruction target name" }, + { 140, "processing instruction not terminated with '?>'" }, + { 141, "invalid character following '&' in a character/entity reference" }, + { 142, "missing version information in XML declaration" }, + { 143, "missing '=' after 'version' in XML declaration " }, + { 144, "missing XML version declaration " }, + { 145, "invalid character in XML version information" }, + { 146, "invalid character following XML version information value " }, + { 147, "invalid attribute in XML declaration" }, + { 148, "missing '=' after 'encoding' in XML declaration" }, + { 149, "missing XML encoding declaration value" }, + { 150, "invalid XML encoding declaration value" }, + { 151, "invalid character afer XML declaration" }, + { 152, "invalid attribute XML declaration" }, + { 153, "missing '=' after standalone XML declaration" }, + { 154, "missing standalone XML declaration value" }, + { 155, "standalone declaration must be 'yes' or 'no'" }, + { 156, "invalid standalone XML declaration value" }, + { 157, "invalid character following XML standalone declaration value" }, + { 158, "unterminated XML declaration " }, + { 159, "start of document type declaration after end of root element" }, + { 160, "start of element after end of root element" }, + { 161, "invalid UTF-8 byte sequence" }, + { 162, "UTF-8 character that has a Unicode code point above x'FFFF'" }, + { 315, "UTF-16 document little-endian unsupported" }, + { 316, "UCS4 document unsupported" }, + { 317, "unrecognized document encoding" }, + { 318, "UTF-8 document unsupported " }, + { 320, "mismatched national document data item to document encoding EBCDIC" }, + { 321, "mismatched national document data item to document encoding ASCII" }, + { 322, "mismatched native alphanumeric document data item to document encoding EBCDIC" }, + { 323, "mismatched host alphanumeric document data item to document encoding ASCII" }, + { 324, "mismatched national document data item to document encoding UTF-8" }, + { 325, "mismatched host alphanumeric document datat to document encoding UTF-8" }, { 500, "internal error" }, }, *eoxml_ec_values = xml_ec_values + COUNT_OF(xml_ec_values); @@ -240,17 +244,17 @@ ATTRIBUTE-CHARACTER The single character that corresponds with the predefined en ATTRIBUTE-CHARACTERS The value within quotation marks or apostrophes. This can be a substring of the attribute value if the value includes an entity reference. ATTRIBUTE-NAME The attribute name; the string to the left of the equal sign ATTRIBUTE-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference. - + CONTENT-CHARACTER The single character that corresponds with the predefined entity reference in the element content - + CONTENT-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference.1 DOCUMENT-TYPE-DECLARATION The entire document type declaration, including the opening and closing character sequences "<!DOCTYPE" and ">" ENCODING-DECLARATION The value, between quotes or apostrophes, of the encoding declaration in the XML declaration END-OF-CDATA-SECTION The string "]]>" END-OF-DOCUMENT Empty with length zero - + EXCEPTION The part of the document that was successfully scanned, up to and including the point at which the exception was detected.2 Special register XML-CODE contains the unique error code that identifies the exception. - + PROCESSING-INSTRUCTION-TARGET The processing instruction target name, which occurs immediately after the processing instruction opening sequence, "<?" STANDALONE-DECLARATION The value, between quotation marks or apostrophes ("yes" or "no"), of the stand-alone declaration in the XML declaration START-OF-CDATA-SECTION The string "<![CDATA[" @@ -276,7 +280,7 @@ xml_event( const char event_name[], size_t len, char text[] ) { auto p = std::copy( event_name, event_name + strlen(event_name), PTRCAST(char, __ggsr__xml_event.data) ); std::fill(PTRCAST(unsigned char, p), pend, 0x20); - + __ggsr__xml_text.data = reinterpret_cast<unsigned char*>(text); __ggsr__xml_text.capacity = __ggsr__xml_text.allocated = len; __ggsr__xml_code.data = 0; @@ -300,70 +304,80 @@ xml_event( const char event_name[], const xmlChar * value ) { xml_event(event_name, strlen(text), text); } -static void attributeDecl(void * ctx, - const xmlChar * elem, - const xmlChar * fullname, - int type, - int def, - const xmlChar * defaultValue, - xmlEnumerationPtr tree) +/* + * Many static handler functions are defined but not used while we learn what + * is needed. + */ +#pragma GCC diagnostic ignored "-Wunused-function" + +static void attributeDecl(void * CTX, + const xmlChar * elem, + const xmlChar * fullname, + int type __attribute__ ((unused)), + int def __attribute__ ((unused)), + const xmlChar * defaultValue, + xmlEnumerationPtr tree __attribute__ ((unused)) ) { fprintf(stderr, "%s:%d: elem=%s, name=%s, default=%s\n", __func__, __LINE__, elem, fullname, defaultValue); } -static void cdataBlock(void * ctx, - const xmlChar * data, +static void cdataBlock(void * CTX, + const xmlChar * data, int len) -{ +{ SAYSO_DATA(len, data); xml_event("CONTENT-CHARACTERS", len, data); } -static void characters(void * ctx, - const xmlChar * data, +static void characters(void * CTX, + const xmlChar * data, int len) { SAYSO_DATA(len, data); xml_event("CONTENT-CHARACTERS", len, data); } -static void comment(void * ctx, const xmlChar * value) { +static void comment(void * CTX, const xmlChar * value) { SAYSO_DATAZ(value); xml_event("COMMENT", value); } -static void elementDecl(void * ctx, - const xmlChar * name, - int type, - xmlElementContentPtr content) +static void elementDecl(void * CTX, + const xmlChar * name, + int type __attribute__ ((unused)), + xmlElementContentPtr content __attribute__ ((unused)) ) { SAYSO_DATAZ(name); } -static void endDocument(void * ctx) +static void endDocument(void * CTX) { SAYSO(); } -static void endElementNs(void * ctx, - const xmlChar * localname, - const xmlChar * prefix, - const xmlChar * URI) +static void endElementNs(void * CTX, + const xmlChar * localname, + const xmlChar * prefix, + const xmlChar * URI __attribute__ ((unused)) ) { + SAYSO_DATAZ(prefix); SAYSO_DATAZ(localname); xml_event("END-OF-ELEMENT", localname); } -static void endElement(void * ctx, +static void endElement(void * CTX, const xmlChar * name) { SAYSO_DATAZ(name); } -static void entityDecl(void * ctx, - const xmlChar * name, - int type, - const xmlChar * publicId, - const xmlChar * systemId, - xmlChar * content) -{ SAYSO_DATAZ(name); } +static void entityDecl(void * CTX, + const xmlChar * name, + int type __attribute__ ((unused)), + const xmlChar * publicId __attribute__ ((unused)), + const xmlChar * systemId __attribute__ ((unused)), + xmlChar * content ) +{ + SAYSO_DATAZ(name); + SAYSO_DATAZ(content); +} -static void error(void * ctx, const char * msg, ...) +static void error(void * CTX, const char * msg, ...) { va_list ap; va_start (ap, msg); @@ -373,13 +387,17 @@ static void error(void * ctx, const char * msg, ...) va_end (ap); } -static void externalSubset(void * ctx, - const xmlChar * name, - const xmlChar * ExternalID, +static void externalSubset(void * CTX, + const xmlChar * name, + const xmlChar * ExternalID, const xmlChar * SystemID) -{ SAYSO_DATAZ(name); } +{ + SAYSO_DATAZ(name); + SAYSO_DATAZ(ExternalID); + SAYSO_DATAZ(SystemID); +} -static void fatalError(void * ctx, const char * msg, ...) +static void fatalError(void * CTX, const char * msg, ...) { va_list ap; va_start (ap, msg); @@ -389,43 +407,54 @@ static void fatalError(void * ctx, const char * msg, ...) va_end (ap); } -static xmlEntityPtr getEntity(void * ctx, +#if 0 +static xmlEntityPtr getEntity(void * CTX, const xmlChar * name) { SAYSO_DATAZ(name); } -static xmlEntityPtr getParameterEntity(void * ctx, +static xmlEntityPtr getParameterEntity(void * CTX, const xmlChar * name) { SAYSO_DATAZ(name); } +#endif -static int hasExternalSubset(void * ctx) -{ SAYSO(); } +static int hasExternalSubset(void * CTX) +{ SAYSO(); return 0; } -static int hasInternalSubset(void * ctx) -{ SAYSO(); } +static int hasInternalSubset(void * CTX) +{ SAYSO(); return 0; } -static void ignorableWhitespace(void * ctx, - const xmlChar * ch, +static void ignorableWhitespace(void * CTX, + const xmlChar * ch, int len) { SAYSO_DATA(len, ch); } -static void internalSubset(void * ctx, - const xmlChar * name, - const xmlChar * ExternalID, +static void internalSubset(void * CTX, + const xmlChar * name, + const xmlChar * ExternalID, const xmlChar * SystemID) -{ SAYSO_DATAZ(name); } +{ + SAYSO_DATAZ(name); + SAYSO_DATAZ(ExternalID); + SAYSO_DATAZ(SystemID); +} -static int isStandalone (void * ctx) +#if 0 +static int isStandalone (void * CTX) { SAYSO(); } +#endif - -static void notationDecl(void * ctx, - const xmlChar * name, - const xmlChar * publicId, +static void notationDecl(void * CTX, + const xmlChar * name, + const xmlChar * publicId, const xmlChar * systemId) -{ SAYSO_DATAZ(name); } +{ + SAYSO_DATAZ(name); + SAYSO_DATAZ(publicId); + SAYSO_DATAZ(systemId); +} -static void processingInstruction(void * ctx, - const xmlChar * target, +static void processingInstruction(void * CTX, + const xmlChar * target, const xmlChar * data) { SAYSO_DATAZ(target); @@ -434,17 +463,19 @@ static void processingInstruction(void * ctx, xml_event("PROCESSING-INSTRUCTION-DATA", data); } -static void reference(void * ctx, +static void reference(void * CTX, const xmlChar * name) { SAYSO_DATAZ(name); } -static xmlParserInputPtr resolveEntity( void * ctx, - const xmlChar * publicId, +#if 0 +static xmlParserInputPtr resolveEntity( void * CTX, + const xmlChar * publicId, const xmlChar * systemId) { SAYSO(); } +#endif -static void setDocumentLocator(void * ctx, - xmlSAXLocatorPtr loc) +static void setDocumentLocator(void * CTX, + xmlSAXLocatorPtr loc __attribute__ ((unused)) ) { SAYSO(); } /* @@ -452,36 +483,48 @@ static void setDocumentLocator(void * ctx, * Use xmlCtxtGetVersion(), xmlCtxtGetDeclaredEncoding() and * xmlCtxtGetStandalone() to get data from the XML declaration. */ -static void startDocument(void * ctx) -{ SAYSO(); } +static void startDocument(void * CTX) +{ + SAYSO(); +} -static void startElementNs(void * ctx, - const xmlChar * localname, - const xmlChar * prefix, - const xmlChar * URI, - int nb_namespaces, - const xmlChar ** namespaces, - int nb_attributes, - int nb_defaulted, - const xmlChar ** attributes) +static void startElementNs(void * CTX, + const xmlChar * localname, + const xmlChar * prefix, + const xmlChar * URI, + int nb_namespaces __attribute__ ((unused)), + const xmlChar ** namespaces __attribute__ ((unused)), + int nb_attributes __attribute__ ((unused)), + int nb_defaulted __attribute__ ((unused)), + const xmlChar ** attributes __attribute__ ((unused))) { + SAYSO_DATAZ(prefix); + SAYSO_DATAZ(URI); SAYSO_DATAZ(localname); xml_event("START-OF-ELEMENT", localname); } -static void startElement(void * ctx, - const xmlChar * name, +static void startElement(void * CTX, + const xmlChar * name, const xmlChar ** atts) -{ SAYSO_DATAZ(name); } +{ + SAYSO_DATAZ(name); + for( int i=0; atts[i]; i++ ) SAYSO_DATAZ(atts[i]); +} -static void unparsedEntityDecl(void * ctx, - const xmlChar * name, - const xmlChar * publicId, - const xmlChar * systemId, +static void unparsedEntityDecl(void * CTX, + const xmlChar * name, + const xmlChar * publicId, + const xmlChar * systemId, const xmlChar * notationName) -{ SAYSO_DATAZ(name); } +{ + SAYSO_DATAZ(name); + SAYSO_DATAZ(publicId); + SAYSO_DATAZ(systemId); + SAYSO_DATAZ(notationName); +} -static void warning(void * ctx, const char * msg, ... ) +static void warning(void * CTX, const char * msg, ... ) { va_list ap; va_start (ap, msg); @@ -494,15 +537,15 @@ static void warning(void * ctx, const char * msg, ... ) /* * xmlSAXHandler is a structure of function pointers that the SAX parser calls * as it encounters XML elements in the input. Each pointer is a callback - * function, locally defined in this file. These we term "handlers". - * + * function, locally defined in this file. These we term "handlers". + * * Each handler sets the XML registers per IBM, and then calls * cobol_callback(), which is a function pointer supplied by the COBOL program - * to be the processing procedure for XML PARSE. - * + * to be the processing procedure for XML PARSE. + * * There is no obvious way to abort parsing at the C level. See: * http://veillard.com/XML/messages/0540.html - * + * * > The simplest to implement this would not be to add a new SAX * > callback but rather modify the xmlParserCtxtPtr passed to the * > callbacks. The best seems to be: @@ -512,7 +555,7 @@ static void warning(void * ctx, const char * msg, ... ) * > Doing both should led to a quick termination of parsing * > (but endElement(s)/endDocument will certainly be called anyway). * - * Another hack might be to set the input to all blanks in cobol_callback. + * Another hack might be to set the input to all blanks in cobol_callback. */ static xmlSAXHandler handlers; @@ -548,7 +591,7 @@ initialize_handlers( callback_t *callback ) { //// Everything is available on the context, so this is useless in our case handlers.setDocumentLocator = setDocumentLocator; -#endif +#endif handlers.cdataBlock = cdataBlock; handlers.characters = characters; @@ -565,14 +608,158 @@ initialize_handlers( callback_t *callback ) { handlers.warning = warning; } +static xmlChar * +xmlchar_of( const char input[] ) { + return const_cast<xmlChar*>( reinterpret_cast<const xmlChar*>(input) ); +} + +static const char * +xmlParserErrors_str( xmlParserErrors erc, const char name[] ) { + const char *msg = "???"; + + switch( erc ) { + case XML_ERR_OK: + msg = "Success"; + break; + case XML_ERR_INTERNAL_ERROR: + msg = "Internal assertion failure"; + break; + case XML_ERR_NO_MEMORY: + msg = "Out of memory"; + break; + case XML_ERR_UNSUPPORTED_ENCODING: + msg = "Unsupported character encoding"; + break; +#if LIBXML_VERSION >= 21400 + case XML_ERR_RESOURCE_LIMIT: + msg = "Internal resource limit like maximum amplification factor exceeded"; + break; + case XML_ERR_ARGUMENT: + msg = "Invalid argument"; + break; + case XML_ERR_SYSTEM: + msg = "Unexpected error from the OS or an external library"; + break; +#endif + case XML_IO_ENOENT: + msg = "File not found"; + break; + default: + msg = strdup(name); + if( ! msg ) msg = "unknown XML error"; + break; + } + return msg; +} + +#define xmlerror_str(E) xmlParserErrors_str( (E), #E ) + +/* + * The global context is NULL if XML PARSE is not in progress. + */ +static class context_t { + const int priority; + public: + xmlParserCtxt * ctxt; + context_t() : ctxt(nullptr), priority(LOG_INFO) { + const int option = LOG_PERROR, facility = LOG_USER; +#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME + /* Declared in errno.h, when available. */ + static const char * const ident = program_invocation_short_name; +#elif defined (HAVE_GETPROGNAME) + /* Declared in stdlib.h. */ + static const char * const ident = getprogname(); +#else + /* Avoid a NULL entry. */ + static const char * const ident = "unnamed_COBOL_program"; +#endif + // TODO: Program to set option in library via command-line and/or environment. + // Library listens to program, not to the environment. + openlog(ident, option, facility); + + initialize_handlers(nullptr); + } + + void + push( cblc_field_t *input_field, size_t input_offset, size_t len, bool done ) { + if( ! ctxt ) { + init(); + } + assert(cobol_callback); // caller must set + + if( input_offset < len ) { + int size = len - input_offset; + const char *chunk = PTRCAST(char, input_field->data + input_offset); + int terminate = done? 1 : 0; + + auto erc = (xmlParserErrors )xmlParseChunk( ctxt, chunk, size, terminate ); + if( erc != 0 ) { + auto msg = xmlerror_str(erc); + syslog(priority, "XML PARSE: XML error: %s", msg); + } + + if( done ) this->done(); + + } + } + + void done() { + if( ctxt ) { + xmlFreeParserCtxt( ctxt ); + ctxt = nullptr; + } + } + + protected: + void init() { + const char *external_entities = nullptr; + void * const user_data = nullptr; + + ctxt = xmlCreatePushParserCtxt( &handlers, user_data, + nullptr, 0, external_entities); + } +} context; + +static int +xml_push_parse( cblc_field_t *input_field, + size_t input_offset, + size_t len, + cblc_field_t *encoding __attribute__ ((unused)), + cblc_field_t *validating __attribute__ ((unused)), + int returns_national __attribute__ ((unused)), + void (*callback)(void) ) +{ + ::cobol_callback = callback; + + context.push( input_field, input_offset, len, false); + +#if LIBXML_VERSION >= 21400 + const xmlChar * version = xmlCtxtGetVersion( context.ctxt ); +#else + const xmlChar * version = xmlchar_of("requires version 2.14"); +#endif + assert(version); + assert(nullptr == "function not ready and not called"); + return 0; +} + + +extern "C" // Parser calls via parser_xml_parse_end, probabably. +int +__gg__xml_parse_done() { + context.done(); + return 0; +} + + extern "C" -int +int __gg__xml_parse( const cblc_field_t *input_field, size_t input_offset, size_t len, - cblc_field_t *encoding, - cblc_field_t *validating, - int returns_national, + cblc_field_t *encoding __attribute__ ((unused)), + cblc_field_t *validating __attribute__ ((unused)), + int returns_national __attribute__ ((unused)), void (*callback)(void) ) { initialize_handlers(callback); @@ -582,7 +769,7 @@ __gg__xml_parse( const cblc_field_t *input_field, int erc = xmlSAXUserParseMemory(&handlers, nullptr, input, len); if( erc ) { - xmlErrorPtr msg = xmlCtxtGetLastError(nullptr); + const xmlError *msg = xmlCtxtGetLastError(nullptr); fprintf(stderr, "XML PARSE: error: line %d: %s (%d: %d.%d.%d)\n", msg->line, msg->message, erc, msg->domain, msg->level, msg->code); } |
