Commit 52b94080 authored by Benjamin Peterson's avatar Benjamin Peterson Committed by GitHub

closes bpo-38174: Update vendored expat library to 2.2.8. (GH-16346)

Fixes CVE-2019-15903. See full changelog at https://github.com/libexpat/libexpat/blob/R_2_2_8/expat/Changes.
parent 49d99f01
Update vendorized expat library version to 2.2.8, which resolves
CVE-2019-15903.
......@@ -31,34 +31,34 @@
*/
/* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML,
/* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML,
/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS,
/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS,
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML,
/* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML,
/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS,
/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS,
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,
......@@ -33,15 +33,6 @@
#ifndef Expat_INCLUDED
#define Expat_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler
#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler
#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler
#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg
#endif
#include <stdlib.h>
#include "expat_external.h"
......@@ -53,8 +44,8 @@ struct XML_ParserStruct;
typedef struct XML_ParserStruct *XML_Parser;
typedef unsigned char XML_Bool;
#define XML_TRUE ((XML_Bool) 1)
#define XML_FALSE ((XML_Bool) 0)
#define XML_TRUE ((XML_Bool)1)
#define XML_FALSE ((XML_Bool)0)
/* The XML_Status enum gives the possible return values for several
API functions. The preprocessor #defines are included so this
......@@ -164,25 +155,23 @@ enum XML_Content_Quant {
typedef struct XML_cp XML_Content;
struct XML_cp {
enum XML_Content_Type type;
enum XML_Content_Quant quant;
XML_Char * name;
unsigned int numchildren;
XML_Content * children;
enum XML_Content_Type type;
enum XML_Content_Quant quant;
XML_Char *name;
unsigned int numchildren;
XML_Content *children;
};
/* This is called for an element declaration. See above for
description of the model argument. It's the caller's responsibility
to free model when finished with it.
*/
typedef void (XMLCALL *XML_ElementDeclHandler) (void *userData,
const XML_Char *name,
XML_Content *model);
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
const XML_Char *name,
XML_Content *model);
XMLPARSEAPI(void)
XML_SetElementDeclHandler(XML_Parser parser,
XML_ElementDeclHandler eldecl);
XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl);
/* The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will
......@@ -192,17 +181,12 @@ XML_SetElementDeclHandler(XML_Parser parser,
value will be NULL in the case of "#REQUIRED". If "isrequired" is
true and default is non-NULL, then this is a "#FIXED" default.
*/
typedef void (XMLCALL *XML_AttlistDeclHandler) (
void *userData,
const XML_Char *elname,
const XML_Char *attname,
const XML_Char *att_type,
const XML_Char *dflt,
int isrequired);
typedef void(XMLCALL *XML_AttlistDeclHandler)(
void *userData, const XML_Char *elname, const XML_Char *attname,
const XML_Char *att_type, const XML_Char *dflt, int isrequired);
XMLPARSEAPI(void)
XML_SetAttlistDeclHandler(XML_Parser parser,
XML_AttlistDeclHandler attdecl);
XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl);
/* The XML declaration handler is called for *both* XML declarations
and text declarations. The way to distinguish is that the version
......@@ -212,15 +196,13 @@ XML_SetAttlistDeclHandler(XML_Parser parser,
was no standalone parameter in the declaration, that it was given
as no, or that it was given as yes.
*/
typedef void (XMLCALL *XML_XmlDeclHandler) (void *userData,
const XML_Char *version,
const XML_Char *encoding,
int standalone);
typedef void(XMLCALL *XML_XmlDeclHandler)(void *userData,
const XML_Char *version,
const XML_Char *encoding,
int standalone);
XMLPARSEAPI(void)
XML_SetXmlDeclHandler(XML_Parser parser,
XML_XmlDeclHandler xmldecl);
XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl);
typedef struct {
void *(*malloc_fcn)(size_t size);
......@@ -248,7 +230,6 @@ XML_ParserCreate(const XML_Char *encoding);
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
/* Constructs a new parser using the memory management suite referred to
by memsuite. If memsuite is NULL, then use the standard library memory
suite. If namespaceSeparator is non-NULL it creates a parser with
......@@ -265,7 +246,7 @@ XML_ParserCreate_MM(const XML_Char *encoding,
/* Prepare a parser object to be re-used. This is particularly
valuable when memory allocation overhead is disproportionately high,
such as when a large number of small documents need to be parsed.
such as when a large number of small documnents need to be parsed.
All handlers are cleared from the parser, except for the
unknownEncodingHandler. The parser's external state is re-initialized
except for the values of ns and ns_triplets.
......@@ -278,31 +259,27 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encoding);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated.
*/
typedef void (XMLCALL *XML_StartElementHandler) (void *userData,
const XML_Char *name,
const XML_Char **atts);
typedef void (XMLCALL *XML_EndElementHandler) (void *userData,
const XML_Char *name);
typedef void(XMLCALL *XML_StartElementHandler)(void *userData,
const XML_Char *name,
const XML_Char **atts);
typedef void(XMLCALL *XML_EndElementHandler)(void *userData,
const XML_Char *name);
/* s is not 0 terminated. */
typedef void (XMLCALL *XML_CharacterDataHandler) (void *userData,
const XML_Char *s,
int len);
typedef void(XMLCALL *XML_CharacterDataHandler)(void *userData,
const XML_Char *s, int len);
/* target and data are 0 terminated */
typedef void (XMLCALL *XML_ProcessingInstructionHandler) (
void *userData,
const XML_Char *target,
const XML_Char *data);
typedef void(XMLCALL *XML_ProcessingInstructionHandler)(void *userData,
const XML_Char *target,
const XML_Char *data);
/* data is 0 terminated */
typedef void (XMLCALL *XML_CommentHandler) (void *userData,
const XML_Char *data);
typedef void(XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data);
typedef void (XMLCALL *XML_StartCdataSectionHandler) (void *userData);
typedef void (XMLCALL *XML_EndCdataSectionHandler) (void *userData);
typedef void(XMLCALL *XML_StartCdataSectionHandler)(void *userData);
typedef void(XMLCALL *XML_EndCdataSectionHandler)(void *userData);
/* This is called for any characters in the XML document for which
there is no applicable handler. This includes both characters that
......@@ -317,25 +294,23 @@ typedef void (XMLCALL *XML_EndCdataSectionHandler) (void *userData);
default handler: for example, a comment might be split between
multiple calls.
*/
typedef void (XMLCALL *XML_DefaultHandler) (void *userData,
const XML_Char *s,
int len);
typedef void(XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s,
int len);
/* This is called for the start of the DOCTYPE declaration, before
any DTD or internal subset is parsed.
*/
typedef void (XMLCALL *XML_StartDoctypeDeclHandler) (
void *userData,
const XML_Char *doctypeName,
const XML_Char *sysid,
const XML_Char *pubid,
int has_internal_subset);
typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *doctypeName,
const XML_Char *sysid,
const XML_Char *pubid,
int has_internal_subset);
/* This is called for the start of the DOCTYPE declaration when the
closing > is encountered, but after processing any external
subset.
*/
typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
/* This is called for entity declarations. The is_parameter_entity
argument will be non-zero if the entity is a parameter entity, zero
......@@ -355,20 +330,14 @@ typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
Note that is_parameter_entity can't be changed to XML_Bool, since
that would break binary compatibility.
*/
typedef void (XMLCALL *XML_EntityDeclHandler) (
void *userData,
const XML_Char *entityName,
int is_parameter_entity,
const XML_Char *value,
int value_length,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName);
typedef void(XMLCALL *XML_EntityDeclHandler)(
void *userData, const XML_Char *entityName, int is_parameter_entity,
const XML_Char *value, int value_length, const XML_Char *base,
const XML_Char *systemId, const XML_Char *publicId,
const XML_Char *notationName);
XMLPARSEAPI(void)
XML_SetEntityDeclHandler(XML_Parser parser,
XML_EntityDeclHandler handler);
XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler);
/* OBSOLETE -- OBSOLETE -- OBSOLETE
This handler has been superseded by the EntityDeclHandler above.
......@@ -379,24 +348,20 @@ XML_SetEntityDeclHandler(XML_Parser parser,
entityName, systemId and notationName arguments will never be
NULL. The other arguments may be.
*/
typedef void (XMLCALL *XML_UnparsedEntityDeclHandler) (
void *userData,
const XML_Char *entityName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName);
typedef void(XMLCALL *XML_UnparsedEntityDeclHandler)(
void *userData, const XML_Char *entityName, const XML_Char *base,
const XML_Char *systemId, const XML_Char *publicId,
const XML_Char *notationName);
/* This is called for a declaration of notation. The base argument is
whatever was set by XML_SetBase. The notationName will never be
NULL. The other arguments can be.
*/
typedef void (XMLCALL *XML_NotationDeclHandler) (
void *userData,
const XML_Char *notationName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
typedef void(XMLCALL *XML_NotationDeclHandler)(void *userData,
const XML_Char *notationName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
/* When namespace processing is enabled, these are called once for
each namespace declaration. The call to the start and end element
......@@ -404,14 +369,12 @@ typedef void (XMLCALL *XML_NotationDeclHandler) (
declaration handlers. For an xmlns attribute, prefix will be
NULL. For an xmlns="" attribute, uri will be NULL.
*/
typedef void (XMLCALL *XML_StartNamespaceDeclHandler) (
void *userData,
const XML_Char *prefix,
const XML_Char *uri);
typedef void(XMLCALL *XML_StartNamespaceDeclHandler)(void *userData,
const XML_Char *prefix,
const XML_Char *uri);
typedef void (XMLCALL *XML_EndNamespaceDeclHandler) (
void *userData,
const XML_Char *prefix);
typedef void(XMLCALL *XML_EndNamespaceDeclHandler)(void *userData,
const XML_Char *prefix);
/* This is called if the document is not standalone, that is, it has an
external subset or a reference to a parameter entity, but does not
......@@ -422,7 +385,7 @@ typedef void (XMLCALL *XML_EndNamespaceDeclHandler) (
conditions above this handler will only be called if the referenced
entity was actually read.
*/
typedef int (XMLCALL *XML_NotStandaloneHandler) (void *userData);
typedef int(XMLCALL *XML_NotStandaloneHandler)(void *userData);
/* This is called for a reference to an external parsed general
entity. The referenced entity is not automatically parsed. The
......@@ -458,12 +421,11 @@ typedef int (XMLCALL *XML_NotStandaloneHandler) (void *userData);
Note that unlike other handlers the first argument is the parser,
not userData.
*/
typedef int (XMLCALL *XML_ExternalEntityRefHandler) (
XML_Parser parser,
const XML_Char *context,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
typedef int(XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser parser,
const XML_Char *context,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
/* This is called in two situations:
1) An entity reference is encountered for which no declaration
......@@ -475,10 +437,9 @@ typedef int (XMLCALL *XML_ExternalEntityRefHandler) (
the event would be out of sync with the reporting of the
declarations or attribute values
*/
typedef void (XMLCALL *XML_SkippedEntityHandler) (
void *userData,
const XML_Char *entityName,
int is_parameter_entity);
typedef void(XMLCALL *XML_SkippedEntityHandler)(void *userData,
const XML_Char *entityName,
int is_parameter_entity);
/* This structure is filled in by the XML_UnknownEncodingHandler to
provide information to the parser about encodings that are unknown
......@@ -535,8 +496,8 @@ typedef void (XMLCALL *XML_SkippedEntityHandler) (
typedef struct {
int map[256];
void *data;
int (XMLCALL *convert)(void *data, const char *s);
void (XMLCALL *release)(void *data);
int(XMLCALL *convert)(void *data, const char *s);
void(XMLCALL *release)(void *data);
} XML_Encoding;
/* This is called for an encoding that is unknown to the parser.
......@@ -554,23 +515,19 @@ typedef struct {
If info does not describe a suitable encoding, then the parser will
return an XML_UNKNOWN_ENCODING error.
*/
typedef int (XMLCALL *XML_UnknownEncodingHandler) (
void *encodingHandlerData,
const XML_Char *name,
XML_Encoding *info);
typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData,
const XML_Char *name,
XML_Encoding *info);
XMLPARSEAPI(void)
XML_SetElementHandler(XML_Parser parser,
XML_StartElementHandler start,
XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
XML_EndElementHandler end);
XMLPARSEAPI(void)
XML_SetStartElementHandler(XML_Parser parser,
XML_StartElementHandler handler);
XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler);
XMLPARSEAPI(void)
XML_SetEndElementHandler(XML_Parser parser,
XML_EndElementHandler handler);
XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler);
XMLPARSEAPI(void)
XML_SetCharacterDataHandler(XML_Parser parser,
......@@ -580,8 +537,7 @@ XMLPARSEAPI(void)
XML_SetProcessingInstructionHandler(XML_Parser parser,
XML_ProcessingInstructionHandler handler);
XMLPARSEAPI(void)
XML_SetCommentHandler(XML_Parser parser,
XML_CommentHandler handler);
XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler);
XMLPARSEAPI(void)
XML_SetCdataSectionHandler(XML_Parser parser,
......@@ -601,20 +557,17 @@ XML_SetEndCdataSectionHandler(XML_Parser parser,
default handler, or to the skipped entity handler, if one is set.
*/
XMLPARSEAPI(void)
XML_SetDefaultHandler(XML_Parser parser,
XML_DefaultHandler handler);
XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler);
/* This sets the default handler but does not inhibit expansion of
internal entities. The entity reference will not be passed to the
default handler.
*/
XMLPARSEAPI(void)
XML_SetDefaultHandlerExpand(XML_Parser parser,
XML_DefaultHandler handler);
XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler);
XMLPARSEAPI(void)
XML_SetDoctypeDeclHandler(XML_Parser parser,
XML_StartDoctypeDeclHandler start,
XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
XML_EndDoctypeDeclHandler end);
XMLPARSEAPI(void)
......@@ -622,16 +575,14 @@ XML_SetStartDoctypeDeclHandler(XML_Parser parser,
XML_StartDoctypeDeclHandler start);
XMLPARSEAPI(void)
XML_SetEndDoctypeDeclHandler(XML_Parser parser,
XML_EndDoctypeDeclHandler end);
XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end);
XMLPARSEAPI(void)
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
XML_UnparsedEntityDeclHandler handler);
XMLPARSEAPI(void)
XML_SetNotationDeclHandler(XML_Parser parser,
XML_NotationDeclHandler handler);
XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler);
XMLPARSEAPI(void)
XML_SetNamespaceDeclHandler(XML_Parser parser,
......@@ -659,8 +610,7 @@ XML_SetExternalEntityRefHandler(XML_Parser parser,
instead of the parser object.
*/
XMLPARSEAPI(void)
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,
void *arg);
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg);
XMLPARSEAPI(void)
XML_SetSkippedEntityHandler(XML_Parser parser,
......@@ -740,7 +690,6 @@ XML_UseParserAsHandlerArg(XML_Parser parser);
XMLPARSEAPI(enum XML_Error)
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
/* Sets the base to be used for resolving relative URIs in system
identifiers in declarations. Resolving relative identifiers is
left to the application: this value will be passed through as the
......@@ -780,10 +729,10 @@ XML_GetIdAttributeIndex(XML_Parser parser);
info->valueEnd - info->valueStart = 4 bytes.
*/
typedef struct {
XML_Index nameStart; /* Offset to beginning of the attribute name. */
XML_Index nameEnd; /* Offset after the attribute name's last byte. */
XML_Index valueStart; /* Offset to beginning of the attribute value. */
XML_Index valueEnd; /* Offset after the attribute value's last byte. */
XML_Index nameStart; /* Offset to beginning of the attribute name. */
XML_Index nameEnd; /* Offset after the attribute name's last byte. */
XML_Index valueStart; /* Offset to beginning of the attribute value. */
XML_Index valueEnd; /* Offset after the attribute value's last byte. */
} XML_AttrInfo;
/* Returns an array of XML_AttrInfo structures for the attribute/value pairs
......@@ -819,20 +768,20 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
(resumable = 0) an already suspended parser. Some call-backs may
still follow because they would otherwise get lost. Examples:
- endElementHandler() for empty elements when stopped in
startElementHandler(),
- endNameSpaceDeclHandler() when stopped in endElementHandler(),
startElementHandler(),
- endNameSpaceDeclHandler() when stopped in endElementHandler(),
and possibly others.
Can be called from most handlers, including DTD related call-backs,
except when parsing an external parameter entity and resumable != 0.
Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise.
Possible error codes:
Possible error codes:
- XML_ERROR_SUSPENDED: when suspending an already suspended parser.
- XML_ERROR_FINISHED: when the parser has already finished.
- XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE.
When resumable != 0 (true) then parsing is suspended, that is,
XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED.
When resumable != 0 (true) then parsing is suspended, that is,
XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED.
Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer()
return XML_STATUS_ERROR with error code XML_ERROR_ABORTED.
......@@ -843,7 +792,7 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
the externalEntityRefHandler() to call XML_StopParser() on the parent
parser (recursively), if one wants to stop parsing altogether.
When suspended, parsing can be resumed by calling XML_ResumeParser().
When suspended, parsing can be resumed by calling XML_ResumeParser().
*/
XMLPARSEAPI(enum XML_Status)
XML_StopParser(XML_Parser parser, XML_Bool resumable);
......@@ -851,7 +800,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable);
/* Resumes parsing after it has been suspended with XML_StopParser().
Must not be called from within a handler call-back. Returns same
status codes as XML_Parse() or XML_ParseBuffer().
Additional error code XML_ERROR_NOT_SUSPENDED possible.
Additional error code XML_ERROR_NOT_SUSPENDED possible.
*Note*:
This must be called on the most deeply nested child parser instance
......@@ -863,12 +812,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable);
XMLPARSEAPI(enum XML_Status)
XML_ResumeParser(XML_Parser parser);
enum XML_Parsing {
XML_INITIALIZED,
XML_PARSING,
XML_FINISHED,
XML_SUSPENDED
};
enum XML_Parsing { XML_INITIALIZED, XML_PARSING, XML_FINISHED, XML_SUSPENDED };
typedef struct {
enum XML_Parsing parsing;
......@@ -900,8 +844,7 @@ XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status);
Otherwise returns a new XML_Parser object.
*/
XMLPARSEAPI(XML_Parser)
XML_ExternalEntityParserCreate(XML_Parser parser,
const XML_Char *context,
XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context,
const XML_Char *encoding);
enum XML_ParamEntityParsing {
......@@ -945,8 +888,7 @@ XML_SetParamEntityParsing(XML_Parser parser,
Note: If parser == NULL, the function will do nothing and return 0.
*/
XMLPARSEAPI(int)
XML_SetHashSalt(XML_Parser parser,
unsigned long hash_salt);
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt);
/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
XML_GetErrorCode returns information about the error.
......@@ -963,7 +905,7 @@ XML_GetErrorCode(XML_Parser parser);
be within the relevant markup. When called outside of the callback
functions, the position indicated will be just past the last parse
event (regardless of whether there was an associated callback).
They may also be called after returning from a call to XML_Parse
or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then
the location is the location of the character at which the error
......@@ -995,14 +937,12 @@ XML_GetCurrentByteCount(XML_Parser parser);
the handler that makes the call.
*/
XMLPARSEAPI(const char *)
XML_GetInputContext(XML_Parser parser,
int *offset,
int *size);
XML_GetInputContext(XML_Parser parser, int *offset, int *size);
/* For backwards compatibility with previous versions. */
#define XML_GetErrorLineNumber XML_GetCurrentLineNumber
#define XML_GetErrorLineNumber XML_GetCurrentLineNumber
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex
/* Frees the content model passed to the element declaration handler */
XMLPARSEAPI(void)
......@@ -1062,21 +1002,20 @@ enum XML_FeatureEnum {
};
typedef struct {
enum XML_FeatureEnum feature;
const XML_LChar *name;
long int value;
enum XML_FeatureEnum feature;
const XML_LChar *name;
long int value;
} XML_Feature;
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
/* Expat follows the semantic versioning convention.
See http://semver.org.
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 2
#define XML_MICRO_VERSION 7
#define XML_MICRO_VERSION 8
#ifdef __cplusplus
}
......
......@@ -35,10 +35,6 @@
/* External API definitions */
#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
# define XML_USE_MSC_EXTENSIONS 1
#endif
/* Expat tries very hard to make the API boundary very specifically
defined. There are two macros defined to control this boundary;
each of these can be defined before including this header to
......@@ -61,12 +57,17 @@
compiled with the cdecl calling convention as the default since
system headers may assume the cdecl convention.
*/
/* Namespace external symbols to allow multiple libexpat version to
co-exist. */
#include "pyexpatns.h"
#ifndef XMLCALL
# if defined(_MSC_VER)
# define XMLCALL __cdecl
# elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER)
# define XMLCALL __attribute__((cdecl))
# else
# if defined(_MSC_VER)
# define XMLCALL __cdecl
# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER)
# define XMLCALL __attribute__((cdecl))
# else
/* For any platform which uses this definition and supports more than
one calling convention, we need to extend this definition to
declare the convention used on that platform, if it's possible to
......@@ -77,49 +78,46 @@
pre-processor and how to specify the same calling convention as the
platform's malloc() implementation.
*/
# define XMLCALL
# endif
#endif /* not defined XMLCALL */
/* Namespace external symbols to allow multiple libexpat version to
co-exist. */
#include "pyexpatns.h"
# define XMLCALL
# endif
#endif /* not defined XMLCALL */
#if !defined(XML_STATIC) && !defined(XMLIMPORT)
# ifndef XML_BUILDING_EXPAT
#if ! defined(XML_STATIC) && ! defined(XMLIMPORT)
# ifndef XML_BUILDING_EXPAT
/* using Expat from an application */
# ifdef XML_USE_MSC_EXTENSIONS
# define XMLIMPORT __declspec(dllimport)
# endif
# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) && ! defined(__CYGWIN__)
# define XMLIMPORT __declspec(dllimport)
# endif
# endif
#endif /* not defined XML_STATIC */
# endif
#endif /* not defined XML_STATIC */
#ifndef XML_ENABLE_VISIBILITY
# define XML_ENABLE_VISIBILITY 0
# define XML_ENABLE_VISIBILITY 0
#endif
#if !defined(XMLIMPORT) && XML_ENABLE_VISIBILITY
# define XMLIMPORT __attribute__ ((visibility ("default")))
#if ! defined(XMLIMPORT) && XML_ENABLE_VISIBILITY
# define XMLIMPORT __attribute__((visibility("default")))
#endif
/* If we didn't define it above, define it away: */
#ifndef XMLIMPORT
# define XMLIMPORT
# define XMLIMPORT
#endif
#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
# define XML_ATTR_MALLOC __attribute__((__malloc__))
#if defined(__GNUC__) \
&& (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
# define XML_ATTR_MALLOC __attribute__((__malloc__))
#else
# define XML_ATTR_MALLOC
# define XML_ATTR_MALLOC
#endif
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
#if defined(__GNUC__) \
&& ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
#else
# define XML_ATTR_ALLOC_SIZE(x)
# define XML_ATTR_ALLOC_SIZE(x)
#endif
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
......@@ -129,35 +127,30 @@ extern "C" {
#endif
#ifdef XML_UNICODE_WCHAR_T
# ifndef XML_UNICODE
# define XML_UNICODE
# endif
# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2)
# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc"
# endif
# ifndef XML_UNICODE
# define XML_UNICODE
# endif
# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2)
# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc"
# endif
#endif
#ifdef XML_UNICODE /* Information is UTF-16 encoded. */
# ifdef XML_UNICODE_WCHAR_T
#ifdef XML_UNICODE /* Information is UTF-16 encoded. */
# ifdef XML_UNICODE_WCHAR_T
typedef wchar_t XML_Char;
typedef wchar_t XML_LChar;
# else
# else
typedef unsigned short XML_Char;
typedef char XML_LChar;
# endif /* XML_UNICODE_WCHAR_T */
#else /* Information is UTF-8 encoded. */
# endif /* XML_UNICODE_WCHAR_T */
#else /* Information is UTF-8 encoded. */
typedef char XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE */
#endif /* XML_UNICODE */
#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */
# if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400
typedef __int64 XML_Index;
typedef unsigned __int64 XML_Size;
# else
#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */
typedef long long XML_Index;
typedef unsigned long long XML_Size;
# endif
#else
typedef long XML_Index;
typedef unsigned long XML_Size;
......
......@@ -32,34 +32,34 @@
/* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */
/* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML,
/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML,
/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS,
/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS,
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML,
/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML,
/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS,
/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS,
/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL,
/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI,
/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER,
......@@ -49,7 +49,7 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(__GNUC__) && defined(__i386__) && !defined(__MINGW32__)
#if defined(__GNUC__) && defined(__i386__) && ! defined(__MINGW32__)
/* We'll use this version by default only where we know it helps.
regparm() generates warnings on Solaris boxes. See SF bug #692878.
......@@ -59,8 +59,8 @@
#define FASTCALL __attribute__((stdcall, regparm(3)))
and let's try this:
*/
#define FASTCALL __attribute__((regparm(3)))
#define PTRFASTCALL __attribute__((regparm(3)))
# define FASTCALL __attribute__((regparm(3)))
# define PTRFASTCALL __attribute__((regparm(3)))
#endif
/* Using __fastcall seems to have an unexpected negative effect under
......@@ -74,55 +74,49 @@
/* Make sure all of these are defined if they aren't already. */
#ifndef FASTCALL
#define FASTCALL
# define FASTCALL
#endif
#ifndef PTRCALL
#define PTRCALL
# define PTRCALL
#endif
#ifndef PTRFASTCALL
#define PTRFASTCALL
# define PTRFASTCALL
#endif
#ifndef XML_MIN_SIZE
#if !defined(__cplusplus) && !defined(inline)
#ifdef __GNUC__
#define inline __inline
#endif /* __GNUC__ */
#endif
# if ! defined(__cplusplus) && ! defined(inline)
# ifdef __GNUC__
# define inline __inline
# endif /* __GNUC__ */
# endif
#endif /* XML_MIN_SIZE */
#ifdef __cplusplus
#define inline inline
# define inline inline
#else
#ifndef inline
#define inline
#endif
# ifndef inline
# define inline
# endif
#endif
#ifndef UNUSED_P
# ifdef __GNUC__
# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
# else
# define UNUSED_P(p) UNUSED_ ## p
# endif
# define UNUSED_P(p) (void)p
#endif
#ifdef __cplusplus
extern "C" {
#endif
#ifdef XML_ENABLE_VISIBILITY
#if XML_ENABLE_VISIBILITY
__attribute__ ((visibility ("default")))
#endif
# if XML_ENABLE_VISIBILITY
__attribute__((visibility("default")))
# endif
#endif
void
_INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef);
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
const char **fromLimRef);
#ifdef __cplusplus
}
......
......@@ -31,34 +31,34 @@
*/
/* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME,
/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME,
/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 2016 - 2017, Steve Holme, <steve_holme@hotmail.com>.
* Copyright (C) 2017, Expat development team
*
* All rights reserved.
* Licensed under the MIT license:
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
* THIRD PARTY RIGHTS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
* THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in this Software without prior written authorization of the
* copyright holder.
*
***************************************************************************/
#if defined(_WIN32)
#include <windows.h>
#include <tchar.h>
HMODULE _Expat_LoadLibrary(LPCTSTR filename);
#if !defined(LOAD_WITH_ALTERED_SEARCH_PATH)
#define LOAD_WITH_ALTERED_SEARCH_PATH 0x00000008
#endif
#if !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
#define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
#endif
/* We use our own typedef here since some headers might lack these */
typedef HMODULE (APIENTRY *LOADLIBRARYEX_FN)(LPCTSTR, HANDLE, DWORD);
/* See function definitions in winbase.h */
#ifdef UNICODE
# ifdef _WIN32_WCE
# define LOADLIBRARYEX L"LoadLibraryExW"
# else
# define LOADLIBRARYEX "LoadLibraryExW"
# endif
#else
# define LOADLIBRARYEX "LoadLibraryExA"
#endif
/*
* _Expat_LoadLibrary()
*
* This is used to dynamically load DLLs using the most secure method available
* for the version of Windows that we are running on.
*
* Parameters:
*
* filename [in] - The filename or full path of the DLL to load. If only the
* filename is passed then the DLL will be loaded from the
* Windows system directory.
*
* Returns the handle of the module on success; otherwise NULL.
*/
HMODULE _Expat_LoadLibrary(LPCTSTR filename)
{
HMODULE hModule = NULL;
LOADLIBRARYEX_FN pLoadLibraryEx = NULL;
/* Get a handle to kernel32 so we can access it's functions at runtime */
HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32"));
if(!hKernel32)
return NULL; /* LCOV_EXCL_LINE */
/* Attempt to find LoadLibraryEx() which is only available on Windows 2000
and above */
pLoadLibraryEx = (LOADLIBRARYEX_FN) GetProcAddress(hKernel32, LOADLIBRARYEX);
/* Detect if there's already a path in the filename and load the library if
there is. Note: Both back slashes and forward slashes have been supported
since the earlier days of DOS at an API level although they are not
supported by command prompt */
if(_tcspbrk(filename, TEXT("\\/"))) {
/** !checksrc! disable BANNEDFUNC 1 **/
hModule = pLoadLibraryEx ?
pLoadLibraryEx(filename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) :
LoadLibrary(filename);
}
/* Detect if KB2533623 is installed, as LOAD_LIBARY_SEARCH_SYSTEM32 is only
supported on Windows Vista, Windows Server 2008, Windows 7 and Windows
Server 2008 R2 with this patch or natively on Windows 8 and above */
else if(pLoadLibraryEx && GetProcAddress(hKernel32, "AddDllDirectory")) {
/* Load the DLL from the Windows system directory */
hModule = pLoadLibraryEx(filename, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
}
else {
/* Attempt to get the Windows system path */
UINT systemdirlen = GetSystemDirectory(NULL, 0);
if(systemdirlen) {
/* Allocate space for the full DLL path (Room for the null terminator
is included in systemdirlen) */
size_t filenamelen = _tcslen(filename);
TCHAR *path = malloc(sizeof(TCHAR) * (systemdirlen + 1 + filenamelen));
if(path && GetSystemDirectory(path, systemdirlen)) {
/* Calculate the full DLL path */
_tcscpy(path + _tcslen(path), TEXT("\\"));
_tcscpy(path + _tcslen(path), filename);
/* Load the DLL from the Windows system directory */
/** !checksrc! disable BANNEDFUNC 1 **/
hModule = pLoadLibraryEx ?
pLoadLibraryEx(path, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) :
LoadLibrary(path);
}
free(path);
}
}
return hModule;
}
#else /* defined(_WIN32) */
/* ISO C requires a translation unit to contain at least one declaration
[-Wempty-translation-unit] */
typedef int _TRANSLATION_UNIT_LOAD_LIBRARY_C_NOT_EMTPY;
#endif /* defined(_WIN32) */
......@@ -31,152 +31,106 @@
*/
static const unsigned namingBitmap[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF,
0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF,
0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000,
0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060,
0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003,
0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003,
0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001,
0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003,
0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000,
0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003,
0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000,
0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000,
0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF,
0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB,
0x40000000, 0xF580C900, 0x00000007, 0x02010800,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
0x00000000, 0x00004C40, 0x00000000, 0x00000000,
0x00000007, 0x00000000, 0x00000000, 0x00000000,
0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF,
0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF,
0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF,
0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003,
0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF,
0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF,
0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF,
0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF,
0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1,
0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3,
0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000,
0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000,
0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF,
0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x1FFF0000, 0x00000002,
0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x04000000,
0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF,
0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFE00F, 0xFC31FFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF0003, 0xFFFFFFFF,
0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, 0x00000000, 0x07FFFFFE,
0x000007FE, 0xFFFE0000, 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060,
0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, 0xFFF99FE0, 0x03C5FDFF,
0xB0000000, 0x00030003, 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, 0xFFF99FE0, 0x23CDFDFF,
0xB0000000, 0x00000003, 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000,
0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, 0xFFFDDFE0, 0x03EFFDFF,
0x40000000, 0x00000003, 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFE, 0x000D7FFF,
0x0000003F, 0x00000000, 0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000,
0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, 0x0007DAED, 0x50000000,
0x82315001, 0x002C62AB, 0x40000000, 0xF580C900, 0x00000007, 0x02010800,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0FFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x03FFFFFF, 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, 0x00000000, 0x00004C40,
0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000000, 0x00000000,
0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, 0x001FFFFF, 0xFFFFFFFE,
0xFFFFFFFF, 0x07FFFFFF, 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F,
0x00000000, 0x00000000, 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, 0x00FFFFFF, 0x00000000,
0xFFFF0000, 0xFFFFFFFF, 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, 0xFFFFD7C0, 0xFFFFFFFB,
0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000,
0x027FFFFF, 0xFFFFFFFE, 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, 0xFFFFFFFF, 0x7CFFFFFF,
0xFFEF7FFF, 0x03FF3DFF, 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF,
0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, 0xFFF987E4, 0xD36DFDFF,
0x5E003987, 0x001FFFC0, 0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1,
0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, 0xD63DC7EC, 0xC3BFC718,
0x00803DC7, 0x0000FF80, 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, 0xFFFDDFEC, 0xC3FFFDFF,
0x00803DCF, 0x0000FFC3, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, 0xFEF02596, 0x3BFF6CAE,
0x03FF3F5F, 0x00000000, 0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF,
0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002,
0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, 0x661FFFFF, 0xFFFFFFFE,
0xFFFFFFFF, 0x77FFFFFF,
};
static const unsigned char nmstrtPages[] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00,
0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x00, 0x14, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
};
static const unsigned char namePages[] = {
0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00,
0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, 0x00, 0x1F, 0x20, 0x21,
0x22, 0x23, 0x24, 0x25, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x26, 0x14, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
};
......@@ -11,6 +11,10 @@
* --------------------------------------------------------------------------
* HISTORY:
*
* 2019-08-03 (Sebastian Pipping)
* - Mark part of sip24_valid as to be excluded from clang-format
* - Re-format code using clang-format 9
*
* 2018-07-08 (Anton Maklakov)
* - Add "fall through" markers for GCC's -Wimplicit-fallthrough
*
......@@ -94,186 +98,186 @@
#include <stddef.h> /* size_t */
#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600)
/* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */
typedef unsigned __int8 uint8_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
/* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */
typedef unsigned __int8 uint8_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h> /* uint64_t uint32_t uint8_t */
# include <stdint.h> /* uint64_t uint32_t uint8_t */
#endif
/*
* Workaround to not require a C++11 compiler for using ULL suffix
* if this code is included and compiled as C++; related GCC warning is:
* warning: use of C++11 long long integer constant [-Wlong-long]
*/
#define _SIP_ULL(high, low) (((uint64_t)high << 32) | low)
#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ( (x) >> (64 - (b))))
#define _SIP_ULL(high, low) (((uint64_t)high << 32) | low)
#define SIP_U32TO8_LE(p, v) \
(p)[0] = (uint8_t)((v) >> 0); (p)[1] = (uint8_t)((v) >> 8); \
(p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24);
#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
#define SIP_U64TO8_LE(p, v) \
SIP_U32TO8_LE((p) + 0, (uint32_t)((v) >> 0)); \
SIP_U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
#define SIP_U32TO8_LE(p, v) \
(p)[0] = (uint8_t)((v) >> 0); \
(p)[1] = (uint8_t)((v) >> 8); \
(p)[2] = (uint8_t)((v) >> 16); \
(p)[3] = (uint8_t)((v) >> 24);
#define SIP_U8TO64_LE(p) \
(((uint64_t)((p)[0]) << 0) | \
((uint64_t)((p)[1]) << 8) | \
((uint64_t)((p)[2]) << 16) | \
((uint64_t)((p)[3]) << 24) | \
((uint64_t)((p)[4]) << 32) | \
((uint64_t)((p)[5]) << 40) | \
((uint64_t)((p)[6]) << 48) | \
((uint64_t)((p)[7]) << 56))
#define SIP_U64TO8_LE(p, v) \
SIP_U32TO8_LE((p) + 0, (uint32_t)((v) >> 0)); \
SIP_U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
#define SIP_U8TO64_LE(p) \
(((uint64_t)((p)[0]) << 0) | ((uint64_t)((p)[1]) << 8) \
| ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) \
| ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \
| ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
#define SIPHASH_INITIALIZER { 0, 0, 0, 0, { 0 }, 0, 0 }
#define SIPHASH_INITIALIZER \
{ 0, 0, 0, 0, {0}, 0, 0 }
struct siphash {
uint64_t v0, v1, v2, v3;
uint64_t v0, v1, v2, v3;
unsigned char buf[8], *p;
uint64_t c;
unsigned char buf[8], *p;
uint64_t c;
}; /* struct siphash */
#define SIP_KEYLEN 16
struct sipkey {
uint64_t k[2];
uint64_t k[2];
}; /* struct sipkey */
#define sip_keyof(k) sip_tokey(&(struct sipkey){ { 0 } }, (k))
#define sip_keyof(k) sip_tokey(&(struct sipkey){{0}}, (k))
static struct sipkey *sip_tokey(struct sipkey *key, const void *src) {
key->k[0] = SIP_U8TO64_LE((const unsigned char *)src);
key->k[1] = SIP_U8TO64_LE((const unsigned char *)src + 8);
return key;
static struct sipkey *
sip_tokey(struct sipkey *key, const void *src) {
key->k[0] = SIP_U8TO64_LE((const unsigned char *)src);
key->k[1] = SIP_U8TO64_LE((const unsigned char *)src + 8);
return key;
} /* sip_tokey() */
#ifdef SIPHASH_TOBIN
#define sip_binof(v) sip_tobin((unsigned char[8]){ 0 }, (v))
# define sip_binof(v) sip_tobin((unsigned char[8]){0}, (v))
static void *sip_tobin(void *dst, uint64_t u64) {
SIP_U64TO8_LE((unsigned char *)dst, u64);
return dst;
static void *
sip_tobin(void *dst, uint64_t u64) {
SIP_U64TO8_LE((unsigned char *)dst, u64);
return dst;
} /* sip_tobin() */
#endif /* SIPHASH_TOBIN */
#endif /* SIPHASH_TOBIN */
static void sip_round(struct siphash *H, const int rounds) {
int i;
static void
sip_round(struct siphash *H, const int rounds) {
int i;
for (i = 0; i < rounds; i++) {
H->v0 += H->v1;
H->v1 = SIP_ROTL(H->v1, 13);
H->v1 ^= H->v0;
H->v0 = SIP_ROTL(H->v0, 32);
for (i = 0; i < rounds; i++) {
H->v0 += H->v1;
H->v1 = SIP_ROTL(H->v1, 13);
H->v1 ^= H->v0;
H->v0 = SIP_ROTL(H->v0, 32);
H->v2 += H->v3;
H->v3 = SIP_ROTL(H->v3, 16);
H->v3 ^= H->v2;
H->v2 += H->v3;
H->v3 = SIP_ROTL(H->v3, 16);
H->v3 ^= H->v2;
H->v0 += H->v3;
H->v3 = SIP_ROTL(H->v3, 21);
H->v3 ^= H->v0;
H->v0 += H->v3;
H->v3 = SIP_ROTL(H->v3, 21);
H->v3 ^= H->v0;
H->v2 += H->v1;
H->v1 = SIP_ROTL(H->v1, 17);
H->v1 ^= H->v2;
H->v2 = SIP_ROTL(H->v2, 32);
}
H->v2 += H->v1;
H->v1 = SIP_ROTL(H->v1, 17);
H->v1 ^= H->v2;
H->v2 = SIP_ROTL(H->v2, 32);
}
} /* sip_round() */
static struct siphash *
sip24_init(struct siphash *H, const struct sipkey *key) {
H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];
static struct siphash *sip24_init(struct siphash *H,
const struct sipkey *key) {
H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];
H->p = H->buf;
H->c = 0;
H->p = H->buf;
H->c = 0;
return H;
return H;
} /* sip24_init() */
#define sip_endof(a) (&(a)[sizeof(a) / sizeof *(a)])
#define sip_endof(a) (&(a)[sizeof (a) / sizeof *(a)])
static struct siphash *sip24_update(struct siphash *H, const void *src,
size_t len) {
const unsigned char *p = (const unsigned char *)src, *pe = p + len;
uint64_t m;
static struct siphash *
sip24_update(struct siphash *H, const void *src, size_t len) {
const unsigned char *p = (const unsigned char *)src, *pe = p + len;
uint64_t m;
do {
while (p < pe && H->p < sip_endof(H->buf))
*H->p++ = *p++;
do {
while (p < pe && H->p < sip_endof(H->buf))
*H->p++ = *p++;
if (H->p < sip_endof(H->buf))
break;
if (H->p < sip_endof(H->buf))
break;
m = SIP_U8TO64_LE(H->buf);
H->v3 ^= m;
sip_round(H, 2);
H->v0 ^= m;
m = SIP_U8TO64_LE(H->buf);
H->v3 ^= m;
sip_round(H, 2);
H->v0 ^= m;
H->p = H->buf;
H->c += 8;
} while (p < pe);
H->p = H->buf;
H->c += 8;
} while (p < pe);
return H;
return H;
} /* sip24_update() */
static uint64_t sip24_final(struct siphash *H) {
const char left = (char)(H->p - H->buf);
uint64_t b = (H->c + left) << 56;
switch (left) {
case 7: b |= (uint64_t)H->buf[6] << 48;
/* fall through */
case 6: b |= (uint64_t)H->buf[5] << 40;
/* fall through */
case 5: b |= (uint64_t)H->buf[4] << 32;
/* fall through */
case 4: b |= (uint64_t)H->buf[3] << 24;
/* fall through */
case 3: b |= (uint64_t)H->buf[2] << 16;
/* fall through */
case 2: b |= (uint64_t)H->buf[1] << 8;
/* fall through */
case 1: b |= (uint64_t)H->buf[0] << 0;
/* fall through */
case 0: break;
}
H->v3 ^= b;
sip_round(H, 2);
H->v0 ^= b;
H->v2 ^= 0xff;
sip_round(H, 4);
return H->v0 ^ H->v1 ^ H->v2 ^ H->v3;
static uint64_t
sip24_final(struct siphash *H) {
const char left = (char)(H->p - H->buf);
uint64_t b = (H->c + left) << 56;
switch (left) {
case 7:
b |= (uint64_t)H->buf[6] << 48;
/* fall through */
case 6:
b |= (uint64_t)H->buf[5] << 40;
/* fall through */
case 5:
b |= (uint64_t)H->buf[4] << 32;
/* fall through */
case 4:
b |= (uint64_t)H->buf[3] << 24;
/* fall through */
case 3:
b |= (uint64_t)H->buf[2] << 16;
/* fall through */
case 2:
b |= (uint64_t)H->buf[1] << 8;
/* fall through */
case 1:
b |= (uint64_t)H->buf[0] << 0;
/* fall through */
case 0:
break;
}
H->v3 ^= b;
sip_round(H, 2);
H->v0 ^= b;
H->v2 ^= 0xff;
sip_round(H, 4);
return H->v0 ^ H->v1 ^ H->v2 ^ H->v3;
} /* sip24_final() */
static uint64_t siphash24(const void *src, size_t len,
const struct sipkey *key) {
struct siphash state = SIPHASH_INITIALIZER;
return sip24_final(sip24_update(sip24_init(&state, key), src, len));
static uint64_t
siphash24(const void *src, size_t len, const struct sipkey *key) {
struct siphash state = SIPHASH_INITIALIZER;
return sip24_final(sip24_update(sip24_init(&state, key), src, len));
} /* siphash24() */
/*
* SipHash-2-4 output with
* k = 00 01 02 ...
......@@ -285,107 +289,110 @@ static uint64_t siphash24(const void *src, size_t len,
* ...
* in = 00 01 02 ... 3e (63 bytes)
*/
static int sip24_valid(void) {
static const unsigned char vectors[64][8] = {
{ 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
{ 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
{ 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
{ 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
{ 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
{ 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
{ 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
{ 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
{ 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
{ 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
{ 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
{ 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
{ 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
{ 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
{ 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
{ 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
{ 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
{ 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
{ 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
{ 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
{ 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
{ 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
{ 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
{ 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
{ 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
{ 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
{ 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
{ 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
{ 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
{ 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
{ 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
{ 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
{ 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
{ 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
{ 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
{ 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
{ 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
{ 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
{ 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
{ 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
{ 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
{ 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
{ 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
{ 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
{ 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
{ 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
{ 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
{ 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
{ 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
{ 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
{ 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
{ 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
{ 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
{ 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
{ 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
{ 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
{ 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
{ 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
{ 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
{ 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
{ 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
{ 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
{ 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
{ 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, }
};
unsigned char in[64];
struct sipkey k;
size_t i;
sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011"
"\012\013\014\015\016\017");
for (i = 0; i < sizeof in; ++i) {
in[i] = (unsigned char)i;
if (siphash24(in, i, &k) != SIP_U8TO64_LE(vectors[i]))
return 0;
}
return 1;
static int
sip24_valid(void) {
/* clang-format off */
static const unsigned char vectors[64][8] = {
{ 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
{ 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
{ 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
{ 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
{ 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
{ 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
{ 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
{ 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
{ 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
{ 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
{ 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
{ 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
{ 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
{ 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
{ 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
{ 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
{ 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
{ 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
{ 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
{ 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
{ 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
{ 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
{ 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
{ 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
{ 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
{ 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
{ 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
{ 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
{ 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
{ 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
{ 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
{ 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
{ 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
{ 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
{ 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
{ 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
{ 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
{ 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
{ 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
{ 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
{ 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
{ 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
{ 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
{ 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
{ 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
{ 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
{ 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
{ 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
{ 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
{ 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
{ 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
{ 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
{ 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
{ 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
{ 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
{ 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
{ 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
{ 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
{ 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
{ 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
{ 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
{ 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
{ 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
{ 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, }
};
/* clang-format on */
unsigned char in[64];
struct sipkey k;
size_t i;
sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011"
"\012\013\014\015\016\017");
for (i = 0; i < sizeof in; ++i) {
in[i] = (unsigned char)i;
if (siphash24(in, i, &k) != SIP_U8TO64_LE(vectors[i]))
return 0;
}
return 1;
} /* sip24_valid() */
#ifdef SIPHASH_MAIN
#include <stdio.h>
# include <stdio.h>
int main(void) {
const int ok = sip24_valid();
int
main(void) {
const int ok = sip24_valid();
if (ok)
puts("OK");
else
puts("FAIL");
if (ok)
puts("OK");
else
puts("FAIL");
return !ok;
return ! ok;
} /* main() */
#endif /* SIPHASH_MAIN */
#endif /* SIPHASH_H */
......@@ -31,34 +31,34 @@
*/
/* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
/* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM,
/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
/* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM,
......@@ -40,20 +40,17 @@
#include <memory.h>
#include <string.h>
#if defined(HAVE_EXPAT_CONFIG_H) /* e.g. MinGW */
# include <expat_config.h>
#else /* !defined(HAVE_EXPAT_CONFIG_H) */
#if defined(HAVE_EXPAT_CONFIG_H) /* e.g. MinGW */
# include <expat_config.h>
#else /* !defined(HAVE_EXPAT_CONFIG_H) */
#define XML_NS 1
#define XML_DTD 1
#define XML_CONTEXT_BYTES 1024
# define XML_NS 1
# define XML_DTD 1
# define XML_CONTEXT_BYTES 1024
/* we will assume all Windows platforms are little endian */
#define BYTEORDER 1234
# define BYTEORDER 1234
#endif /* !defined(HAVE_EXPAT_CONFIG_H) */
#endif /* ndef WINCONFIG_H */
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -33,11 +33,11 @@
#include <stddef.h>
#ifdef _WIN32
#include "winconfig.h"
# include "winconfig.h"
#else
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
# ifdef HAVE_EXPAT_CONFIG_H
# include <expat_config.h>
# endif
#endif /* ndef _WIN32 */
#include "expat_external.h"
......@@ -52,107 +52,88 @@
*/
static const char KW_ANY[] = {
ASCII_A, ASCII_N, ASCII_Y, '\0' };
static const char KW_ATTLIST[] = {
ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
static const char KW_CDATA[] = {
ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_DOCTYPE[] = {
ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
static const char KW_ELEMENT[] = {
ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
static const char KW_EMPTY[] = {
ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
static const char KW_ENTITIES[] = {
ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
'\0' };
static const char KW_ENTITY[] = {
ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
static const char KW_FIXED[] = {
ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
static const char KW_ID[] = {
ASCII_I, ASCII_D, '\0' };
static const char KW_IDREF[] = {
ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
static const char KW_IDREFS[] = {
ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
static const char KW_ATTLIST[]
= {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
static const char KW_CDATA[]
= {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_DOCTYPE[]
= {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
static const char KW_ELEMENT[]
= {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
static const char KW_EMPTY[]
= {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
ASCII_I, ASCII_E, ASCII_S, '\0'};
static const char KW_ENTITY[]
= {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
static const char KW_FIXED[]
= {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
static const char KW_IDREF[]
= {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
static const char KW_IDREFS[]
= {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
#ifdef XML_DTD
static const char KW_IGNORE[] = {
ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
static const char KW_IGNORE[]
= {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
#endif
static const char KW_IMPLIED[] = {
ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
static const char KW_IMPLIED[]
= {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
#ifdef XML_DTD
static const char KW_INCLUDE[] = {
ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
static const char KW_INCLUDE[]
= {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
#endif
static const char KW_NDATA[] = {
ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_NMTOKEN[] = {
ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
static const char KW_NMTOKENS[] = {
ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
'\0' };
static const char KW_NOTATION[] =
{ ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
'\0' };
static const char KW_PCDATA[] = {
ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_PUBLIC[] = {
ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
static const char KW_REQUIRED[] = {
ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
'\0' };
static const char KW_SYSTEM[] = {
ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
static const char KW_NDATA[]
= {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_NMTOKEN[]
= {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
ASCII_E, ASCII_N, ASCII_S, '\0'};
static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
ASCII_I, ASCII_O, ASCII_N, '\0'};
static const char KW_PCDATA[]
= {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_PUBLIC[]
= {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
ASCII_R, ASCII_E, ASCII_D, '\0'};
static const char KW_SYSTEM[]
= {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
# define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
#ifdef XML_DTD
#define setTopLevel(state) \
((state)->handler = ((state)->documentEntity \
? internalSubset \
: externalSubset1))
# define setTopLevel(state) \
((state)->handler \
= ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
# define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
const char *ptr, const char *end,
const ENCODING *enc);
static PROLOG_HANDLER
prolog0, prolog1, prolog2,
doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
internalSubset,
entity0, entity1, entity2, entity3, entity4, entity5, entity6,
entity7, entity8, entity9, entity10,
notation0, notation1, notation2, notation3, notation4,
attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
attlist7, attlist8, attlist9,
element0, element1, element2, element3, element4, element5, element6,
element7,
static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
attlist9, element0, element1, element2, element3, element4, element5,
element6, element7,
#ifdef XML_DTD
externalSubset0, externalSubset1,
condSect0, condSect1, condSect2,
externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
#endif /* XML_DTD */
declClose,
error;
declClose, error;
static int FASTCALL common(PROLOG_STATE *state, int tok);
static int PTRCALL
prolog0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
state->handler = prolog1;
......@@ -169,10 +150,8 @@ prolog0(PROLOG_STATE *state,
case XML_TOK_BOM:
return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN:
if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_DOCTYPE))
if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
KW_DOCTYPE))
break;
state->handler = doctype0;
return XML_ROLE_DOCTYPE_NONE;
......@@ -184,12 +163,8 @@ prolog0(PROLOG_STATE *state,
}
static int PTRCALL
prolog1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
......@@ -207,10 +182,8 @@ prolog1(PROLOG_STATE *state,
*/
return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
case XML_TOK_DECL_OPEN:
if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_DOCTYPE))
if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
KW_DOCTYPE))
break;
state->handler = doctype0;
return XML_ROLE_DOCTYPE_NONE;
......@@ -222,12 +195,11 @@ prolog1(PROLOG_STATE *state,
}
static int PTRCALL
prolog2(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
......@@ -243,12 +215,11 @@ prolog2(PROLOG_STATE *state,
}
static int PTRCALL
doctype0(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_DOCTYPE_NONE;
......@@ -261,12 +232,8 @@ doctype0(PROLOG_STATE *state,
}
static int PTRCALL
doctype1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_DOCTYPE_NONE;
......@@ -291,12 +258,11 @@ doctype1(PROLOG_STATE *state,
}
static int PTRCALL
doctype2(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_DOCTYPE_NONE;
......@@ -308,12 +274,11 @@ doctype2(PROLOG_STATE *state,
}
static int PTRCALL
doctype3(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_DOCTYPE_NONE;
......@@ -325,12 +290,11 @@ doctype3(PROLOG_STATE *state,
}
static int PTRCALL
doctype4(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_DOCTYPE_NONE;
......@@ -345,12 +309,11 @@ doctype4(PROLOG_STATE *state,
}
static int PTRCALL
doctype5(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_DOCTYPE_NONE;
......@@ -362,40 +325,28 @@ doctype5(PROLOG_STATE *state,
}
static int PTRCALL
internalSubset(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN:
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
KW_ENTITY)) {
state->handler = entity0;
return XML_ROLE_ENTITY_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
KW_ATTLIST)) {
state->handler = attlist0;
return XML_ROLE_ATTLIST_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
KW_ELEMENT)) {
state->handler = element0;
return XML_ROLE_ELEMENT_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
KW_NOTATION)) {
state->handler = notation0;
return XML_ROLE_NOTATION_NONE;
......@@ -419,12 +370,8 @@ internalSubset(PROLOG_STATE *state,
#ifdef XML_DTD
static int PTRCALL
externalSubset0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
state->handler = externalSubset1;
if (tok == XML_TOK_XML_DECL)
return XML_ROLE_TEXT_DECL;
......@@ -432,12 +379,8 @@ externalSubset0(PROLOG_STATE *state,
}
static int PTRCALL
externalSubset1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_COND_SECT_OPEN:
state->handler = condSect0;
......@@ -464,12 +407,11 @@ externalSubset1(PROLOG_STATE *state,
#endif /* XML_DTD */
static int PTRCALL
entity0(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -484,12 +426,11 @@ entity0(PROLOG_STATE *state,
}
static int PTRCALL
entity1(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -501,12 +442,8 @@ entity1(PROLOG_STATE *state,
}
static int PTRCALL
entity2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -529,12 +466,11 @@ entity2(PROLOG_STATE *state,
}
static int PTRCALL
entity3(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -546,12 +482,11 @@ entity3(PROLOG_STATE *state,
}
static int PTRCALL
entity4(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -563,12 +498,8 @@ entity4(PROLOG_STATE *state,
}
static int PTRCALL
entity5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -586,12 +517,11 @@ entity5(PROLOG_STATE *state,
}
static int PTRCALL
entity6(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -604,12 +534,8 @@ entity6(PROLOG_STATE *state,
}
static int PTRCALL
entity7(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -632,12 +558,11 @@ entity7(PROLOG_STATE *state,
}
static int PTRCALL
entity8(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -649,12 +574,11 @@ entity8(PROLOG_STATE *state,
}
static int PTRCALL
entity9(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -666,12 +590,11 @@ entity9(PROLOG_STATE *state,
}
static int PTRCALL
entity10(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ENTITY_NONE;
......@@ -683,12 +606,11 @@ entity10(PROLOG_STATE *state,
}
static int PTRCALL
notation0(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NOTATION_NONE;
......@@ -700,12 +622,8 @@ notation0(PROLOG_STATE *state,
}
static int PTRCALL
notation1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NOTATION_NONE;
......@@ -724,12 +642,11 @@ notation1(PROLOG_STATE *state,
}
static int PTRCALL
notation2(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NOTATION_NONE;
......@@ -741,12 +658,11 @@ notation2(PROLOG_STATE *state,
}
static int PTRCALL
notation3(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NOTATION_NONE;
......@@ -759,12 +675,11 @@ notation3(PROLOG_STATE *state,
}
static int PTRCALL
notation4(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NOTATION_NONE;
......@@ -780,12 +695,11 @@ notation4(PROLOG_STATE *state,
}
static int PTRCALL
attlist0(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -798,12 +712,11 @@ attlist0(PROLOG_STATE *state,
}
static int PTRCALL
attlist1(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -819,34 +732,23 @@ attlist1(PROLOG_STATE *state,
}
static int PTRCALL
attlist2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME:
{
static const char * const types[] = {
KW_CDATA,
KW_ID,
KW_IDREF,
KW_IDREFS,
KW_ENTITY,
KW_ENTITIES,
KW_NMTOKEN,
KW_NMTOKENS,
};
int i;
for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
state->handler = attlist8;
return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
}
}
case XML_TOK_NAME: {
static const char *const types[] = {
KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS,
KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS,
};
int i;
for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++)
if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
state->handler = attlist8;
return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
}
}
if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
state->handler = attlist5;
return XML_ROLE_ATTLIST_NONE;
......@@ -860,12 +762,11 @@ attlist2(PROLOG_STATE *state,
}
static int PTRCALL
attlist3(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -879,12 +780,11 @@ attlist3(PROLOG_STATE *state,
}
static int PTRCALL
attlist4(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -899,12 +799,11 @@ attlist4(PROLOG_STATE *state,
}
static int PTRCALL
attlist5(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -916,12 +815,11 @@ attlist5(PROLOG_STATE *state,
}
static int PTRCALL
attlist6(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -933,12 +831,11 @@ attlist6(PROLOG_STATE *state,
}
static int PTRCALL
attlist7(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -954,33 +851,23 @@ attlist7(PROLOG_STATE *state,
/* default value */
static int PTRCALL
attlist8(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
KW_IMPLIED)) {
state->handler = attlist1;
return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
}
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
KW_REQUIRED)) {
state->handler = attlist1;
return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
}
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
KW_FIXED)) {
state->handler = attlist9;
return XML_ROLE_ATTLIST_NONE;
......@@ -994,12 +881,11 @@ attlist8(PROLOG_STATE *state,
}
static int PTRCALL
attlist9(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ATTLIST_NONE;
......@@ -1011,12 +897,11 @@ attlist9(PROLOG_STATE *state,
}
static int PTRCALL
element0(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
......@@ -1029,12 +914,8 @@ element0(PROLOG_STATE *state,
}
static int PTRCALL
element1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
......@@ -1059,19 +940,13 @@ element1(PROLOG_STATE *state,
}
static int PTRCALL
element2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
KW_PCDATA)) {
state->handler = element3;
return XML_ROLE_CONTENT_PCDATA;
......@@ -1099,12 +974,11 @@ element2(PROLOG_STATE *state,
}
static int PTRCALL
element3(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
......@@ -1124,12 +998,11 @@ element3(PROLOG_STATE *state,
}
static int PTRCALL
element4(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
......@@ -1142,12 +1015,11 @@ element4(PROLOG_STATE *state,
}
static int PTRCALL
element5(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
......@@ -1163,12 +1035,11 @@ element5(PROLOG_STATE *state,
}
static int PTRCALL
element6(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
......@@ -1193,12 +1064,11 @@ element6(PROLOG_STATE *state,
}
static int PTRCALL
element7(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_ELEMENT_NONE;
......@@ -1243,12 +1113,8 @@ element7(PROLOG_STATE *state,
#ifdef XML_DTD
static int PTRCALL
condSect0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
......@@ -1267,12 +1133,11 @@ condSect0(PROLOG_STATE *state,
}
static int PTRCALL
condSect1(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
......@@ -1285,12 +1150,11 @@ condSect1(PROLOG_STATE *state,
}
static int PTRCALL
condSect2(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
......@@ -1304,12 +1168,11 @@ condSect2(PROLOG_STATE *state,
#endif /* XML_DTD */
static int PTRCALL
declClose(PROLOG_STATE *state,
int tok,
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
switch (tok) {
case XML_TOK_PROLOG_S:
return state->role_none;
......@@ -1341,21 +1204,21 @@ declClose(PROLOG_STATE *state,
* LCOV_EXCL_START
*/
static int PTRCALL
error(PROLOG_STATE *UNUSED_P(state),
int UNUSED_P(tok),
const char *UNUSED_P(ptr),
const char *UNUSED_P(end),
const ENCODING *UNUSED_P(enc))
{
error(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
const ENCODING *enc) {
UNUSED_P(state);
UNUSED_P(tok);
UNUSED_P(ptr);
UNUSED_P(end);
UNUSED_P(enc);
return XML_ROLE_NONE;
}
/* LCOV_EXCL_STOP */
static int FASTCALL
common(PROLOG_STATE *state, int tok)
{
common(PROLOG_STATE *state, int tok) {
#ifdef XML_DTD
if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
return XML_ROLE_INNER_PARAM_ENTITY_REF;
#endif
state->handler = error;
......@@ -1363,8 +1226,7 @@ common(PROLOG_STATE *state, int tok)
}
void
XmlPrologStateInit(PROLOG_STATE *state)
{
XmlPrologStateInit(PROLOG_STATE *state) {
state->handler = prolog0;
#ifdef XML_DTD
state->documentEntity = 1;
......@@ -1376,8 +1238,7 @@ XmlPrologStateInit(PROLOG_STATE *state)
#ifdef XML_DTD
void
XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
XmlPrologStateInitExternalEntity(PROLOG_STATE *state) {
state->handler = externalSubset0;
state->documentEntity = 0;
state->includeLevel = 0;
......
......@@ -36,7 +36,7 @@
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt
# define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt
#endif
#include "xmltok.h"
......@@ -113,11 +113,8 @@ enum {
};
typedef struct prolog_state {
int (PTRCALL *handler) (struct prolog_state *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc);
int(PTRCALL *handler)(struct prolog_state *state, int tok, const char *ptr,
const char *end, const ENCODING *enc);
unsigned level;
int role_none;
#ifdef XML_DTD
......@@ -132,8 +129,8 @@ void XmlPrologStateInit(PROLOG_STATE *);
void XmlPrologStateInitExternalEntity(PROLOG_STATE *);
#endif /* XML_DTD */
#define XmlTokenRole(state, tok, ptr, end, enc) \
(((state)->handler)(state, tok, ptr, end, enc))
#define XmlTokenRole(state, tok, ptr, end, enc) \
(((state)->handler)(state, tok, ptr, end, enc))
#ifdef __cplusplus
}
......
......@@ -31,24 +31,23 @@
*/
#include <stddef.h>
#include <string.h> /* memcpy */
#include <string.h> /* memcpy */
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
/* for vs2012/11.0/1700 and earlier Visual Studio compilers */
# define bool int
# define false 0
# define true 1
/* for vs2012/11.0/1700 and earlier Visual Studio compilers */
# define bool int
# define false 0
# define true 1
#else
# include <stdbool.h>
# include <stdbool.h>
#endif
#ifdef _WIN32
#include "winconfig.h"
# include "winconfig.h"
#else
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
# ifdef HAVE_EXPAT_CONFIG_H
# include <expat_config.h>
# endif
#endif /* ndef _WIN32 */
#include "expat_external.h"
......@@ -57,58 +56,49 @@
#include "nametab.h"
#ifdef XML_DTD
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
# define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
# define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif
#define VTABLE1 \
{ PREFIX(prologTok), PREFIX(contentTok), \
PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
PREFIX(nameMatchesAscii), \
PREFIX(nameLength), \
PREFIX(skipS), \
PREFIX(getAtts), \
PREFIX(charRefNumber), \
PREFIX(predefinedEntityName), \
PREFIX(updatePosition), \
PREFIX(isPublicId)
#define VTABLE1 \
{PREFIX(prologTok), PREFIX(contentTok), \
PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \
{PREFIX(attributeValueTok), PREFIX(entityValueTok)}, \
PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS), \
PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), \
PREFIX(updatePosition), PREFIX(isPublicId)
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
#define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
pages, 3 bits to add to that index and 5 bits to generate the mask.
*/
#define UTF8_GET_NAMING2(pages, byte) \
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \
& (1u << (((byte)[1]) & 0x1F)))
#define UTF8_GET_NAMING2(pages, byte) \
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \
& (1u << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
into pages, 3 bits to add to that index and 5 bits to generate the
mask.
*/
#define UTF8_GET_NAMING3(pages, byte) \
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
+ ((((byte)[1]) >> 2) & 0xF)] \
<< 3) \
+ ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \
& (1u << (((byte)[2]) & 0x1F)))
#define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \
? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
: ((n) == 3 \
? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
: 0))
#define UTF8_GET_NAMING3(pages, byte) \
(namingBitmap \
[((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \
<< 3) \
+ ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
& (1u << (((byte)[2]) & 0x1F)))
#define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \
? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
: ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
......@@ -120,88 +110,76 @@
(A & 0xC0) == 0xC0 means A > 0xBF
*/
#define UTF8_INVALID2(p) \
#define UTF8_INVALID2(p) \
((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
#define UTF8_INVALID3(p) \
(((p)[2] & 0x80) == 0 \
|| \
((*p) == 0xEF && (p)[1] == 0xBF \
? \
(p)[2] > 0xBD \
: \
((p)[2] & 0xC0) == 0xC0) \
|| \
((*p) == 0xE0 \
? \
(p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
#define UTF8_INVALID4(p) \
(((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
|| \
((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
|| \
((*p) == 0xF0 \
? \
(p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
#define UTF8_INVALID3(p) \
(((p)[2] & 0x80) == 0 \
|| ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \
: ((p)[2] & 0xC0) == 0xC0) \
|| ((*p) == 0xE0 \
? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
: ((p)[1] & 0x80) == 0 \
|| ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
#define UTF8_INVALID4(p) \
(((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 \
|| ((p)[2] & 0xC0) == 0xC0 \
|| ((*p) == 0xF0 \
? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
: ((p)[1] & 0x80) == 0 \
|| ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static int PTRFASTCALL
isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
{
isNever(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
UNUSED_P(p);
return 0;
}
static int PTRFASTCALL
utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
{
utf8_isName2(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
}
static int PTRFASTCALL
utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
{
utf8_isName3(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
}
#define utf8_isName4 isNever
static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
{
utf8_isNmstrt2(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
}
static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
{
utf8_isNmstrt3(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
}
#define utf8_isNmstrt4 isNever
static int PTRFASTCALL
utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
{
utf8_isInvalid2(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return UTF8_INVALID2((const unsigned char *)p);
}
static int PTRFASTCALL
utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
{
utf8_isInvalid3(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return UTF8_INVALID3((const unsigned char *)p);
}
static int PTRFASTCALL
utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
{
utf8_isInvalid4(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return UTF8_INVALID4((const unsigned char *)p);
}
......@@ -209,61 +187,44 @@ struct normal_encoding {
ENCODING enc;
unsigned char type[256];
#ifdef XML_MIN_SIZE
int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
#endif /* XML_MIN_SIZE */
int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
};
#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc))
#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))
#ifdef XML_MIN_SIZE
#define STANDARD_VTABLE(E) \
E ## byteType, \
E ## isNameMin, \
E ## isNmstrtMin, \
E ## byteToAscii, \
E ## charMatches,
# define STANDARD_VTABLE(E) \
E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
#else
#define STANDARD_VTABLE(E) /* as nothing */
# define STANDARD_VTABLE(E) /* as nothing */
#endif
#define NORMAL_VTABLE(E) \
E ## isName2, \
E ## isName3, \
E ## isName4, \
E ## isNmstrt2, \
E ## isNmstrt3, \
E ## isNmstrt4, \
E ## isInvalid2, \
E ## isInvalid3, \
E ## isInvalid4
#define NULL_VTABLE \
/* isName2 */ NULL, \
/* isName3 */ NULL, \
/* isName4 */ NULL, \
/* isNmstrt2 */ NULL, \
/* isNmstrt3 */ NULL, \
/* isNmstrt4 */ NULL, \
/* isInvalid2 */ NULL, \
/* isInvalid3 */ NULL, \
/* isInvalid4 */ NULL
#define NORMAL_VTABLE(E) \
E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, \
E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4
#define NULL_VTABLE \
/* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL, \
/* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \
/* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
static int FASTCALL checkCharRefNumber(int);
......@@ -271,75 +232,70 @@ static int FASTCALL checkCharRefNumber(int);
#include "ascii.h"
#ifdef XML_MIN_SIZE
#define sb_isNameMin isNever
#define sb_isNmstrtMin isNever
# define sb_isNameMin isNever
# define sb_isNmstrtMin isNever
#endif
#ifdef XML_MIN_SIZE
#define MINBPC(enc) ((enc)->minBytesPerChar)
# define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#define MINBPC(enc) 1
# define MINBPC(enc) 1
#endif
#define SB_BYTE_TYPE(enc, p) \
#define SB_BYTE_TYPE(enc, p) \
(((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p)
{
sb_byteType(const ENCODING *enc, const char *p) {
return SB_BYTE_TYPE(enc, p);
}
#define BYTE_TYPE(enc, p) \
(AS_NORMAL_ENCODING(enc)->byteType(enc, p))
# define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
# define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
(AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
# define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p)
{
sb_byteToAscii(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return *p;
}
#else
#define BYTE_TO_ASCII(enc, p) (*(p))
# define BYTE_TO_ASCII(enc, p) (*(p))
#endif
#define IS_NAME_CHAR(enc, p, n) \
(AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
(AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
(AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
(AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
(AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
(AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
# define IS_NAME_CHAR_MINBPC(enc, p) \
(AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
# define IS_NMSTRT_CHAR_MINBPC(enc, p) \
(AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
# define IS_NAME_CHAR_MINBPC(enc, p) (0)
# define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif
#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
(AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
# define CHAR_MATCHES(enc, p, c) \
(AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c)
{
sb_charMatches(const ENCODING *enc, const char *p, int c) {
UNUSED_P(enc);
return *p == c;
}
#else
/* c is an ASCII character */
#define CHAR_MATCHES(enc, p, c) (*(p) == c)
# define CHAR_MATCHES(enc, p, c) (*(p) == c)
#endif
#define PREFIX(ident) normal_ ## ident
#define PREFIX(ident) normal_##ident
#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C
......@@ -354,42 +310,46 @@ sb_charMatches(const ENCODING *enc, const char *p, int c)
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval1 = 0x00,
UTF8_cval2 = 0xc0,
UTF8_cval3 = 0xe0,
UTF8_cval4 = 0xf0
enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval1 = 0x00,
UTF8_cval2 = 0xc0,
UTF8_cval3 = 0xe0,
UTF8_cval4 = 0xf0
};
void
_INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef)
{
const char * fromLim = *fromLimRef;
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
const char **fromLimRef) {
const char *fromLim = *fromLimRef;
size_t walked = 0;
for (; fromLim > from; fromLim--, walked++) {
const unsigned char prev = (unsigned char)fromLim[-1];
if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
if ((prev & 0xf8u)
== 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
if (walked + 1 >= 4) {
fromLim += 4 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
} else if ((prev & 0xf0u)
== 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
if (walked + 1 >= 3) {
fromLim += 3 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
} else if ((prev & 0xe0u)
== 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
if (walked + 1 >= 2) {
fromLim += 2 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
} else if ((prev & 0x80u)
== 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
break;
}
}
......@@ -397,16 +357,15 @@ _INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** from
}
static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
char **toP, const char *toLim) {
bool input_incomplete = false;
bool output_exhausted = false;
/* Avoid copying partial characters (due to limited space). */
const ptrdiff_t bytesAvailable = fromLim - *fromP;
const ptrdiff_t bytesStorable = toLim - *toP;
UNUSED_P(enc);
if (bytesAvailable > bytesStorable) {
fromLim = *fromP + bytesStorable;
output_exhausted = true;
......@@ -414,7 +373,7 @@ utf8_toUtf8(const ENCODING *UNUSED_P(enc),
/* Avoid copying partial characters (from incomplete input). */
{
const char * const fromLimBefore = fromLim;
const char *const fromLimBefore = fromLim;
_INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
if (fromLim < fromLimBefore) {
input_incomplete = true;
......@@ -428,7 +387,7 @@ utf8_toUtf8(const ENCODING *UNUSED_P(enc),
*toP += bytesToCopy;
}
if (output_exhausted) /* needs to go first */
if (output_exhausted) /* needs to go first */
return XML_CONVERT_OUTPUT_EXHAUSTED;
else if (input_incomplete)
return XML_CONVERT_INPUT_INCOMPLETE;
......@@ -437,10 +396,8 @@ utf8_toUtf8(const ENCODING *UNUSED_P(enc),
}
static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) {
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
unsigned short *to = *toP;
const char *from = *fromP;
......@@ -459,30 +416,28 @@ utf8_toUtf16(const ENCODING *enc,
res = XML_CONVERT_INPUT_INCOMPLETE;
goto after;
}
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
*to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
| (from[2] & 0x3f));
from += 3;
break;
case BT_LEAD4:
{
unsigned long n;
if (toLim - to < 2) {
res = XML_CONVERT_OUTPUT_EXHAUSTED;
goto after;
}
if (fromLim - from < 4) {
res = XML_CONVERT_INPUT_INCOMPLETE;
goto after;
}
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000;
to[0] = (unsigned short)((n >> 10) | 0xD800);
to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
to += 2;
from += 4;
case BT_LEAD4: {
unsigned long n;
if (toLim - to < 2) {
res = XML_CONVERT_OUTPUT_EXHAUSTED;
goto after;
}
break;
if (fromLim - from < 4) {
res = XML_CONVERT_INPUT_INCOMPLETE;
goto after;
}
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000;
to[0] = (unsigned short)((n >> 10) | 0xD800);
to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
to += 2;
from += 4;
} break;
default:
*to++ = *from++;
break;
......@@ -497,56 +452,51 @@ after:
}
#ifdef XML_NS
static const struct normal_encoding utf8_encoding_ns = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
#include "asciitab.h"
#include "utf8tab.h"
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
static const struct normal_encoding utf8_encoding_ns
= {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
{
# include "asciitab.h"
# include "utf8tab.h"
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
#endif
static const struct normal_encoding utf8_encoding = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
static const struct normal_encoding utf8_encoding
= {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "utf8tab.h"
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
#ifdef XML_NS
static const struct normal_encoding internal_utf8_encoding_ns = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
#include "iasciitab.h"
#include "utf8tab.h"
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
static const struct normal_encoding internal_utf8_encoding_ns
= {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
{
# include "iasciitab.h"
# include "utf8tab.h"
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
#endif
static const struct normal_encoding internal_utf8_encoding = {
{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
{
static const struct normal_encoding internal_utf8_encoding
= {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
{
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "utf8tab.h"
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
},
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
char **toP, const char *toLim) {
UNUSED_P(enc);
for (;;) {
unsigned char c;
if (*fromP == fromLim)
......@@ -558,8 +508,7 @@ latin1_toUtf8(const ENCODING *UNUSED_P(enc),
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++;
}
else {
} else {
if (*toP == toLim)
return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = *(*fromP)++;
......@@ -568,10 +517,9 @@ latin1_toUtf8(const ENCODING *UNUSED_P(enc),
}
static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) {
UNUSED_P(enc);
while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = (unsigned char)*(*fromP)++;
......@@ -583,33 +531,30 @@ latin1_toUtf16(const ENCODING *UNUSED_P(enc),
#ifdef XML_NS
static const struct normal_encoding latin1_encoding_ns = {
{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
{
#include "asciitab.h"
#include "latin1tab.h"
},
STANDARD_VTABLE(sb_) NULL_VTABLE
};
static const struct normal_encoding latin1_encoding_ns
= {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
{
# include "asciitab.h"
# include "latin1tab.h"
},
STANDARD_VTABLE(sb_) NULL_VTABLE};
#endif
static const struct normal_encoding latin1_encoding = {
{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
{
static const struct normal_encoding latin1_encoding
= {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
},
STANDARD_VTABLE(sb_) NULL_VTABLE
};
},
STANDARD_VTABLE(sb_) NULL_VTABLE};
static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
char **toP, const char *toLim) {
UNUSED_P(enc);
while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = *(*fromP)++;
......@@ -621,40 +566,45 @@ ascii_toUtf8(const ENCODING *UNUSED_P(enc),
#ifdef XML_NS
static const struct normal_encoding ascii_encoding_ns = {
{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
{
#include "asciitab.h"
/* BT_NONXML == 0 */
},
STANDARD_VTABLE(sb_) NULL_VTABLE
};
static const struct normal_encoding ascii_encoding_ns
= {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
{
# include "asciitab.h"
/* BT_NONXML == 0 */
},
STANDARD_VTABLE(sb_) NULL_VTABLE};
#endif
static const struct normal_encoding ascii_encoding = {
{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
{
static const struct normal_encoding ascii_encoding
= {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
/* BT_NONXML == 0 */
},
STANDARD_VTABLE(sb_) NULL_VTABLE
};
/* BT_NONXML == 0 */
},
STANDARD_VTABLE(sb_) NULL_VTABLE};
static int PTRFASTCALL
unicode_byte_type(char hi, char lo)
{
unicode_byte_type(char hi, char lo) {
switch ((unsigned char)hi) {
case 0xD8: case 0xD9: case 0xDA: case 0xDB:
/* 0xD800–0xDBFF first 16-bit code unit or high surrogate (W1) */
case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB:
return BT_LEAD4;
case 0xDC: case 0xDD: case 0xDE: case 0xDF:
/* 0xDC00–0xDFFF second 16-bit code unit or low surrogate (W2) */
case 0xDC:
case 0xDD:
case 0xDE:
case 0xDF:
return BT_TRAIL;
case 0xFF:
switch ((unsigned char)lo) {
case 0xFF:
case 0xFE:
case 0xFF: /* noncharacter-FFFF */
case 0xFE: /* noncharacter-FFFE */
return BT_NONXML;
}
break;
......@@ -662,102 +612,105 @@ unicode_byte_type(char hi, char lo)
return BT_NONASCII;
}
#define DEFINE_UTF16_TO_UTF8(E) \
static enum XML_Convert_Result PTRCALL \
E ## toUtf8(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
{ \
const char *from = *fromP; \
fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
for (; from < fromLim; from += 2) { \
int plane; \
unsigned char lo2; \
unsigned char lo = GET_LO(from); \
unsigned char hi = GET_HI(from); \
switch (hi) { \
case 0: \
if (lo < 0x80) { \
if (*toP == toLim) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = lo; \
break; \
} \
/* fall through */ \
case 0x1: case 0x2: case 0x3: \
case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
break; \
default: \
if (toLim - *toP < 3) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
*(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
break; \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
if (fromLim - from < 4) { \
*fromP = from; \
return XML_CONVERT_INPUT_INCOMPLETE; \
} \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
*(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
from += 2; \
lo2 = GET_LO(from); \
*(*toP)++ = (((lo & 0x3) << 4) \
| ((GET_HI(from) & 0x3) << 2) \
| (lo2 >> 6) \
| 0x80); \
*(*toP)++ = ((lo2 & 0x3f) | 0x80); \
break; \
} \
} \
*fromP = from; \
if (from < fromLim) \
return XML_CONVERT_INPUT_INCOMPLETE; \
else \
return XML_CONVERT_COMPLETED; \
}
#define DEFINE_UTF16_TO_UTF8(E) \
static enum XML_Convert_Result PTRCALL E##toUtf8( \
const ENCODING *enc, const char **fromP, const char *fromLim, \
char **toP, const char *toLim) { \
const char *from = *fromP; \
UNUSED_P(enc); \
fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
for (; from < fromLim; from += 2) { \
int plane; \
unsigned char lo2; \
unsigned char lo = GET_LO(from); \
unsigned char hi = GET_HI(from); \
switch (hi) { \
case 0: \
if (lo < 0x80) { \
if (*toP == toLim) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = lo; \
break; \
} \
/* fall through */ \
case 0x1: \
case 0x2: \
case 0x3: \
case 0x4: \
case 0x5: \
case 0x6: \
case 0x7: \
if (toLim - *toP < 2) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
break; \
default: \
if (toLim - *toP < 3) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
*(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
break; \
case 0xD8: \
case 0xD9: \
case 0xDA: \
case 0xDB: \
if (toLim - *toP < 4) { \
*fromP = from; \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
if (fromLim - from < 4) { \
*fromP = from; \
return XML_CONVERT_INPUT_INCOMPLETE; \
} \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = (char)((plane >> 2) | UTF8_cval4); \
*(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
from += 2; \
lo2 = GET_LO(from); \
*(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2) \
| (lo2 >> 6) | 0x80); \
*(*toP)++ = ((lo2 & 0x3f) | 0x80); \
break; \
} \
} \
*fromP = from; \
if (from < fromLim) \
return XML_CONVERT_INPUT_INCOMPLETE; \
else \
return XML_CONVERT_COMPLETED; \
}
#define DEFINE_UTF16_TO_UTF16(E) \
static enum XML_Convert_Result PTRCALL \
E ## toUtf16(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
{ \
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \
res = XML_CONVERT_INPUT_INCOMPLETE; \
} \
for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
if ((*toP == toLim) && (*fromP < fromLim)) \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
else \
return res; \
}
#define DEFINE_UTF16_TO_UTF16(E) \
static enum XML_Convert_Result PTRCALL E##toUtf16( \
const ENCODING *enc, const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) { \
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
UNUSED_P(enc); \
fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \
res = XML_CONVERT_INPUT_INCOMPLETE; \
} \
for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
if ((*toP == toLim) && (*fromP < fromLim)) \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
else \
return res; \
}
#define SET2(ptr, ch) \
(((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])
......@@ -768,8 +721,7 @@ DEFINE_UTF16_TO_UTF16(little2_)
#undef GET_LO
#undef GET_HI
#define SET2(ptr, ch) \
(((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])
......@@ -780,292 +732,279 @@ DEFINE_UTF16_TO_UTF16(big2_)
#undef GET_LO
#undef GET_HI
#define LITTLE2_BYTE_TYPE(enc, p) \
((p)[1] == 0 \
? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
: unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
#define LITTLE2_BYTE_TYPE(enc, p) \
((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
: unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == c)
#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \
UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \
UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
little2_byteType(const ENCODING *enc, const char *p)
{
little2_byteType(const ENCODING *enc, const char *p) {
return LITTLE2_BYTE_TYPE(enc, p);
}
static int PTRFASTCALL
little2_byteToAscii(const ENCODING *enc, const char *p)
{
return LITTLE2_BYTE_TO_ASCII(enc, p);
little2_byteToAscii(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return LITTLE2_BYTE_TO_ASCII(p);
}
static int PTRCALL
little2_charMatches(const ENCODING *enc, const char *p, int c)
{
return LITTLE2_CHAR_MATCHES(enc, p, c);
little2_charMatches(const ENCODING *enc, const char *p, int c) {
UNUSED_P(enc);
return LITTLE2_CHAR_MATCHES(p, c);
}
static int PTRFASTCALL
little2_isNameMin(const ENCODING *enc, const char *p)
{
return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
little2_isNameMin(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return LITTLE2_IS_NAME_CHAR_MINBPC(p);
}
static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING *enc, const char *p)
{
return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
little2_isNmstrtMin(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p);
}
#undef VTABLE
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
# undef VTABLE
# define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
#else /* not XML_MIN_SIZE */
#undef PREFIX
#define PREFIX(ident) little2_ ## ident
#define MINBPC(enc) 2
# undef PREFIX
# define PREFIX(ident) little2_##ident
# define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
# define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
# define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
# define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
# define IS_NAME_CHAR(enc, p, n) 0
# define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
# define IS_NMSTRT_CHAR(enc, p, n) (0)
# define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)
# define XML_TOK_IMPL_C
# include "xmltok_impl.c"
# undef XML_TOK_IMPL_C
# undef MINBPC
# undef BYTE_TYPE
# undef BYTE_TO_ASCII
# undef CHAR_MATCHES
# undef IS_NAME_CHAR
# undef IS_NAME_CHAR_MINBPC
# undef IS_NMSTRT_CHAR
# undef IS_NMSTRT_CHAR_MINBPC
# undef IS_INVALID_CHAR
#endif /* not XML_MIN_SIZE */
#ifdef XML_NS
static const struct normal_encoding little2_encoding_ns = {
{ VTABLE, 2, 0,
#if BYTEORDER == 1234
1
#else
0
#endif
},
{
#include "asciitab.h"
#include "latin1tab.h"
},
STANDARD_VTABLE(little2_) NULL_VTABLE
};
static const struct normal_encoding little2_encoding_ns
= {{VTABLE, 2, 0,
# if BYTEORDER == 1234
1
# else
0
# endif
},
{
# include "asciitab.h"
# include "latin1tab.h"
},
STANDARD_VTABLE(little2_) NULL_VTABLE};
#endif
static const struct normal_encoding little2_encoding = {
{ VTABLE, 2, 0,
static const struct normal_encoding little2_encoding
= {{VTABLE, 2, 0,
#if BYTEORDER == 1234
1
1
#else
0
0
#endif
},
{
},
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
},
STANDARD_VTABLE(little2_) NULL_VTABLE
};
},
STANDARD_VTABLE(little2_) NULL_VTABLE};
#if BYTEORDER != 4321
#ifdef XML_NS
# ifdef XML_NS
static const struct normal_encoding internal_little2_encoding_ns = {
{ VTABLE, 2, 0, 1 },
{
#include "iasciitab.h"
#include "latin1tab.h"
},
STANDARD_VTABLE(little2_) NULL_VTABLE
};
static const struct normal_encoding internal_little2_encoding_ns
= {{VTABLE, 2, 0, 1},
{
# include "iasciitab.h"
# include "latin1tab.h"
},
STANDARD_VTABLE(little2_) NULL_VTABLE};
#endif
# endif
static const struct normal_encoding internal_little2_encoding = {
{ VTABLE, 2, 0, 1 },
{
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
},
STANDARD_VTABLE(little2_) NULL_VTABLE
};
static const struct normal_encoding internal_little2_encoding
= {{VTABLE, 2, 0, 1},
{
# define BT_COLON BT_NMSTRT
# include "iasciitab.h"
# undef BT_COLON
# include "latin1tab.h"
},
STANDARD_VTABLE(little2_) NULL_VTABLE};
#endif
#define BIG2_BYTE_TYPE(enc, p) \
((p)[0] == 0 \
? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
: unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
#define BIG2_BYTE_TYPE(enc, p) \
((p)[0] == 0 \
? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
: unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == c)
#define BIG2_IS_NAME_CHAR_MINBPC(p) \
UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \
UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
big2_byteType(const ENCODING *enc, const char *p)
{
big2_byteType(const ENCODING *enc, const char *p) {
return BIG2_BYTE_TYPE(enc, p);
}
static int PTRFASTCALL
big2_byteToAscii(const ENCODING *enc, const char *p)
{
return BIG2_BYTE_TO_ASCII(enc, p);
big2_byteToAscii(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return BIG2_BYTE_TO_ASCII(p);
}
static int PTRCALL
big2_charMatches(const ENCODING *enc, const char *p, int c)
{
return BIG2_CHAR_MATCHES(enc, p, c);
big2_charMatches(const ENCODING *enc, const char *p, int c) {
UNUSED_P(enc);
return BIG2_CHAR_MATCHES(p, c);
}
static int PTRFASTCALL
big2_isNameMin(const ENCODING *enc, const char *p)
{
return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
big2_isNameMin(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return BIG2_IS_NAME_CHAR_MINBPC(p);
}
static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING *enc, const char *p)
{
return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
big2_isNmstrtMin(const ENCODING *enc, const char *p) {
UNUSED_P(enc);
return BIG2_IS_NMSTRT_CHAR_MINBPC(p);
}
#undef VTABLE
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
# undef VTABLE
# define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
#else /* not XML_MIN_SIZE */
#undef PREFIX
#define PREFIX(ident) big2_ ## ident
#define MINBPC(enc) 2
# undef PREFIX
# define PREFIX(ident) big2_##ident
# define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
#define XML_TOK_IMPL_C
#include "xmltok_impl.c"
#undef XML_TOK_IMPL_C
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
# define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
# define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
# define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
# define IS_NAME_CHAR(enc, p, n) 0
# define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
# define IS_NMSTRT_CHAR(enc, p, n) (0)
# define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)
# define XML_TOK_IMPL_C
# include "xmltok_impl.c"
# undef XML_TOK_IMPL_C
# undef MINBPC
# undef BYTE_TYPE
# undef BYTE_TO_ASCII
# undef CHAR_MATCHES
# undef IS_NAME_CHAR
# undef IS_NAME_CHAR_MINBPC
# undef IS_NMSTRT_CHAR
# undef IS_NMSTRT_CHAR_MINBPC
# undef IS_INVALID_CHAR
#endif /* not XML_MIN_SIZE */
#ifdef XML_NS
static const struct normal_encoding big2_encoding_ns = {
{ VTABLE, 2, 0,
#if BYTEORDER == 4321
1
#else
0
#endif
},
{
#include "asciitab.h"
#include "latin1tab.h"
},
STANDARD_VTABLE(big2_) NULL_VTABLE
};
static const struct normal_encoding big2_encoding_ns
= {{VTABLE, 2, 0,
# if BYTEORDER == 4321
1
# else
0
# endif
},
{
# include "asciitab.h"
# include "latin1tab.h"
},
STANDARD_VTABLE(big2_) NULL_VTABLE};
#endif
static const struct normal_encoding big2_encoding = {
{ VTABLE, 2, 0,
static const struct normal_encoding big2_encoding
= {{VTABLE, 2, 0,
#if BYTEORDER == 4321
1
1
#else
0
0
#endif
},
{
},
{
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
},
STANDARD_VTABLE(big2_) NULL_VTABLE
};
},
STANDARD_VTABLE(big2_) NULL_VTABLE};
#if BYTEORDER != 1234
#ifdef XML_NS
# ifdef XML_NS
static const struct normal_encoding internal_big2_encoding_ns = {
{ VTABLE, 2, 0, 1 },
{
#include "iasciitab.h"
#include "latin1tab.h"
},
STANDARD_VTABLE(big2_) NULL_VTABLE
};
static const struct normal_encoding internal_big2_encoding_ns
= {{VTABLE, 2, 0, 1},
{
# include "iasciitab.h"
# include "latin1tab.h"
},
STANDARD_VTABLE(big2_) NULL_VTABLE};
#endif
# endif
static const struct normal_encoding internal_big2_encoding = {
{ VTABLE, 2, 0, 1 },
{
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
},
STANDARD_VTABLE(big2_) NULL_VTABLE
};
static const struct normal_encoding internal_big2_encoding
= {{VTABLE, 2, 0, 1},
{
# define BT_COLON BT_NMSTRT
# include "iasciitab.h"
# undef BT_COLON
# include "latin1tab.h"
},
STANDARD_VTABLE(big2_) NULL_VTABLE};
#endif
#undef PREFIX
static int FASTCALL
streqci(const char *s1, const char *s2)
{
streqci(const char *s1, const char *s2) {
for (;;) {
char c1 = *s1++;
char c2 = *s2++;
......@@ -1079,22 +1018,21 @@ streqci(const char *s1, const char *s2)
c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
if (c1 != c2)
return 0;
if (!c1)
if (! c1)
break;
}
return 1;
}
static void PTRCALL
initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, POSITION *pos)
{
initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end,
POSITION *pos) {
UNUSED_P(enc);
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
}
static int
toAscii(const ENCODING *enc, const char *ptr, const char *end)
{
toAscii(const ENCODING *enc, const char *ptr, const char *end) {
char buf[1];
char *p = buf;
XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
......@@ -1105,8 +1043,7 @@ toAscii(const ENCODING *enc, const char *ptr, const char *end)
}
static int FASTCALL
isSpace(int c)
{
isSpace(int c) {
switch (c) {
case 0x20:
case 0xD:
......@@ -1121,21 +1058,16 @@ isSpace(int c)
followed by name=val.
*/
static int
parsePseudoAttribute(const ENCODING *enc,
const char *ptr,
const char *end,
const char **namePtr,
const char **nameEndPtr,
const char **valPtr,
const char **nextTokPtr)
{
parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end,
const char **namePtr, const char **nameEndPtr,
const char **valPtr, const char **nextTokPtr) {
int c;
char open;
if (ptr == end) {
*namePtr = NULL;
return 1;
}
if (!isSpace(toAscii(enc, ptr, end))) {
if (! isSpace(toAscii(enc, ptr, end))) {
*nextTokPtr = ptr;
return 0;
}
......@@ -1191,12 +1123,9 @@ parsePseudoAttribute(const ENCODING *enc,
c = toAscii(enc, ptr, end);
if (c == open)
break;
if (!(ASCII_a <= c && c <= ASCII_z)
&& !(ASCII_A <= c && c <= ASCII_Z)
&& !(ASCII_0 <= c && c <= ASCII_9)
&& c != ASCII_PERIOD
&& c != ASCII_MINUS
&& c != ASCII_UNDERSCORE) {
if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)
&& ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD
&& c != ASCII_MINUS && c != ASCII_UNDERSCORE) {
*nextTokPtr = ptr;
return 0;
}
......@@ -1205,68 +1134,52 @@ parsePseudoAttribute(const ENCODING *enc,
return 1;
}
static const char KW_version[] = {
ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'
};
static const char KW_version[]
= {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};
static const char KW_encoding[] = {
ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'
};
static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d,
ASCII_i, ASCII_n, ASCII_g, '\0'};
static const char KW_standalone[] = {
ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
ASCII_n, ASCII_e, '\0'
};
static const char KW_standalone[]
= {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a,
ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};
static const char KW_yes[] = {
ASCII_y, ASCII_e, ASCII_s, '\0'
};
static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'};
static const char KW_no[] = {
ASCII_n, ASCII_o, '\0'
};
static const char KW_no[] = {ASCII_n, ASCII_o, '\0'};
static int
doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const char *,
doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *,
const char *),
int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
{
int isGeneralTextEntity, const ENCODING *enc, const char *ptr,
const char *end, const char **badPtr, const char **versionPtr,
const char **versionEndPtr, const char **encodingName,
const ENCODING **encoding, int *standalone) {
const char *val = NULL;
const char *name = NULL;
const char *nameEnd = NULL;
ptr += 5 * enc->minBytesPerChar;
end -= 2 * enc->minBytesPerChar;
if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
|| !name) {
if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
|| ! name) {
*badPtr = ptr;
return 0;
}
if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
if (!isGeneralTextEntity) {
if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
if (! isGeneralTextEntity) {
*badPtr = name;
return 0;
}
}
else {
} else {
if (versionPtr)
*versionPtr = val;
if (versionEndPtr)
*versionEndPtr = ptr;
if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
*badPtr = ptr;
return 0;
}
if (!name) {
if (! name) {
if (isGeneralTextEntity) {
/* a TextDecl must have an EncodingDecl */
*badPtr = ptr;
......@@ -1277,7 +1190,7 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
}
if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
int c = toAscii(enc, val, end);
if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) {
*badPtr = val;
return 0;
}
......@@ -1285,14 +1198,14 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
*encodingName = val;
if (encoding)
*encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
*badPtr = ptr;
return 0;
}
if (!name)
if (! name)
return 1;
}
if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
|| isGeneralTextEntity) {
*badPtr = name;
return 0;
......@@ -1300,12 +1213,10 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
if (standalone)
*standalone = 1;
}
else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
} else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
if (standalone)
*standalone = 0;
}
else {
} else {
*badPtr = val;
return 0;
}
......@@ -1319,11 +1230,16 @@ doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
}
static int FASTCALL
checkCharRefNumber(int result)
{
checkCharRefNumber(int result) {
switch (result >> 8) {
case 0xD8: case 0xD9: case 0xDA: case 0xDB:
case 0xDC: case 0xDD: case 0xDE: case 0xDF:
case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB:
case 0xDC:
case 0xDD:
case 0xDE:
case 0xDF:
return -1;
case 0:
if (latin1_encoding.type[result] == BT_NONXML)
......@@ -1338,8 +1254,7 @@ checkCharRefNumber(int result)
}
int FASTCALL
XmlUtf8Encode(int c, char *buf)
{
XmlUtf8Encode(int c, char *buf) {
enum {
/* minN is minimum legal resulting value for N byte sequence */
min2 = 0x80,
......@@ -1375,8 +1290,7 @@ XmlUtf8Encode(int c, char *buf)
}
int FASTCALL
XmlUtf16Encode(int charNum, unsigned short *buf)
{
XmlUtf16Encode(int charNum, unsigned short *buf) {
if (charNum < 0)
return 0;
if (charNum < 0x10000) {
......@@ -1400,17 +1314,15 @@ struct unknown_encoding {
char utf8[256][4];
};
#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc))
#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
int
XmlSizeOfUnknownEncoding(void)
{
XmlSizeOfUnknownEncoding(void) {
return sizeof(struct unknown_encoding);
}
static int PTRFASTCALL
unknown_isName(const ENCODING *enc, const char *p)
{
unknown_isName(const ENCODING *enc, const char *p) {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
int c = uenc->convert(uenc->userData, p);
if (c & ~0xFFFF)
......@@ -1419,8 +1331,7 @@ unknown_isName(const ENCODING *enc, const char *p)
}
static int PTRFASTCALL
unknown_isNmstrt(const ENCODING *enc, const char *p)
{
unknown_isNmstrt(const ENCODING *enc, const char *p) {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
int c = uenc->convert(uenc->userData, p);
if (c & ~0xFFFF)
......@@ -1429,18 +1340,15 @@ unknown_isNmstrt(const ENCODING *enc, const char *p)
}
static int PTRFASTCALL
unknown_isInvalid(const ENCODING *enc, const char *p)
{
unknown_isInvalid(const ENCODING *enc, const char *p) {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
int c = uenc->convert(uenc->userData, p);
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
}
static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
char **toP, const char *toLim) {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
char buf[XML_UTF8_ENCODE_MAX];
for (;;) {
......@@ -1458,8 +1366,7 @@ unknown_toUtf8(const ENCODING *enc,
utf8 = buf;
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else {
} else {
if (n > toLim - *toP)
return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++;
......@@ -1470,20 +1377,16 @@ unknown_toUtf8(const ENCODING *enc,
}
static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
while (*fromP < fromLim && *toP < toLim) {
unsigned short c = uenc->utf16[(unsigned char)**fromP];
if (c == 0) {
c = (unsigned short)
uenc->convert(uenc->userData, *fromP);
c = (unsigned short)uenc->convert(uenc->userData, *fromP);
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else
} else
(*fromP)++;
*(*toP)++ = c;
}
......@@ -1495,19 +1398,14 @@ unknown_toUtf16(const ENCODING *enc,
}
ENCODING *
XmlInitUnknownEncoding(void *mem,
int *table,
CONVERTER convert,
void *userData)
{
XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
void *userData) {
int i;
struct unknown_encoding *e = (struct unknown_encoding *)mem;
for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
((char *)mem)[i] = ((char *)&latin1_encoding)[i];
memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
for (i = 0; i < 128; i++)
if (latin1_encoding.type[i] != BT_OTHER
&& latin1_encoding.type[i] != BT_NONXML
&& table[i] != i)
&& latin1_encoding.type[i] != BT_NONXML && table[i] != i)
return 0;
for (i = 0; i < 256; i++) {
int c = table[i];
......@@ -1517,35 +1415,30 @@ XmlInitUnknownEncoding(void *mem,
e->utf16[i] = 0xFFFF;
e->utf8[i][0] = 1;
e->utf8[i][1] = 0;
}
else if (c < 0) {
} else if (c < 0) {
if (c < -4)
return 0;
/* Multi-byte sequences need a converter function */
if (!convert)
if (! convert)
return 0;
e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
e->utf8[i][0] = 0;
e->utf16[i] = 0;
}
else if (c < 0x80) {
} else if (c < 0x80) {
if (latin1_encoding.type[c] != BT_OTHER
&& latin1_encoding.type[c] != BT_NONXML
&& c != i)
&& latin1_encoding.type[c] != BT_NONXML && c != i)
return 0;
e->normal.type[i] = latin1_encoding.type[c];
e->utf8[i][0] = 1;
e->utf8[i][1] = (char)c;
e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
}
else if (checkCharRefNumber(c) < 0) {
} else if (checkCharRefNumber(c) < 0) {
e->normal.type[i] = BT_NONXML;
/* This shouldn't really get used. */
e->utf16[i] = 0xFFFF;
e->utf8[i][0] = 1;
e->utf8[i][1] = 0;
}
else {
} else {
if (c > 0xFFFF)
return 0;
if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
......@@ -1590,44 +1483,32 @@ enum {
NO_ENC
};
static const char KW_ISO_8859_1[] = {
ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
ASCII_MINUS, ASCII_1, '\0'
};
static const char KW_US_ASCII[] = {
ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
'\0'
};
static const char KW_UTF_8[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
};
static const char KW_UTF_16[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
};
static const char KW_UTF_16BE[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
'\0'
};
static const char KW_UTF_16LE[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
'\0'
};
static const char KW_ISO_8859_1[]
= {ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8,
ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0'};
static const char KW_US_ASCII[]
= {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
ASCII_C, ASCII_I, ASCII_I, '\0'};
static const char KW_UTF_8[]
= {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
static const char KW_UTF_16[]
= {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
static const char KW_UTF_16BE[]
= {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
ASCII_6, ASCII_B, ASCII_E, '\0'};
static const char KW_UTF_16LE[]
= {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
ASCII_6, ASCII_L, ASCII_E, '\0'};
static int FASTCALL
getEncodingIndex(const char *name)
{
static const char * const encodingNames[] = {
KW_ISO_8859_1,
KW_US_ASCII,
KW_UTF_8,
KW_UTF_16,
KW_UTF_16BE,
KW_UTF_16LE,
getEncodingIndex(const char *name) {
static const char *const encodingNames[] = {
KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE,
};
int i;
if (name == NULL)
return NO_ENC;
for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++)
if (streqci(name, encodingNames[i]))
return i;
return UNKNOWN_ENC;
......@@ -1647,15 +1528,9 @@ getEncodingIndex(const char *name)
XML_PROLOG_STATE otherwise.
*/
static int
initScan(const ENCODING * const *encodingTable,
const INIT_ENCODING *enc,
int state,
const char *ptr,
const char *end,
const char **nextTokPtr)
{
initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
int state, const char *ptr, const char *end, const char **nextTokPtr) {
const ENCODING **encPtr;
if (ptr >= end)
......@@ -1680,20 +1555,17 @@ initScan(const ENCODING * const *encodingTable,
case 0xFE:
case 0xFF:
case 0xEF: /* possibly first byte of UTF-8 BOM */
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
&& state == XML_CONTENT_STATE)
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
break;
/* fall through */
case 0x00:
case 0x3C:
return XML_TOK_PARTIAL;
}
}
else {
} else {
switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
case 0xFEFF:
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
&& state == XML_CONTENT_STATE)
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
break;
*nextTokPtr = ptr + 2;
*encPtr = encodingTable[UTF_16BE_ENC];
......@@ -1707,8 +1579,7 @@ initScan(const ENCODING * const *encodingTable,
*encPtr = encodingTable[UTF_16LE_ENC];
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
case 0xFFFE:
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
&& state == XML_CONTENT_STATE)
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
break;
*nextTokPtr = ptr + 2;
*encPtr = encodingTable[UTF_16LE_ENC];
......@@ -1723,8 +1594,8 @@ initScan(const ENCODING * const *encodingTable,
*/
if (state == XML_CONTENT_STATE) {
int e = INIT_ENC_INDEX(enc);
if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
|| e == UTF_16LE_ENC || e == UTF_16_ENC)
if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
|| e == UTF_16_ENC)
break;
}
if (ptr + 2 == end)
......@@ -1747,8 +1618,7 @@ initScan(const ENCODING * const *encodingTable,
break;
*encPtr = encodingTable[UTF_16BE_ENC];
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
else if (ptr[1] == '\0') {
} else if (ptr[1] == '\0') {
/* We could recover here in the case:
- parsing an external entity
- second byte is 0
......@@ -1770,7 +1640,6 @@ initScan(const ENCODING * const *encodingTable,
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
#define NS(x) x
#define ns(x) x
#define XML_TOK_NS_C
......@@ -1781,22 +1650,19 @@ initScan(const ENCODING * const *encodingTable,
#ifdef XML_NS
#define NS(x) x ## NS
#define ns(x) x ## _ns
# define NS(x) x##NS
# define ns(x) x##_ns
#define XML_TOK_NS_C
#include "xmltok_ns.c"
#undef XML_TOK_NS_C
# define XML_TOK_NS_C
# include "xmltok_ns.c"
# undef XML_TOK_NS_C
#undef NS
#undef ns
# undef NS
# undef ns
ENCODING *
XmlInitUnknownEncodingNS(void *mem,
int *table,
CONVERTER convert,
void *userData)
{
XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
void *userData) {
ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
if (enc)
((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
......
......@@ -38,16 +38,18 @@ extern "C" {
#endif
/* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
start of illegal ]]> sequence */
#define XML_TOK_TRAILING_RSQB \
-5 /* ] or ]] at the end of the scan; might be \
start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_NONE -4 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1 /* only part of a token */
#define XML_TOK_NONE -4 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR \
-3 /* A CR at the end of the scan; \
might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1 /* only part of a token */
#define XML_TOK_INVALID 0
/* The following tokens are returned by XmlContentTok; some are also
......@@ -62,24 +64,24 @@ extern "C" {
#define XML_TOK_DATA_NEWLINE 7
#define XML_TOK_CDATA_SECT_OPEN 8
#define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10 /* numeric character reference */
#define XML_TOK_CHAR_REF 10 /* numeric character reference */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14 /* Byte order mark */
#define XML_TOK_BOM 14 /* Byte order mark */
/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16 /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
#define XML_TOK_DECL_OPEN 16 /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
#define XML_TOK_NAME 18
#define XML_TOK_NMTOKEN 19
#define XML_TOK_POUND_NAME 20 /* #name */
#define XML_TOK_OR 21 /* | */
#define XML_TOK_POUND_NAME 20 /* #name */
#define XML_TOK_OR 21 /* | */
#define XML_TOK_PERCENT 22
#define XML_TOK_OPEN_PAREN 23
#define XML_TOK_CLOSE_PAREN 24
......@@ -90,14 +92,14 @@ extern "C" {
#define XML_TOK_INSTANCE_START 29
/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION 30 /* name? */
#define XML_TOK_NAME_ASTERISK 31 /* name* */
#define XML_TOK_NAME_PLUS 32 /* name+ */
#define XML_TOK_COND_SECT_OPEN 33 /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
#define XML_TOK_NAME_QUESTION 30 /* name? */
#define XML_TOK_NAME_ASTERISK 31 /* name* */
#define XML_TOK_NAME_PLUS 32 /* name+ */
#define XML_TOK_COND_SECT_OPEN 33 /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
#define XML_TOK_COMMA 38
/* The following token is returned only by XmlAttributeValueTok */
......@@ -112,20 +114,20 @@ extern "C" {
#define XML_TOK_PREFIXED_NAME 41
#ifdef XML_DTD
#define XML_TOK_IGNORE_SECT 42
# define XML_TOK_IGNORE_SECT 42
#endif /* XML_DTD */
#ifdef XML_DTD
#define XML_N_STATES 4
# define XML_N_STATES 4
#else /* not XML_DTD */
#define XML_N_STATES 3
# define XML_N_STATES 3
#endif /* not XML_DTD */
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
#define XML_CDATA_SECTION_STATE 2
#ifdef XML_DTD
#define XML_IGNORE_SECTION_STATE 3
# define XML_IGNORE_SECTION_STATE 3
#endif /* XML_DTD */
#define XML_N_LITERAL_TYPES 2
......@@ -153,52 +155,41 @@ typedef struct {
struct encoding;
typedef struct encoding ENCODING;
typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *,
const char *,
const char **);
typedef int(PTRCALL *SCANNER)(const ENCODING *, const char *, const char *,
const char **);
enum XML_Convert_Result {
XML_CONVERT_COMPLETED = 0,
XML_CONVERT_INPUT_INCOMPLETE = 1,
XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */
XML_CONVERT_OUTPUT_EXHAUSTED
= 2 /* and therefore potentially input remaining as well */
};
struct encoding {
SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES];
int (PTRCALL *nameMatchesAscii)(const ENCODING *,
const char *,
const char *,
const char *);
int (PTRFASTCALL *nameLength)(const ENCODING *, const char *);
int(PTRCALL *nameMatchesAscii)(const ENCODING *, const char *, const char *,
const char *);
int(PTRFASTCALL *nameLength)(const ENCODING *, const char *);
const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *);
int (PTRCALL *getAtts)(const ENCODING *enc,
const char *ptr,
int attsMax,
ATTRIBUTE *atts);
int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);
int (PTRCALL *predefinedEntityName)(const ENCODING *,
const char *,
const char *);
void (PTRCALL *updatePosition)(const ENCODING *,
const char *ptr,
const char *end,
POSITION *);
int (PTRCALL *isPublicId)(const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr);
enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
char **toP,
const char *toLim);
enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
unsigned short **toP,
const unsigned short *toLim);
int(PTRCALL *getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
ATTRIBUTE *atts);
int(PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);
int(PTRCALL *predefinedEntityName)(const ENCODING *, const char *,
const char *);
void(PTRCALL *updatePosition)(const ENCODING *, const char *ptr,
const char *end, POSITION *);
int(PTRCALL *isPublicId)(const ENCODING *enc, const char *ptr,
const char *end, const char **badPtr);
enum XML_Convert_Result(PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim, char **toP,
const char *toLim);
enum XML_Convert_Result(PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
unsigned short **toP,
const unsigned short *toLim);
int minBytesPerChar;
char isUtf8;
char isUtf16;
......@@ -225,66 +216,62 @@ struct encoding {
the prolog outside literals, comments and processing instructions.
*/
#define XmlTok(enc, state, ptr, end, nextTokPtr) \
#define XmlTok(enc, state, ptr, end, nextTokPtr) \
(((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
#define XmlContentTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
#define XmlContentTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
#ifdef XML_DTD
#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
#endif /* XML_DTD */
/* This is used for performing a 2nd-level tokenization on the content
of a literal that has already been returned by XmlTok.
*/
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
(((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
(((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))
#define XmlNameLength(enc, ptr) \
(((enc)->nameLength)(enc, ptr))
#define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr))
#define XmlSkipS(enc, ptr) \
(((enc)->skipS)(enc, ptr))
#define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr))
#define XmlGetAttributes(enc, ptr, attsMax, atts) \
#define XmlGetAttributes(enc, ptr, attsMax, atts) \
(((enc)->getAtts)(enc, ptr, attsMax, atts))
#define XmlCharRefNumber(enc, ptr) \
(((enc)->charRefNumber)(enc, ptr))
#define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr))
#define XmlPredefinedEntityName(enc, ptr, end) \
#define XmlPredefinedEntityName(enc, ptr, end) \
(((enc)->predefinedEntityName)(enc, ptr, end))
#define XmlUpdatePosition(enc, ptr, end, pos) \
#define XmlUpdatePosition(enc, ptr, end, pos) \
(((enc)->updatePosition)(enc, ptr, end, pos))
#define XmlIsPublicId(enc, ptr, end, badPtr) \
#define XmlIsPublicId(enc, ptr, end, badPtr) \
(((enc)->isPublicId)(enc, ptr, end, badPtr))
#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
(((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
(((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
typedef struct {
......@@ -292,16 +279,11 @@ typedef struct {
const ENCODING **encPtr;
} INIT_ENCODING;
int XmlParseXmlDecl(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc,
const char *ptr, const char *end, const char **badPtr,
const char **versionPtr, const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
const ENCODING **namedEncodingPtr, int *standalonePtr);
int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void);
......@@ -310,34 +292,22 @@ int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf);
int XmlSizeOfUnknownEncoding(void);
typedef int(XMLCALL *CONVERTER)(void *userData, const char *p);
typedef int (XMLCALL *CONVERTER) (void *userData, const char *p);
ENCODING *
XmlInitUnknownEncoding(void *mem,
int *table,
CONVERTER convert,
void *userData);
ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
void *userData);
int XmlParseXmlDeclNS(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc,
const char *ptr, const char *end, const char **badPtr,
const char **versionPtr, const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
const ENCODING **namedEncodingPtr, int *standalonePtr);
int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING *
XmlInitUnknownEncodingNS(void *mem,
int *table,
CONVERTER convert,
void *userData);
ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
void *userData);
#ifdef __cplusplus
}
#endif
......
......@@ -32,130 +32,124 @@
#ifdef XML_TOK_IMPL_C
#ifndef IS_INVALID_CHAR
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (IS_INVALID_CHAR(enc, ptr, n)) { \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
# ifndef IS_INVALID_CHAR
# define IS_INVALID_CHAR(enc, ptr, n) (0)
# endif
# define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (IS_INVALID_CHAR(enc, ptr, n)) { \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
#define INVALID_CASES(ptr, nextTokPtr) \
INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
case BT_NONXML: \
case BT_MALFORM: \
case BT_TRAIL: \
*(nextTokPtr) = (ptr); \
# define INVALID_CASES(ptr, nextTokPtr) \
INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
case BT_NONXML: \
case BT_MALFORM: \
case BT_TRAIL: \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID;
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (!IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
case BT_NONASCII: \
if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
/* fall through */ \
case BT_NMSTRT: \
case BT_HEX: \
case BT_DIGIT: \
case BT_NAME: \
case BT_MINUS: \
ptr += MINBPC(enc); \
break; \
CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
case BT_NONASCII: \
if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
/* fall through */ \
case BT_NMSTRT: \
case BT_HEX: \
ptr += MINBPC(enc); \
break; \
CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
#ifndef PREFIX
#define PREFIX(ident) ident
#endif
#define HAS_CHARS(enc, ptr, end, count) \
(end - ptr >= count * MINBPC(enc))
# define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (! IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
#define HAS_CHAR(enc, ptr, end) \
HAS_CHARS(enc, ptr, end, 1)
# define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
case BT_NONASCII: \
if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
/* fall through */ \
case BT_NMSTRT: \
case BT_HEX: \
case BT_DIGIT: \
case BT_NAME: \
case BT_MINUS: \
ptr += MINBPC(enc); \
break; \
CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
# define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
#define REQUIRE_CHARS(enc, ptr, end, count) \
{ \
if (! HAS_CHARS(enc, ptr, end, count)) { \
return XML_TOK_PARTIAL; \
} \
# define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
case BT_NONASCII: \
if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
/* fall through */ \
case BT_NMSTRT: \
case BT_HEX: \
ptr += MINBPC(enc); \
break; \
CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
# ifndef PREFIX
# define PREFIX(ident) ident
# endif
# define HAS_CHARS(enc, ptr, end, count) (end - ptr >= count * MINBPC(enc))
# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
# define REQUIRE_CHARS(enc, ptr, end, count) \
{ \
if (! HAS_CHARS(enc, ptr, end, count)) { \
return XML_TOK_PARTIAL; \
} \
}
#define REQUIRE_CHAR(enc, ptr, end) \
REQUIRE_CHARS(enc, ptr, end, 1)
# define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)
/* ptr points to character following "<!-" */
static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
if (HAS_CHAR(enc, ptr, end)) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
......@@ -175,9 +169,8 @@ PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
/* ptr points to character following "<!" */
static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
......@@ -199,12 +192,17 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
REQUIRE_CHARS(enc, ptr, end, 2);
/* don't allow <!ENTITY% foo "whatever"> */
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
case BT_S:
case BT_CR:
case BT_LF:
case BT_PERCNT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* fall through */
case BT_S: case BT_CR: case BT_LF:
case BT_S:
case BT_CR:
case BT_LF:
*nextTokPtr = ptr;
return XML_TOK_DECL_OPEN;
case BT_NMSTRT:
......@@ -220,12 +218,12 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, int *tokPtr)
{
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
int *tokPtr) {
int upper = 0;
UNUSED_P(enc);
*tokPtr = XML_TOK_PI;
if (end - ptr != MINBPC(enc)*3)
if (end - ptr != MINBPC(enc) * 3)
return 1;
switch (BYTE_TO_ASCII(enc, ptr)) {
case ASCII_x:
......@@ -265,30 +263,31 @@ PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
/* ptr points to character following "<?" */
static int PTRCALL
PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
int tok;
const char *target = ptr;
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
case BT_CR:
case BT_LF:
if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
......@@ -304,7 +303,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
}
return XML_TOK_PARTIAL;
case BT_QUEST:
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
......@@ -324,16 +323,16 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, const char **nextTokPtr)
{
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB };
PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
static const char CDATA_LSQB[]
= {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
int i;
UNUSED_P(enc);
/* CDATA[ */
REQUIRE_CHARS(enc, ptr, end, 6);
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
......@@ -343,9 +342,8 @@ PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
}
static int PTRCALL
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
......@@ -361,11 +359,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
case BT_RSQB:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
break;
}
......@@ -381,23 +379,25 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
case BT_LF:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_DATA_NEWLINE;
INVALID_CASES(ptr, nextTokPtr)
INVALID_CASES(ptr, nextTokPtr)
default:
ptr += MINBPC(enc);
break;
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
# define LEAD_CASE(n) \
case BT_LEAD##n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_NONXML:
case BT_MALFORM:
case BT_TRAIL:
......@@ -418,23 +418,26 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
/* ptr points to character following "</" */
static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
case BT_CR:
case BT_LF:
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
case BT_S:
case BT_CR:
case BT_LF:
break;
case BT_GT:
*nextTokPtr = ptr + MINBPC(enc);
......@@ -445,13 +448,13 @@ PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
}
}
return XML_TOK_PARTIAL;
#ifdef XML_NS
# ifdef XML_NS
case BT_COLON:
/* no need to check qname syntax here,
since end-tag must match exactly */
ptr += MINBPC(enc);
break;
#endif
# endif
case BT_GT:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_END_TAG;
......@@ -466,9 +469,8 @@ PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
/* ptr points to character following "&#X" */
static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
if (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
......@@ -498,9 +500,8 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
/* ptr points to character following "&#" */
static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
if (HAS_CHAR(enc, ptr, end)) {
if (CHAR_MATCHES(enc, ptr, ASCII_x))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
......@@ -531,11 +532,10 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
const char **nextTokPtr) {
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_NUM:
return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
default:
......@@ -544,7 +544,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_ENTITY_REF;
......@@ -560,15 +560,14 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
static int PTRCALL
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
#ifdef XML_NS
const char **nextTokPtr) {
# ifdef XML_NS
int hadColon = 0;
#endif
# endif
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
# ifdef XML_NS
case BT_COLON:
if (hadColon) {
*nextTokPtr = ptr;
......@@ -578,14 +577,16 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
#endif
case BT_S: case BT_CR: case BT_LF:
# endif
case BT_S:
case BT_CR:
case BT_LF:
for (;;) {
int t;
......@@ -605,101 +606,101 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
}
}
/* fall through */
case BT_EQUALS:
{
int open;
#ifdef XML_NS
hadColon = 0;
#endif
for (;;) {
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
switch (open) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
case BT_EQUALS: {
int open;
# ifdef XML_NS
hadColon = 0;
# endif
for (;;) {
ptr += MINBPC(enc);
/* in attribute value */
for (;;) {
int t;
REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
switch (t) {
REQUIRE_CHAR(enc, ptr, end);
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
switch (open) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
ptr += MINBPC(enc);
/* in attribute value */
for (;;) {
int t;
REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
case BT_AMP:
{
int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
if (tok <= 0) {
if (tok == XML_TOK_INVALID)
*nextTokPtr = ptr;
return tok;
}
break;
}
case BT_LT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
default:
ptr += MINBPC(enc);
break;
case BT_AMP: {
int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
if (tok <= 0) {
if (tok == XML_TOK_INVALID)
*nextTokPtr = ptr;
return tok;
}
break;
}
case BT_LT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
default:
ptr += MINBPC(enc);
break;
}
}
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_S:
case BT_CR:
case BT_LF:
break;
case BT_SOL:
goto sol;
case BT_GT:
goto gt;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* ptr points to closing quote */
for (;;) {
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
case BT_CR:
case BT_LF:
break;
case BT_SOL:
goto sol;
continue;
case BT_GT:
goto gt;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* ptr points to closing quote */
for (;;) {
gt:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_START_TAG_WITH_ATTS;
case BT_SOL:
sol:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
continue;
case BT_GT:
gt:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_START_TAG_WITH_ATTS;
case BT_SOL:
sol:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
default:
if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
}
break;
}
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
......@@ -712,14 +713,13 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
static int PTRCALL
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
#ifdef XML_NS
const char **nextTokPtr) {
# ifdef XML_NS
int hadColon;
#endif
# endif
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_EXCL:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
......@@ -727,8 +727,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_LSQB:
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
end, nextTokPtr);
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
......@@ -740,14 +739,14 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
#ifdef XML_NS
# ifdef XML_NS
hadColon = 0;
#endif
# endif
/* we have a start-tag */
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
# ifdef XML_NS
case BT_COLON:
if (hadColon) {
*nextTokPtr = ptr;
......@@ -757,34 +756,37 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
#endif
case BT_S: case BT_CR: case BT_LF:
{
ptr += MINBPC(enc);
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
# endif
case BT_S:
case BT_CR:
case BT_LF: {
ptr += MINBPC(enc);
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
goto gt;
case BT_SOL:
goto sol;
case BT_S: case BT_CR: case BT_LF:
ptr += MINBPC(enc);
continue;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
case BT_GT:
goto gt;
case BT_SOL:
goto sol;
case BT_S:
case BT_CR:
case BT_LF:
ptr += MINBPC(enc);
continue;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_PARTIAL;
return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
}
return XML_TOK_PARTIAL;
}
case BT_GT:
gt:
*nextTokPtr = ptr + MINBPC(enc);
......@@ -793,7 +795,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
sol:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
......@@ -809,8 +811,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
static int PTRCALL
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
const char **nextTokPtr) {
if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
......@@ -842,48 +843,50 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
break;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
INVALID_CASES(ptr, nextTokPtr)
INVALID_CASES(ptr, nextTokPtr)
default:
ptr += MINBPC(enc);
break;
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
# define LEAD_CASE(n) \
case BT_LEAD##n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_RSQB:
if (HAS_CHARS(enc, ptr, end, 2)) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
if (HAS_CHARS(enc, ptr, end, 3)) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
}
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_INVALID;
}
if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
if (HAS_CHARS(enc, ptr, end, 3)) {
if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
}
*nextTokPtr = ptr + 2 * MINBPC(enc);
return XML_TOK_INVALID;
}
}
/* fall through */
case BT_AMP:
......@@ -908,12 +911,14 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
const char **nextTokPtr) {
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S:
case BT_LF:
case BT_CR:
case BT_PERCNT:
*nextTokPtr = ptr;
return XML_TOK_PERCENT;
default:
......@@ -922,7 +927,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_PARAM_ENTITY_REF;
......@@ -936,20 +941,24 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
const char **nextTokPtr) {
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_CR: case BT_LF: case BT_S:
case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_CR:
case BT_LF:
case BT_S:
case BT_RPAR:
case BT_GT:
case BT_PERCNT:
case BT_VERBAR:
*nextTokPtr = ptr;
return XML_TOK_POUND_NAME;
default:
......@@ -961,14 +970,12 @@ PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
}
static int PTRCALL
PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end,
const char **nextTokPtr)
{
PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
while (HAS_CHAR(enc, ptr, end)) {
int t = BYTE_TYPE(enc, ptr);
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
INVALID_CASES(ptr, nextTokPtr)
case BT_QUOT:
case BT_APOS:
ptr += MINBPC(enc);
......@@ -978,8 +985,12 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
return -XML_TOK_LITERAL;
*nextTokPtr = ptr;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
case BT_GT: case BT_PERCNT: case BT_LSQB:
case BT_S:
case BT_CR:
case BT_LF:
case BT_GT:
case BT_PERCNT:
case BT_LSQB:
return XML_TOK_LITERAL;
default:
return XML_TOK_INVALID;
......@@ -994,8 +1005,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
static int PTRCALL
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
const char **nextTokPtr) {
int tok;
if (ptr >= end)
return XML_TOK_NONE;
......@@ -1013,27 +1023,26 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_APOS:
return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_LT:
{
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_QUEST:
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_NMSTRT:
case BT_HEX:
case BT_NONASCII:
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
*nextTokPtr = ptr - MINBPC(enc);
return XML_TOK_INSTANCE_START;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
case BT_LT: {
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_QUEST:
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_NMSTRT:
case BT_HEX:
case BT_NONASCII:
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
*nextTokPtr = ptr - MINBPC(enc);
return XML_TOK_INSTANCE_START;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_CR:
if (ptr + MINBPC(enc) == end) {
*nextTokPtr = end;
......@@ -1041,13 +1050,15 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return -XML_TOK_PROLOG_S;
}
/* fall through */
case BT_S: case BT_LF:
case BT_S:
case BT_LF:
for (;;) {
ptr += MINBPC(enc);
if (! HAS_CHAR(enc, ptr, end))
break;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_LF:
case BT_S:
case BT_LF:
break;
case BT_CR:
/* don't split CR/LF pair */
......@@ -1076,7 +1087,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
REQUIRE_CHARS(enc, ptr, end, 2);
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc);
*nextTokPtr = ptr + 2 * MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE;
}
}
......@@ -1099,8 +1110,12 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_PLUS:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_CLOSE_PAREN_PLUS;
case BT_CR: case BT_LF: case BT_S:
case BT_GT: case BT_COMMA: case BT_VERBAR:
case BT_CR:
case BT_LF:
case BT_S:
case BT_GT:
case BT_COMMA:
case BT_VERBAR:
case BT_RPAR:
*nextTokPtr = ptr;
return XML_TOK_CLOSE_PAREN;
......@@ -1115,24 +1130,26 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_DECL_CLOSE;
case BT_NUM:
return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NAME; \
break; \
} \
if (IS_NAME_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NMTOKEN; \
break; \
} \
*nextTokPtr = ptr; \
# define LEAD_CASE(n) \
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NAME; \
break; \
} \
if (IS_NAME_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NMTOKEN; \
break; \
} \
*nextTokPtr = ptr; \
return XML_TOK_INVALID;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_NMSTRT:
case BT_HEX:
tok = XML_TOK_NAME;
......@@ -1141,9 +1158,9 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
#ifdef XML_NS
# ifdef XML_NS
case BT_COLON:
#endif
# endif
tok = XML_TOK_NMTOKEN;
ptr += MINBPC(enc);
break;
......@@ -1165,13 +1182,19 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_RPAR: case BT_COMMA:
case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
case BT_S: case BT_CR: case BT_LF:
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
case BT_RPAR:
case BT_COMMA:
case BT_VERBAR:
case BT_LSQB:
case BT_PERCNT:
case BT_S:
case BT_CR:
case BT_LF:
*nextTokPtr = ptr;
return tok;
#ifdef XML_NS
# ifdef XML_NS
case BT_COLON:
ptr += MINBPC(enc);
switch (tok) {
......@@ -1179,7 +1202,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
REQUIRE_CHAR(enc, ptr, end);
tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
default:
tok = XML_TOK_NMTOKEN;
break;
......@@ -1190,23 +1213,23 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
break;
}
break;
#endif
# endif
case BT_PLUS:
if (tok == XML_TOK_NMTOKEN) {
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_NAME_PLUS;
case BT_AST:
if (tok == XML_TOK_NMTOKEN) {
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_NAME_ASTERISK;
case BT_QUEST:
if (tok == XML_TOK_NMTOKEN) {
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
......@@ -1221,9 +1244,8 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
}
static int PTRCALL
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
const char *start;
if (ptr >= end)
return XML_TOK_NONE;
......@@ -1238,10 +1260,14 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
start = ptr;
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_AMP:
if (ptr == start)
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
......@@ -1287,9 +1313,8 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
const char *start;
if (ptr >= end)
return XML_TOK_NONE;
......@@ -1304,10 +1329,14 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
start = ptr;
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_AMP:
if (ptr == start)
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
......@@ -1315,8 +1344,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_DATA_CHARS;
case BT_PERCNT:
if (ptr == start) {
int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
end, nextTokPtr);
int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
}
*nextTokPtr = ptr;
......@@ -1349,12 +1377,11 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_DATA_CHARS;
}
#ifdef XML_DTD
# ifdef XML_DTD
static int PTRCALL
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) {
int level = 0;
if (MINBPC(enc) > 1) {
size_t n = end - ptr;
......@@ -1365,7 +1392,7 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
}
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
INVALID_CASES(ptr, nextTokPtr)
case BT_LT:
ptr += MINBPC(enc);
REQUIRE_CHAR(enc, ptr, end);
......@@ -1402,12 +1429,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_PARTIAL;
}
#endif /* XML_DTD */
# endif /* XML_DTD */
static int PTRCALL
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
const char **badPtr)
{
const char **badPtr) {
ptr += MINBPC(enc);
end -= MINBPC(enc);
for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
......@@ -1430,9 +1456,9 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
case BT_AST:
case BT_PERCNT:
case BT_NUM:
#ifdef XML_NS
# ifdef XML_NS
case BT_COLON:
#endif
# endif
break;
case BT_S:
if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
......@@ -1442,7 +1468,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
break;
case BT_NAME:
case BT_NMSTRT:
if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
break;
/* fall through */
default:
......@@ -1466,9 +1492,8 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
*/
static int PTRCALL
PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
int attsMax, ATTRIBUTE *atts)
{
PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
ATTRIBUTE *atts) {
enum { other, inName, inValue } state = inName;
int nAtts = 0;
int open = 0; /* defined when state == inValue;
......@@ -1476,32 +1501,35 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
#define START_NAME \
if (state == other) { \
if (nAtts < attsMax) { \
atts[nAtts].name = ptr; \
atts[nAtts].normalized = 1; \
} \
state = inName; \
}
#define LEAD_CASE(n) \
case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
# define START_NAME \
if (state == other) { \
if (nAtts < attsMax) { \
atts[nAtts].name = ptr; \
atts[nAtts].normalized = 1; \
} \
state = inName; \
}
# define LEAD_CASE(n) \
case BT_LEAD##n: \
START_NAME ptr += (n - MINBPC(enc)); \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_NONASCII:
case BT_NMSTRT:
case BT_HEX:
START_NAME
break;
#undef START_NAME
# undef START_NAME
case BT_QUOT:
if (state != inValue) {
if (nAtts < attsMax)
atts[nAtts].valuePtr = ptr + MINBPC(enc);
state = inValue;
open = BT_QUOT;
}
else if (open == BT_QUOT) {
} else if (open == BT_QUOT) {
state = other;
if (nAtts < attsMax)
atts[nAtts].valueEnd = ptr;
......@@ -1514,8 +1542,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
atts[nAtts].valuePtr = ptr + MINBPC(enc);
state = inValue;
open = BT_APOS;
}
else if (open == BT_APOS) {
} else if (open == BT_APOS) {
state = other;
if (nAtts < attsMax)
atts[nAtts].valueEnd = ptr;
......@@ -1529,16 +1556,15 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
case BT_S:
if (state == inName)
state = other;
else if (state == inValue
&& nAtts < attsMax
&& atts[nAtts].normalized
else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
&& (ptr == atts[nAtts].valuePtr
|| BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
|| BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
|| BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
atts[nAtts].normalized = 0;
break;
case BT_CR: case BT_LF:
case BT_CR:
case BT_LF:
/* This case ensures that the first attribute name is counted
Apart from that we could just change state on the quote. */
if (state == inName)
......@@ -1559,29 +1585,44 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
}
static int PTRFASTCALL
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
{
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
int result = 0;
/* skip &# */
ptr += 2*MINBPC(enc);
UNUSED_P(enc);
ptr += 2 * MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
for (ptr += MINBPC(enc);
!CHAR_MATCHES(enc, ptr, ASCII_SEMI);
for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
ptr += MINBPC(enc)) {
int c = BYTE_TO_ASCII(enc, ptr);
switch (c) {
case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
case ASCII_0:
case ASCII_1:
case ASCII_2:
case ASCII_3:
case ASCII_4:
case ASCII_5:
case ASCII_6:
case ASCII_7:
case ASCII_8:
case ASCII_9:
result <<= 4;
result |= (c - ASCII_0);
break;
case ASCII_A: case ASCII_B: case ASCII_C:
case ASCII_D: case ASCII_E: case ASCII_F:
case ASCII_A:
case ASCII_B:
case ASCII_C:
case ASCII_D:
case ASCII_E:
case ASCII_F:
result <<= 4;
result += 10 + (c - ASCII_A);
break;
case ASCII_a: case ASCII_b: case ASCII_c:
case ASCII_d: case ASCII_e: case ASCII_f:
case ASCII_a:
case ASCII_b:
case ASCII_c:
case ASCII_d:
case ASCII_e:
case ASCII_f:
result <<= 4;
result += 10 + (c - ASCII_a);
break;
......@@ -1589,9 +1630,8 @@ PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
if (result >= 0x110000)
return -1;
}
}
else {
for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
} else {
for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
int c = BYTE_TO_ASCII(enc, ptr);
result *= 10;
result += (c - ASCII_0);
......@@ -1603,10 +1643,10 @@ PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
}
static int PTRCALL
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end)
{
switch ((end - ptr)/MINBPC(enc)) {
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
const char *end) {
UNUSED_P(enc);
switch ((end - ptr) / MINBPC(enc)) {
case 2:
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
switch (BYTE_TO_ASCII(enc, ptr)) {
......@@ -1657,9 +1697,9 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
}
static int PTRCALL
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2)
{
PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
const char *end1, const char *ptr2) {
UNUSED_P(enc);
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
if (end1 - ptr1 < MINBPC(enc)) {
/* This line cannot be executed. The incoming data has already
......@@ -1669,27 +1709,30 @@ PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
*/
return 0; /* LCOV_EXCL_LINE */
}
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
if (! CHAR_MATCHES(enc, ptr1, *ptr2))
return 0;
}
return ptr1 == end1;
}
static int PTRFASTCALL
PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
{
PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
const char *start = ptr;
for (;;) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_NONASCII:
case BT_NMSTRT:
#ifdef XML_NS
# ifdef XML_NS
case BT_COLON:
#endif
# endif
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
......@@ -1702,9 +1745,8 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
}
}
static const char * PTRFASTCALL
PREFIX(skipS)(const ENCODING *enc, const char *ptr)
{
static const char *PTRFASTCALL
PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
for (;;) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_LF:
......@@ -1719,19 +1761,18 @@ PREFIX(skipS)(const ENCODING *enc, const char *ptr)
}
static void PTRCALL
PREFIX(updatePosition)(const ENCODING *enc,
const char *ptr,
const char *end,
POSITION *pos)
{
PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
POSITION *pos) {
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
ptr += n; \
break;
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_LF:
pos->columnNumber = (XML_Size)-1;
pos->lineNumber++;
......@@ -1752,12 +1793,12 @@ PREFIX(updatePosition)(const ENCODING *enc,
}
}
#undef DO_LEAD_CASE
#undef MULTIBYTE_CASES
#undef INVALID_CASES
#undef CHECK_NAME_CASE
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES
# undef DO_LEAD_CASE
# undef MULTIBYTE_CASES
# undef INVALID_CASES
# undef CHECK_NAME_CASE
# undef CHECK_NAME_CASES
# undef CHECK_NMSTRT_CASE
# undef CHECK_NMSTRT_CASES
#endif /* XML_TOK_IMPL_C */
......@@ -31,43 +31,43 @@
*/
enum {
BT_NONXML,
BT_MALFORM,
BT_LT,
BT_AMP,
BT_RSQB,
BT_LEAD2,
BT_LEAD3,
BT_LEAD4,
BT_TRAIL,
BT_CR,
BT_LF,
BT_GT,
BT_QUOT,
BT_APOS,
BT_EQUALS,
BT_QUEST,
BT_EXCL,
BT_SOL,
BT_SEMI,
BT_NUM,
BT_LSQB,
BT_S,
BT_NMSTRT,
BT_COLON,
BT_HEX,
BT_DIGIT,
BT_NAME,
BT_MINUS,
BT_OTHER, /* known not to be a name or name start character */
BT_NONXML, /* e.g. noncharacter-FFFF */
BT_MALFORM, /* illegal, with regard to encoding */
BT_LT, /* less than = "<" */
BT_AMP, /* ampersand = "&" */
BT_RSQB, /* right square bracket = "[" */
BT_LEAD2, /* lead byte of a 2-byte UTF-8 character */
BT_LEAD3, /* lead byte of a 3-byte UTF-8 character */
BT_LEAD4, /* lead byte of a 4-byte UTF-8 character */
BT_TRAIL, /* trailing unit, e.g. second 16-bit unit of a 4-byte char. */
BT_CR, /* carriage return = "\r" */
BT_LF, /* line feed = "\n" */
BT_GT, /* greater than = ">" */
BT_QUOT, /* quotation character = "\"" */
BT_APOS, /* aposthrophe = "'" */
BT_EQUALS, /* equal sign = "=" */
BT_QUEST, /* question mark = "?" */
BT_EXCL, /* exclamation mark = "!" */
BT_SOL, /* solidus, slash = "/" */
BT_SEMI, /* semicolon = ";" */
BT_NUM, /* number sign = "#" */
BT_LSQB, /* left square bracket = "[" */
BT_S, /* white space, e.g. "\t", " "[, "\r"] */
BT_NMSTRT, /* non-hex name start letter = "G".."Z" + "g".."z" + "_" */
BT_COLON, /* colon = ":" */
BT_HEX, /* hex letter = "A".."F" + "a".."f" */
BT_DIGIT, /* digit = "0".."9" */
BT_NAME, /* dot and middle dot = "." + chr(0xb7) */
BT_MINUS, /* minus = "-" */
BT_OTHER, /* known not to be a name or name start character */
BT_NONASCII, /* might be a name or name start character */
BT_PERCNT,
BT_LPAR,
BT_RPAR,
BT_AST,
BT_PLUS,
BT_COMMA,
BT_VERBAR
BT_PERCNT, /* percent sign = "%" */
BT_LPAR, /* left parenthesis = "(" */
BT_RPAR, /* right parenthesis = "(" */
BT_AST, /* asterisk = "*" */
BT_PLUS, /* plus sign = "+" */
BT_COMMA, /* comma = "," */
BT_VERBAR /* vertical bar = "|" */
};
#include <stddef.h>
......@@ -33,56 +33,47 @@
#ifdef XML_TOK_NS_C
const ENCODING *
NS(XmlGetUtf8InternalEncoding)(void)
{
NS(XmlGetUtf8InternalEncoding)(void) {
return &ns(internal_utf8_encoding).enc;
}
const ENCODING *
NS(XmlGetUtf16InternalEncoding)(void)
{
#if BYTEORDER == 1234
NS(XmlGetUtf16InternalEncoding)(void) {
# if BYTEORDER == 1234
return &ns(internal_little2_encoding).enc;
#elif BYTEORDER == 4321
# elif BYTEORDER == 4321
return &ns(internal_big2_encoding).enc;
#else
# else
const short n = 1;
return (*(const char *)&n
? &ns(internal_little2_encoding).enc
: &ns(internal_big2_encoding).enc);
#endif
return (*(const char *)&n ? &ns(internal_little2_encoding).enc
: &ns(internal_big2_encoding).enc);
# endif
}
static const ENCODING * const NS(encodings)[] = {
&ns(latin1_encoding).enc,
&ns(ascii_encoding).enc,
&ns(utf8_encoding).enc,
&ns(big2_encoding).enc,
&ns(big2_encoding).enc,
&ns(little2_encoding).enc,
&ns(utf8_encoding).enc /* NO_ENC */
static const ENCODING *const NS(encodings)[] = {
&ns(latin1_encoding).enc, &ns(ascii_encoding).enc,
&ns(utf8_encoding).enc, &ns(big2_encoding).enc,
&ns(big2_encoding).enc, &ns(little2_encoding).enc,
&ns(utf8_encoding).enc /* NO_ENC */
};
static int PTRCALL
NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_PROLOG_STATE, ptr, end, nextTokPtr);
const char **nextTokPtr) {
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE,
ptr, end, nextTokPtr);
}
static int PTRCALL
NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_CONTENT_STATE, ptr, end, nextTokPtr);
const char **nextTokPtr) {
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE,
ptr, end, nextTokPtr);
}
int
NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
const char *name)
{
const char *name) {
int i = getEncodingIndex(name);
if (i == UNKNOWN_ENC)
return 0;
......@@ -96,9 +87,8 @@ NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
}
static const ENCODING *
NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
{
#define ENCODING_MAX 128
NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) {
# define ENCODING_MAX 128
char buf[ENCODING_MAX];
char *p = buf;
int i;
......@@ -115,28 +105,14 @@ NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
}
int
NS(XmlParseXmlDecl)(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
{
return doParseXmlDecl(NS(findEncoding),
isGeneralTextEntity,
enc,
ptr,
end,
badPtr,
versionPtr,
versionEndPtr,
encodingName,
encoding,
standalone);
NS(XmlParseXmlDecl)(int isGeneralTextEntity, const ENCODING *enc,
const char *ptr, const char *end, const char **badPtr,
const char **versionPtr, const char **versionEndPtr,
const char **encodingName, const ENCODING **encoding,
int *standalone) {
return doParseXmlDecl(NS(findEncoding), isGeneralTextEntity, enc, ptr, end,
badPtr, versionPtr, versionEndPtr, encodingName,
encoding, standalone);
}
#endif /* XML_TOK_NS_C */
......@@ -116,7 +116,6 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Modules\_elementtree.c" />
<ClCompile Include="..\Modules\expat\loadlibrary.c" />
<ClCompile Include="..\Modules\expat\xmlparse.c" />
<ClCompile Include="..\Modules\expat\xmlrole.c" />
<ClCompile Include="..\Modules\expat\xmltok.c" />
......
......@@ -33,9 +33,6 @@
<ClInclude Include="..\Modules\expat\latin1tab.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Modules\expat\loadlibrary.c">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Modules\expat\macconfig.h">
<Filter>Header Files</Filter>
</ClInclude>
......
......@@ -100,7 +100,6 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Modules\pyexpat.c" />
<ClCompile Include="..\Modules\expat\loadlibrary.c" />
<ClCompile Include="..\Modules\expat\xmlparse.c" />
<ClCompile Include="..\Modules\expat\xmlrole.c" />
<ClCompile Include="..\Modules\expat\xmltok.c" />
......
......@@ -20,9 +20,6 @@
<ClCompile Include="..\Modules\pyexpat.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Modules\expat\loadlibrary.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Modules\expat\xmlparse.c">
<Filter>Source Files</Filter>
</ClCompile>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment