Commit fc03a94a authored by Martin v. Löwis's avatar Martin v. Löwis

Incorporate Expat 1.95.6.

parent 5a772d32
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file COPYING for copying permission.
See the file COPYING for copying permission.
*/ */
#define ASCII_A 0x41 #define ASCII_A 0x41
......
/* /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd See the file COPYING for copying permission.
See the file COPYING for copying permission.
*/ */
#ifndef XmlParse_INCLUDED #ifndef XmlParse_INCLUDED
#define XmlParse_INCLUDED 1 #define XmlParse_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler
#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler
#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler
#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg
#endif
#include <stdlib.h> #include <stdlib.h>
#ifndef XMLPARSEAPI #ifndef XMLPARSEAPI
# if defined(__declspec) && !defined(__BEOS__) && !defined(__CYGWIN__) #if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
# define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl #ifdef XML_STATIC
# else #define XMLPARSEAPI(type) type __cdecl
# define XMLPARSEAPI(type) type #else
# endif #define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl
#endif
#else
#define XMLPARSEAPI(type) type
#endif
#endif /* not defined XMLPARSEAPI */ #endif /* not defined XMLPARSEAPI */
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef void *XML_Parser; #ifdef XML_UNICODE_WCHAR_T
#define XML_UNICODE
#endif
struct XML_ParserStruct;
typedef struct XML_ParserStruct *XML_Parser;
/* Information is UTF-8 encoded. */ #ifdef XML_UNICODE /* Information is UTF-16 encoded. */
#ifdef XML_UNICODE_WCHAR_T
typedef wchar_t XML_Char;
typedef wchar_t XML_LChar;
#else
typedef unsigned short XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE_WCHAR_T */
#else /* Information is UTF-8 encoded. */
typedef char XML_Char; typedef char XML_Char;
typedef char XML_LChar; typedef char XML_LChar;
#endif /* XML_UNICODE */
/* Boolean type used by this API (for example, as the return type of
   XML_ParserReset), together with its two canonical values.  Both
   constants are cast to XML_Bool so that they have exactly that type.

   Should this be defined using stdbool.h when C99 is available?
*/
typedef unsigned char XML_Bool;
#define XML_TRUE ((XML_Bool) 1)
#define XML_FALSE ((XML_Bool) 0)
/* Error codes reported by the parser.  No enumerator has an explicit
   value, so they are numbered consecutively starting with
   XML_ERROR_NONE == 0.  Inserting or reordering entries therefore
   changes the numeric value of every later code.
*/
enum XML_Error {
XML_ERROR_NONE,
XML_ERROR_NO_MEMORY,
XML_ERROR_SYNTAX,
XML_ERROR_NO_ELEMENTS,
XML_ERROR_INVALID_TOKEN,
XML_ERROR_UNCLOSED_TOKEN,
XML_ERROR_PARTIAL_CHAR,
XML_ERROR_TAG_MISMATCH,
XML_ERROR_DUPLICATE_ATTRIBUTE,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
XML_ERROR_PARAM_ENTITY_REF,
XML_ERROR_UNDEFINED_ENTITY,
XML_ERROR_RECURSIVE_ENTITY_REF,
XML_ERROR_ASYNC_ENTITY,
XML_ERROR_BAD_CHAR_REF,
XML_ERROR_BINARY_ENTITY_REF,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
XML_ERROR_MISPLACED_XML_PI,
XML_ERROR_UNKNOWN_ENCODING,
XML_ERROR_INCORRECT_ENCODING,
XML_ERROR_UNCLOSED_CDATA_SECTION,
XML_ERROR_EXTERNAL_ENTITY_HANDLING,
XML_ERROR_NOT_STANDALONE,
XML_ERROR_UNEXPECTED_STATE,
XML_ERROR_ENTITY_DECLARED_IN_PE,
XML_ERROR_FEATURE_REQUIRES_XML_DTD,
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
};
enum XML_Content_Type { enum XML_Content_Type {
XML_CTYPE_EMPTY = 1, XML_CTYPE_EMPTY = 1,
...@@ -75,7 +136,6 @@ struct XML_cp { ...@@ -75,7 +136,6 @@ struct XML_cp {
description of the model argument. It's the caller's responsibility description of the model argument. It's the caller's responsibility
to free model when finished with it. to free model when finished with it.
*/ */
typedef void (*XML_ElementDeclHandler) (void *userData, typedef void (*XML_ElementDeclHandler) (void *userData,
const XML_Char *name, const XML_Char *name,
XML_Content *model); XML_Content *model);
...@@ -84,16 +144,14 @@ XMLPARSEAPI(void) ...@@ -84,16 +144,14 @@ XMLPARSEAPI(void)
XML_SetElementDeclHandler(XML_Parser parser, XML_SetElementDeclHandler(XML_Parser parser,
XML_ElementDeclHandler eldecl); XML_ElementDeclHandler eldecl);
/* /* The Attlist declaration handler is called for *each* attribute. So
The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will a single Attlist declaration with multiple attributes declared will
generate multiple calls to this handler. The "default" parameter generate multiple calls to this handler. The "default" parameter
may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword. may be NULL in the case of the "#IMPLIED" or "#REQUIRED"
The "isrequired" parameter will be true and the default value will keyword. The "isrequired" parameter will be true and the default
be NULL in the case of "#REQUIRED". If "isrequired" is true and value will be NULL in the case of "#REQUIRED". If "isrequired" is
default is non-NULL, then this is a "#FIXED" default. true and default is non-NULL, then this is a "#FIXED" default.
*/ */
typedef void (*XML_AttlistDeclHandler) (void *userData, typedef void (*XML_AttlistDeclHandler) (void *userData,
const XML_Char *elname, const XML_Char *elname,
const XML_Char *attname, const XML_Char *attname,
...@@ -105,15 +163,14 @@ XMLPARSEAPI(void) ...@@ -105,15 +163,14 @@ XMLPARSEAPI(void)
XML_SetAttlistDeclHandler(XML_Parser parser, XML_SetAttlistDeclHandler(XML_Parser parser,
XML_AttlistDeclHandler attdecl); XML_AttlistDeclHandler attdecl);
/* The XML declaration handler is called for *both* XML declarations
/* The XML declaration handler is called for *both* XML declarations and and text declarations. The way to distinguish is that the version
text declarations. The way to distinguish is that the version parameter parameter will be NULL for text declarations. The encoding
will be null for text declarations. The encoding parameter may be null parameter may be NULL for XML declarations. The standalone
for XML declarations. The standalone parameter will be -1, 0, or 1 parameter will be -1, 0, or 1 indicating respectively that there
indicating respectively that there was no standalone parameter in was no standalone parameter in the declaration, that it was given
the declaration, that it was given as no, or that it was given as yes. as no, or that it was given as yes.
*/ */
typedef void (*XML_XmlDeclHandler) (void *userData, typedef void (*XML_XmlDeclHandler) (void *userData,
const XML_Char *version, const XML_Char *version,
const XML_Char *encoding, const XML_Char *encoding,
...@@ -131,26 +188,27 @@ typedef struct { ...@@ -131,26 +188,27 @@ typedef struct {
} XML_Memory_Handling_Suite; } XML_Memory_Handling_Suite;
/* Constructs a new parser; encoding is the encoding specified by the /* Constructs a new parser; encoding is the encoding specified by the
external protocol or null if there is none specified. */ external protocol or NULL if there is none specified.
*/
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ParserCreate(const XML_Char *encoding); XML_ParserCreate(const XML_Char *encoding);
/* Constructs a new parser and namespace processor. Element type /* Constructs a new parser and namespace processor. Element type
names and attribute names that belong to a namespace will be expanded; names and attribute names that belong to a namespace will be
unprefixed attribute names are never expanded; unprefixed element type expanded; unprefixed attribute names are never expanded; unprefixed
names are expanded only if there is a default namespace. The expanded element type names are expanded only if there is a default
name is the concatenation of the namespace URI, the namespace namespace. The expanded name is the concatenation of the namespace
separator character, and the local part of the name. If the namespace URI, the namespace separator character, and the local part of the
separator is '\0' then the namespace URI and the local part will be name. If the namespace separator is '\0' then the namespace URI
concatenated without any separator. When a namespace is not declared, and the local part will be concatenated without any separator.
the name and prefix will be passed through without expansion. */ When a namespace is not declared, the name and prefix will be
passed through without expansion.
*/
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
/* Constructs a new parser using the memory management suit referred to /* Constructs a new parser using the memory management suite referred to
by memsuite. If memsuite is NULL, then use the standard library memory by memsuite. If memsuite is NULL, then use the standard library memory
suite. If namespaceSeparator is non-NULL it creates a parser with suite. If namespaceSeparator is non-NULL it creates a parser with
namespace processing as described above. The character pointed at namespace processing as described above. The character pointed at
...@@ -159,15 +217,26 @@ XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); ...@@ -159,15 +217,26 @@ XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
All further memory operations used for the created parser will come from All further memory operations used for the created parser will come from
the given suite. the given suite.
*/ */
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ParserCreate_MM(const XML_Char *encoding, XML_ParserCreate_MM(const XML_Char *encoding,
const XML_Memory_Handling_Suite *memsuite, const XML_Memory_Handling_Suite *memsuite,
const XML_Char *namespaceSeparator); const XML_Char *namespaceSeparator);
/* atts is array of name/value pairs, terminated by 0; /* Prepare a parser object to be re-used. This is particularly
names and values are 0 terminated. */ valuable when memory allocation overhead is disproportionately high,
such as when a large number of small documents need to be parsed.
All handlers are cleared from the parser, except for the
unknownEncodingHandler. The parser's external state is re-initialized
except for the values of ns and ns_triplets.
Added in Expat 1.95.3.
*/
XMLPARSEAPI(XML_Bool)
XML_ParserReset(XML_Parser parser, const XML_Char *encoding);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated.
*/
typedef void (*XML_StartElementHandler)(void *userData, typedef void (*XML_StartElementHandler)(void *userData,
const XML_Char *name, const XML_Char *name,
const XML_Char **atts); const XML_Char **atts);
...@@ -192,26 +261,26 @@ typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data); ...@@ -192,26 +261,26 @@ typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data);
typedef void (*XML_StartCdataSectionHandler)(void *userData); typedef void (*XML_StartCdataSectionHandler)(void *userData);
typedef void (*XML_EndCdataSectionHandler)(void *userData); typedef void (*XML_EndCdataSectionHandler)(void *userData);
/* This is called for any characters in the XML document for /* This is called for any characters in the XML document for which
which there is no applicable handler. This includes both there is no applicable handler. This includes both characters that
characters that are part of markup which is of a kind that is are part of markup which is of a kind that is not reported
not reported (comments, markup declarations), or characters (comments, markup declarations), or characters that are part of a
that are part of a construct which could be reported but construct which could be reported but for which no handler has been
for which no handler has been supplied. The characters are passed supplied. The characters are passed exactly as they were in the XML
exactly as they were in the XML document except that document except that they will be encoded in UTF-8 or UTF-16.
they will be encoded in UTF-8. Line boundaries are not normalized. Line boundaries are not normalized. Note that a byte order mark
Note that a byte order mark character is not passed to the default handler. character is not passed to the default handler. There are no
There are no guarantees about how characters are divided between calls guarantees about how characters are divided between calls to the
to the default handler: for example, a comment might be split between default handler: for example, a comment might be split between
multiple calls. */ multiple calls.
*/
typedef void (*XML_DefaultHandler)(void *userData, typedef void (*XML_DefaultHandler)(void *userData,
const XML_Char *s, const XML_Char *s,
int len); int len);
/* This is called for the start of the DOCTYPE declaration, before /* This is called for the start of the DOCTYPE declaration, before
any DTD or internal subset is parsed. */ any DTD or internal subset is parsed.
*/
typedef void (*XML_StartDoctypeDeclHandler)(void *userData, typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *doctypeName, const XML_Char *doctypeName,
const XML_Char *sysid, const XML_Char *sysid,
...@@ -219,7 +288,9 @@ typedef void (*XML_StartDoctypeDeclHandler)(void *userData, ...@@ -219,7 +288,9 @@ typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
int has_internal_subset); int has_internal_subset);
/* This is called for the end of the DOCTYPE declaration when the /* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external subset. */ closing > is encountered, but after processing any external
subset.
*/
typedef void (*XML_EndDoctypeDeclHandler)(void *userData); typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
/* This is called for entity declarations. The is_parameter_entity /* This is called for entity declarations. The is_parameter_entity
...@@ -227,17 +298,19 @@ typedef void (*XML_EndDoctypeDeclHandler)(void *userData); ...@@ -227,17 +298,19 @@ typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
otherwise. otherwise.
For internal entities (<!ENTITY foo "bar">), value will For internal entities (<!ENTITY foo "bar">), value will
be non-null and systemId, publicID, and notationName will be null. be non-NULL and systemId, publicID, and notationName will be NULL.
The value string is NOT null terminated; the length is provided in The value string is NOT nul-terminated; the length is provided in
the value_length argument. Since it is legal to have zero-length the value_length argument. Since it is legal to have zero-length
values, do not use this argument to test for internal entities. values, do not use this argument to test for internal entities.
For external entities, value will be null and systemId will be non-null. For external entities, value will be NULL and systemId will be
The publicId argument will be null unless a public identifier was non-NULL. The publicId argument will be NULL unless a public
provided. The notationName argument will have a non-null value only identifier was provided. The notationName argument will have a
for unparsed entity declarations. non-NULL value only for unparsed entity declarations.
*/
Note that is_parameter_entity can't be changed to XML_Bool, since
that would break binary compatibility.
*/
typedef void (*XML_EntityDeclHandler) (void *userData, typedef void (*XML_EntityDeclHandler) (void *userData,
const XML_Char *entityName, const XML_Char *entityName,
int is_parameter_entity, int is_parameter_entity,
...@@ -255,11 +328,12 @@ XML_SetEntityDeclHandler(XML_Parser parser, ...@@ -255,11 +328,12 @@ XML_SetEntityDeclHandler(XML_Parser parser,
/* OBSOLETE -- OBSOLETE -- OBSOLETE /* OBSOLETE -- OBSOLETE -- OBSOLETE
This handler has been superseded by the EntityDeclHandler above. This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility. It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA)
entity. The base argument is whatever was set by XML_SetBase.
The entityName, systemId and notationName arguments will never be null.
The other arguments may be. */
This is called for a declaration of an unparsed (NDATA) entity.
The base argument is whatever was set by XML_SetBase. The
entityName, systemId and notationName arguments will never be
NULL. The other arguments may be.
*/
typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
const XML_Char *entityName, const XML_Char *entityName,
const XML_Char *base, const XML_Char *base,
...@@ -267,10 +341,10 @@ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, ...@@ -267,10 +341,10 @@ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
const XML_Char *publicId, const XML_Char *publicId,
const XML_Char *notationName); const XML_Char *notationName);
/* This is called for a declaration of notation. /* This is called for a declaration of notation. The base argument is
The base argument is whatever was set by XML_SetBase. whatever was set by XML_SetBase. The notationName will never be
The notationName will never be null. The other arguments can be. */ NULL. The other arguments can be.
*/
typedef void (*XML_NotationDeclHandler)(void *userData, typedef void (*XML_NotationDeclHandler)(void *userData,
const XML_Char *notationName, const XML_Char *notationName,
const XML_Char *base, const XML_Char *base,
...@@ -278,11 +352,11 @@ typedef void (*XML_NotationDeclHandler)(void *userData, ...@@ -278,11 +352,11 @@ typedef void (*XML_NotationDeclHandler)(void *userData,
const XML_Char *publicId); const XML_Char *publicId);
/* When namespace processing is enabled, these are called once for /* When namespace processing is enabled, these are called once for
each namespace declaration. The calls to the start and end element each namespace declaration. The calls to the start and end element
handlers occur between the calls to the start and end namespace handlers occur between the calls to the start and end namespace
declaration handlers. For an xmlns attribute, prefix will be null. declaration handlers. For an xmlns attribute, prefix will be
For an xmlns="" attribute, uri will be null. */ NULL. For an xmlns="" attribute, uri will be NULL.
*/
typedef void (*XML_StartNamespaceDeclHandler)(void *userData, typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
const XML_Char *prefix, const XML_Char *prefix,
const XML_Char *uri); const XML_Char *uri);
...@@ -290,87 +364,123 @@ typedef void (*XML_StartNamespaceDeclHandler)(void *userData, ...@@ -290,87 +364,123 @@ typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
typedef void (*XML_EndNamespaceDeclHandler)(void *userData, typedef void (*XML_EndNamespaceDeclHandler)(void *userData,
const XML_Char *prefix); const XML_Char *prefix);
/* This is called if the document is not standalone (it has an /* This is called if the document is not standalone, that is, it has an
external subset or a reference to a parameter entity, but does not external subset or a reference to a parameter entity, but does not
have standalone="yes"). If this handler returns 0, then processing have standalone="yes". If this handler returns XML_STATUS_ERROR,
will not continue, and the parser will return a then processing will not continue, and the parser will return a
XML_ERROR_NOT_STANDALONE error. */ XML_ERROR_NOT_STANDALONE error.
If parameter entity parsing is enabled, then in addition to the
conditions above this handler will only be called if the referenced
entity was actually read.
*/
typedef int (*XML_NotStandaloneHandler)(void *userData); typedef int (*XML_NotStandaloneHandler)(void *userData);
/* This is called for a reference to an external parsed general entity. /* This is called for a reference to an external parsed general
The referenced entity is not automatically parsed. entity. The referenced entity is not automatically parsed. The
The application can parse it immediately or later using application can parse it immediately or later using
XML_ExternalEntityParserCreate. XML_ExternalEntityParserCreate.
The parser argument is the parser parsing the entity containing the reference;
it can be passed as the parser argument to XML_ExternalEntityParserCreate. The parser argument is the parser parsing the entity containing the
The systemId argument is the system identifier as specified in the entity reference; it can be passed as the parser argument to
declaration; it will not be null. XML_ExternalEntityParserCreate. The systemId argument is the
The base argument is the system identifier that should be used as the base for system identifier as specified in the entity declaration; it will
resolving systemId if systemId was relative; this is set by XML_SetBase; not be NULL.
it may be null.
The publicId argument is the public identifier as specified in the entity The base argument is the system identifier that should be used as
declaration, or null if none was specified; the whitespace in the public the base for resolving systemId if systemId was relative; this is
identifier will have been normalized as required by the XML spec. set by XML_SetBase; it may be NULL.
The context argument specifies the parsing context in the format
expected by the context argument to The publicId argument is the public identifier as specified in the
XML_ExternalEntityParserCreate; context is valid only until the handler entity declaration, or NULL if none was specified; the whitespace
returns, so if the referenced entity is to be parsed later, it must be copied. in the public identifier will have been normalized as required by
The handler should return 0 if processing should not continue because of the XML spec.
a fatal error in the handling of the external entity.
In this case the calling parser will return an The context argument specifies the parsing context in the format
XML_ERROR_EXTERNAL_ENTITY_HANDLING error. expected by the context argument to XML_ExternalEntityParserCreate;
Note that unlike other handlers the first argument is the parser, not context is valid only until the handler returns, so if the
userData. */ referenced entity is to be parsed later, it must be copied.
context is NULL only when the entity is a parameter entity.
The handler should return XML_STATUS_ERROR if processing should not
continue because of a fatal error in the handling of the external
entity. In this case the calling parser will return an
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
Note that unlike other handlers the first argument is the parser,
not userData.
*/
typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser, typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser,
const XML_Char *context, const XML_Char *context,
const XML_Char *base, const XML_Char *base,
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId); const XML_Char *publicId);
/* This structure is filled in by the XML_UnknownEncodingHandler /* This is called in two situations:
to provide information to the parser about encodings that are unknown 1) An entity reference is encountered for which no declaration
to the parser. has been read *and* this is not an error.
The map[b] member gives information about byte sequences 2) An internal entity reference is read, but not expanded, because
whose first byte is b. XML_SetDefaultHandler has been called.
If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar Note: skipped parameter entities in declarations and skipped general
value c. entities in attribute values cannot be reported, because
If map[b] is -1, then the byte sequence is malformed. the event would be out of sync with the reporting of the
If map[b] is -n, where n >= 2, then b is the first byte of an n-byte declarations or attribute values
sequence that encodes a single Unicode scalar value. */
The data member will be passed as the first argument to the convert function. typedef void (*XML_SkippedEntityHandler)(void *userData,
The convert function is used to convert multibyte sequences; const XML_Char *entityName,
s will point to a n-byte sequence where map[(unsigned char)*s] == -n. int is_parameter_entity);
The convert function must return the Unicode scalar value
represented by this byte sequence or -1 if the byte sequence is malformed. /* This structure is filled in by the XML_UnknownEncodingHandler to
The convert function may be null if the encoding is a single-byte encoding, provide information to the parser about encodings that are unknown
that is if map[b] >= -1 for all bytes b. to the parser.
When the parser is finished with the encoding, then if release is not null,
it will call release passing it the data member;
once release has been called, the convert function will not be called again.
Expat places certain restrictions on the encodings that are supported
using this mechanism.
1. Every ASCII character that can appear in a well-formed XML document,
other than the characters
$@\^`{}~ The map[b] member gives information about byte sequences whose
first byte is b.
If map[b] is c where c is >= 0, then b by itself encodes the
Unicode scalar value c.
If map[b] is -1, then the byte sequence is malformed.
If map[b] is -n, where n >= 2, then b is the first byte of an
n-byte sequence that encodes a single Unicode scalar value.
The data member will be passed as the first argument to the convert
function.
The convert function is used to convert multibyte sequences; s will
point to a n-byte sequence where map[(unsigned char)*s] == -n. The
convert function must return the Unicode scalar value represented
by this byte sequence or -1 if the byte sequence is malformed.
must be represented by a single byte, and that byte must be the The convert function may be NULL if the encoding is a single-byte
same byte that represents that character in ASCII. encoding, that is if map[b] >= -1 for all bytes b.
2. No character may require more than 4 bytes to encode. When the parser is finished with the encoding, then if release is
not NULL, it will call release passing it the data member; once
release has been called, the convert function will not be called
again.
3. All characters encoded must have Unicode scalar values <= 0xFFFF, (i.e., Expat places certain restrictions on the encodings that are supported
characters that would be encoded by surrogates in UTF-16 are not using this mechanism.
allowed). Note that this restriction doesn't apply to the built-in
support for UTF-8 and UTF-16.
4. No Unicode character may be encoded by more than one distinct sequence 1. Every ASCII character that can appear in a well-formed XML document,
of bytes. */ other than the characters
$@\^`{}~
must be represented by a single byte, and that byte must be the
same byte that represents that character in ASCII.
2. No character may require more than 4 bytes to encode.
3. All characters encoded must have Unicode scalar values <=
0xFFFF, (i.e., characters that would be encoded by surrogates in
UTF-16 are not allowed). Note that this restriction doesn't
apply to the built-in support for UTF-8 and UTF-16.
4. No Unicode character may be encoded by more than one distinct
sequence of bytes.
*/
typedef struct { typedef struct {
int map[256]; int map[256];
void *data; void *data;
...@@ -379,16 +489,20 @@ typedef struct { ...@@ -379,16 +489,20 @@ typedef struct {
} XML_Encoding; } XML_Encoding;
/* This is called for an encoding that is unknown to the parser. /* This is called for an encoding that is unknown to the parser.
The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in
the encoding declaration.
If the callback can provide information about the encoding,
it must fill in the XML_Encoding structure, and return 1.
Otherwise it must return 0.
If info does not describe a suitable encoding,
then the parser will return an XML_ERROR_UNKNOWN_ENCODING error. */
The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in
the encoding declaration.
If the callback can provide information about the encoding, it must
fill in the XML_Encoding structure, and return XML_STATUS_OK.
Otherwise it must return XML_STATUS_ERROR.
If info does not describe a suitable encoding, then the parser will
return an XML_ERROR_UNKNOWN_ENCODING error.
*/
typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData, typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData,
const XML_Char *name, const XML_Char *name,
XML_Encoding *info); XML_Encoding *info);
...@@ -429,17 +543,17 @@ XML_SetEndCdataSectionHandler(XML_Parser parser, ...@@ -429,17 +543,17 @@ XML_SetEndCdataSectionHandler(XML_Parser parser,
XML_EndCdataSectionHandler end); XML_EndCdataSectionHandler end);
/* This sets the default handler and also inhibits expansion of /* This sets the default handler and also inhibits expansion of
internal entities. The entity reference will be passed to the default internal entities. These entity references will be passed to the
handler. */ default handler, or to the skipped entity handler, if one is set.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetDefaultHandler(XML_Parser parser, XML_SetDefaultHandler(XML_Parser parser,
XML_DefaultHandler handler); XML_DefaultHandler handler);
/* This sets the default handler but does not inhibit expansion of /* This sets the default handler but does not inhibit expansion of
internal entities. The entity reference will not be passed to the internal entities. The entity reference will not be passed to the
default handler. */ default handler.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetDefaultHandlerExpand(XML_Parser parser, XML_SetDefaultHandlerExpand(XML_Parser parser,
XML_DefaultHandler handler); XML_DefaultHandler handler);
...@@ -486,32 +600,41 @@ XMLPARSEAPI(void) ...@@ -486,32 +600,41 @@ XMLPARSEAPI(void)
XML_SetExternalEntityRefHandler(XML_Parser parser, XML_SetExternalEntityRefHandler(XML_Parser parser,
XML_ExternalEntityRefHandler handler); XML_ExternalEntityRefHandler handler);
/* If a non-null value for arg is specified here, then it will be passed /* If a non-NULL value for arg is specified here, then it will be
as the first argument to the external entity ref handler instead passed as the first argument to the external entity ref handler
of the parser object. */ instead of the parser object.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg); XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg);
XMLPARSEAPI(void)
XML_SetSkippedEntityHandler(XML_Parser parser,
XML_SkippedEntityHandler handler);
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetUnknownEncodingHandler(XML_Parser parser, XML_SetUnknownEncodingHandler(XML_Parser parser,
XML_UnknownEncodingHandler handler, XML_UnknownEncodingHandler handler,
void *encodingHandlerData); void *encodingHandlerData);
/* This can be called within a handler for a start element, end element, /* This can be called within a handler for a start element, end
processing instruction or character data. It causes the corresponding element, processing instruction or character data. It causes the
markup to be passed to the default handler. */ corresponding markup to be passed to the default handler.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_DefaultCurrent(XML_Parser parser); XML_DefaultCurrent(XML_Parser parser);
/* If do_nst is non-zero, and namespace processing is in effect, and /* If do_nst is non-zero, and namespace processing is in effect, and
a name has a prefix (i.e. an explicit namespace qualifier) then a name has a prefix (i.e. an explicit namespace qualifier) then
that name is returned as a triplet in a single that name is returned as a triplet in a single string separated by
string separated by the separator character specified when the parser the separator character specified when the parser was created: URI
was created: URI + sep + local_name + sep + prefix. + sep + local_name + sep + prefix.
If do_nst is zero, then namespace information is returned in the If do_nst is zero, then namespace information is returned in the
default manner (URI + sep + local_name) whether or not the names default manner (URI + sep + local_name) whether or not the name
has a prefix. has a prefix.
Note: Calling XML_SetReturnNSTriplet after XML_Parse or
XML_ParseBuffer has no effect.
*/ */
XMLPARSEAPI(void) XMLPARSEAPI(void)
...@@ -521,80 +644,126 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); ...@@ -521,80 +644,126 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst);
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetUserData(XML_Parser parser, void *userData); XML_SetUserData(XML_Parser parser, void *userData);
/* Returns the last value set by XML_SetUserData or null. */ /* Returns the last value set by XML_SetUserData or NULL. */
#define XML_GetUserData(parser) (*(void **)(parser)) #define XML_GetUserData(parser) (*(void **)(parser))
/* This is equivalent to supplying an encoding argument /* This is equivalent to supplying an encoding argument to
to XML_ParserCreate. It must not be called after XML_Parse XML_ParserCreate. On success XML_SetEncoding returns non-zero,
or XML_ParseBuffer. */ zero otherwise.
Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer
XMLPARSEAPI(int) has no effect and returns XML_STATUS_ERROR.
*/
XMLPARSEAPI(enum XML_Status)
XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); XML_SetEncoding(XML_Parser parser, const XML_Char *encoding);
/* If this function is called, then the parser will be passed /* If this function is called, then the parser will be passed as the
as the first argument to callbacks instead of userData. first argument to callbacks instead of userData. The userData will
The userData will still be accessible using XML_GetUserData. */ still be accessible using XML_GetUserData.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_UseParserAsHandlerArg(XML_Parser parser); XML_UseParserAsHandlerArg(XML_Parser parser);
/* Sets the base to be used for resolving relative URIs in system /* If useDTD == XML_TRUE is passed to this function, then the parser
identifiers in declarations. Resolving relative identifiers is left will assume that there is an external subset, even if none is
to the application: this value will be passed through as the base specified in the document. In such a case the parser will call the
argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler externalEntityRefHandler with a value of NULL for the systemId
and XML_UnparsedEntityDeclHandler. The base argument will be copied. argument (the publicId and context arguments will be NULL as well).
Returns zero if out of memory, non-zero otherwise. */ Note: If this function is called, then this must be done before
the first call to XML_Parse or XML_ParseBuffer, since it will
have no effect after that. Returns
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING.
Note: If the document does not have a DOCTYPE declaration at all,
then startDoctypeDeclHandler and endDoctypeDeclHandler will not
be called, despite an external subset being parsed.
Note: If XML_DTD is not defined when Expat is compiled, returns
XML_ERROR_FEATURE_REQUIRES_XML_DTD.
*/
XMLPARSEAPI(enum XML_Error)
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
XMLPARSEAPI(int)
/* Sets the base to be used for resolving relative URIs in system
identifiers in declarations. Resolving relative identifiers is
left to the application: this value will be passed through as the
base argument to the XML_ExternalEntityRefHandler,
XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base
argument will be copied. Returns XML_STATUS_ERROR if out of memory,
XML_STATUS_OK otherwise.
*/
XMLPARSEAPI(enum XML_Status)
XML_SetBase(XML_Parser parser, const XML_Char *base); XML_SetBase(XML_Parser parser, const XML_Char *base);
XMLPARSEAPI(const XML_Char *) XMLPARSEAPI(const XML_Char *)
XML_GetBase(XML_Parser parser); XML_GetBase(XML_Parser parser);
/* Returns the number of the attribute/value pairs passed in last call /* Returns the number of the attribute/value pairs passed in last call
to the XML_StartElementHandler that were specified in the start-tag to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus rather than defaulted. Each attribute/value pair counts as 2; thus
this corresponds to an index into the atts array passed to the this corresponds to an index into the atts array passed to the
XML_StartElementHandler. */ XML_StartElementHandler.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_GetSpecifiedAttributeCount(XML_Parser parser); XML_GetSpecifiedAttributeCount(XML_Parser parser);
/* Returns the index of the ID attribute passed in the last call to /* Returns the index of the ID attribute passed in the last call to
XML_StartElementHandler, or -1 if there is no ID attribute. Each XML_StartElementHandler, or -1 if there is no ID attribute. Each
attribute/value pair counts as 2; thus this corresponds to an index attribute/value pair counts as 2; thus this corresponds to an
into the atts array passed to the XML_StartElementHandler. */ index into the atts array passed to the XML_StartElementHandler.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_GetIdAttributeIndex(XML_Parser parser); XML_GetIdAttributeIndex(XML_Parser parser);
/* Parses some input. Returns 0 if a fatal error is detected. /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is
The last call to XML_Parse must have isFinal true; detected. The last call to XML_Parse must have isFinal true; len
len may be zero for this call (or any other). */ may be zero for this call (or any other).
XMLPARSEAPI(int)
The XML_Status enum gives the possible return values for the
XML_Parse and XML_ParseBuffer functions. Though the return values
for these functions have always been described as a Boolean value,
the implementation, at least for the 1.95.x series, has always
returned exactly one of these values. The preprocessor #defines
are included so this stanza can be added to code that still needs
to support older versions of Expat 1.95.x:
#ifndef XML_STATUS_OK
#define XML_STATUS_OK 1
#define XML_STATUS_ERROR 0
#endif
Otherwise, the #define hackery is quite ugly and would have been dropped.
*/
/* Status codes returned by the functions declared with
   XMLPARSEAPI(enum XML_Status) in this header (XML_Parse,
   XML_ParseBuffer, XML_SetEncoding, XML_SetBase).
*/
enum XML_Status {
XML_STATUS_ERROR = 0,
/* Each enumerator is also defined as a macro expanding to itself, so
   that client code can test for it with #ifdef / #ifndef and supply
   its own fallback definition when building against an older header. */
#define XML_STATUS_ERROR XML_STATUS_ERROR
XML_STATUS_OK = 1
#define XML_STATUS_OK XML_STATUS_OK
};
XMLPARSEAPI(enum XML_Status)
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); XML_Parse(XML_Parser parser, const char *s, int len, int isFinal);
XMLPARSEAPI(void *) XMLPARSEAPI(void *)
XML_GetBuffer(XML_Parser parser, int len); XML_GetBuffer(XML_Parser parser, int len);
XMLPARSEAPI(int) XMLPARSEAPI(enum XML_Status)
XML_ParseBuffer(XML_Parser parser, int len, int isFinal); XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
/* Creates an XML_Parser object that can parse an external general /* Creates an XML_Parser object that can parse an external general
entity; context is a '\0'-terminated string specifying the parse entity; context is a '\0'-terminated string specifying the parse
context; encoding is a '\0'-terminated string giving the name of the context; encoding is a '\0'-terminated string giving the name of
externally specified encoding, or null if there is no externally the externally specified encoding, or NULL if there is no
specified encoding. The context string consists of a sequence of externally specified encoding. The context string consists of a
tokens separated by formfeeds (\f); a token consisting of a name sequence of tokens separated by formfeeds (\f); a token consisting
specifies that the general entity of the name is open; a token of the of a name specifies that the general entity of the name is open; a
form prefix=uri specifies the namespace for a particular prefix; a token of the form prefix=uri specifies the namespace for a
token of the form =uri specifies the default namespace. This can be particular prefix; a token of the form =uri specifies the default
called at any point after the first call to an namespace. This can be called at any point after the first call to
ExternalEntityRefHandler so long as the parser has not yet been an ExternalEntityRefHandler so long as the parser has not yet
freed. The new parser is completely independent and may safely be been freed. The new parser is completely independent and may
used in a separate thread. The handlers and userData are initialized safely be used in a separate thread. The handlers and userData are
from the parser argument. Returns 0 if out of memory. Otherwise initialized from the parser argument. Returns NULL if out of memory.
returns a new XML_Parser object. */ Otherwise returns a new XML_Parser object.
*/
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ExternalEntityParserCreate(XML_Parser parser, XML_ExternalEntityParserCreate(XML_Parser parser,
const XML_Char *context, const XML_Char *context,
...@@ -607,76 +776,56 @@ enum XML_ParamEntityParsing { ...@@ -607,76 +776,56 @@ enum XML_ParamEntityParsing {
}; };
/* Controls parsing of parameter entities (including the external DTD /* Controls parsing of parameter entities (including the external DTD
subset). If parsing of parameter entities is enabled, then references subset). If parsing of parameter entities is enabled, then
to external parameter entities (including the external DTD subset) references to external parameter entities (including the external
will be passed to the handler set with DTD subset) will be passed to the handler set with
XML_SetExternalEntityRefHandler. The context passed will be 0. XML_SetExternalEntityRefHandler. The context passed will be 0.
Unlike external general entities, external parameter entities can only
be parsed synchronously. If the external parameter entity is to be Unlike external general entities, external parameter entities can
parsed, it must be parsed during the call to the external entity ref only be parsed synchronously. If the external parameter entity is
handler: the complete sequence of XML_ExternalEntityParserCreate, to be parsed, it must be parsed during the call to the external
XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during entity ref handler: the complete sequence of
this call. After XML_ExternalEntityParserCreate has been called to XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and
create the parser for the external parameter entity (context must be 0 XML_ParserFree calls must be made during this call. After
for this call), it is illegal to make any calls on the old parser XML_ExternalEntityParserCreate has been called to create the parser
until XML_ParserFree has been called on the newly created parser. If for the external parameter entity (context must be 0 for this
the library has been compiled without support for parameter entity call), it is illegal to make any calls on the old parser until
parsing (ie without XML_DTD being defined), then XML_ParserFree has been called on the newly created parser.
XML_SetParamEntityParsing will return 0 if parsing of parameter If the library has been compiled without support for parameter
entities is requested; otherwise it will return non-zero. */ entity parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero.
Note: If XML_SetParamEntityParsing is called after XML_Parse or
XML_ParseBuffer, then it has no effect and will always return 0.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_SetParamEntityParsing(XML_Parser parser, XML_SetParamEntityParsing(XML_Parser parser,
enum XML_ParamEntityParsing parsing); enum XML_ParamEntityParsing parsing);
enum XML_Error { /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
XML_ERROR_NONE, XML_GetErrorCode returns information about the error.
XML_ERROR_NO_MEMORY, */
XML_ERROR_SYNTAX,
XML_ERROR_NO_ELEMENTS,
XML_ERROR_INVALID_TOKEN,
XML_ERROR_UNCLOSED_TOKEN,
XML_ERROR_PARTIAL_CHAR,
XML_ERROR_TAG_MISMATCH,
XML_ERROR_DUPLICATE_ATTRIBUTE,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
XML_ERROR_PARAM_ENTITY_REF,
XML_ERROR_UNDEFINED_ENTITY,
XML_ERROR_RECURSIVE_ENTITY_REF,
XML_ERROR_ASYNC_ENTITY,
XML_ERROR_BAD_CHAR_REF,
XML_ERROR_BINARY_ENTITY_REF,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
XML_ERROR_MISPLACED_XML_PI,
XML_ERROR_UNKNOWN_ENCODING,
XML_ERROR_INCORRECT_ENCODING,
XML_ERROR_UNCLOSED_CDATA_SECTION,
XML_ERROR_EXTERNAL_ENTITY_HANDLING,
XML_ERROR_NOT_STANDALONE,
XML_ERROR_UNEXPECTED_STATE
};
/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode
returns information about the error. */
XMLPARSEAPI(enum XML_Error) XMLPARSEAPI(enum XML_Error)
XML_GetErrorCode(XML_Parser parser); XML_GetErrorCode(XML_Parser parser);
/* These functions return information about the current parse location. /* These functions return information about the current parse
They may be called when XML_Parse or XML_ParseBuffer return 0; location. They may be called from any callback called to report
in this case the location is the location of the character at which some parse event; in this case the location is the location of
the error was detected. the first of the sequence of characters that generated the event.
They may also be called from any other callback called to report
some parse event; in this the location is the location of the first
of the sequence of characters that generated the event. */
They may also be called after returning from a call to XML_Parse
or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then
the location is the location of the character at which the error
was detected; otherwise the location is the location of the last
parse event, as described above.
*/
XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser); XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser);
XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser); XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser);
XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser); XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser);
/* Return the number of bytes in the current event. /* Return the number of bytes in the current event.
Returns 0 if the event is in an internal entity. */ Returns 0 if the event is in an internal entity.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_GetCurrentByteCount(XML_Parser parser); XML_GetCurrentByteCount(XML_Parser parser);
...@@ -684,12 +833,12 @@ XML_GetCurrentByteCount(XML_Parser parser); ...@@ -684,12 +833,12 @@ XML_GetCurrentByteCount(XML_Parser parser);
the integer pointed to by offset to the offset within this buffer the integer pointed to by offset to the offset within this buffer
of the current parse position, and sets the integer pointed to by size of the current parse position, and sets the integer pointed to by size
to the size of this buffer (the number of input bytes). Otherwise to the size of this buffer (the number of input bytes). Otherwise
returns a null pointer. Also returns a null pointer if a parse isn't returns a NULL pointer. Also returns a NULL pointer if a parse isn't
active. active.
NOTE: The character pointer returned should not be used outside NOTE: The character pointer returned should not be used outside
the handler that makes the call. */ the handler that makes the call.
*/
XMLPARSEAPI(const char *) XMLPARSEAPI(const char *)
XML_GetInputContext(XML_Parser parser, XML_GetInputContext(XML_Parser parser,
int *offset, int *offset,
...@@ -700,13 +849,27 @@ XML_GetInputContext(XML_Parser parser, ...@@ -700,13 +849,27 @@ XML_GetInputContext(XML_Parser parser,
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex #define XML_GetErrorByteIndex XML_GetCurrentByteIndex
/* Frees the content model passed to the element declaration handler */
XMLPARSEAPI(void)
XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *)
XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void)
XML_MemFree(XML_Parser parser, void *ptr);
/* Frees memory used by the parser. */ /* Frees memory used by the parser. */
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_ParserFree(XML_Parser parser); XML_ParserFree(XML_Parser parser);
/* Returns a string describing the error. */ /* Returns a string describing the error. */
XMLPARSEAPI(const XML_LChar *) XMLPARSEAPI(const XML_LChar *)
XML_ErrorString(int code); XML_ErrorString(enum XML_Error code);
/* Return a string containing the version number of this expat */ /* Return a string containing the version number of this expat */
XMLPARSEAPI(const XML_LChar *) XMLPARSEAPI(const XML_LChar *)
...@@ -719,18 +882,42 @@ typedef struct { ...@@ -719,18 +882,42 @@ typedef struct {
} XML_Expat_Version; } XML_Expat_Version;
/* Return an XML_Expat_Version structure containing numeric version /* Return an XML_Expat_Version structure containing numeric version
number information for this version of expat */ number information for this version of expat.
*/
XMLPARSEAPI(XML_Expat_Version) XMLPARSEAPI(XML_Expat_Version)
XML_ExpatVersionInfo(void); XML_ExpatVersionInfo(void);
/* VERSION is not defined in expat.h.in, but it really belongs here, /* Added in Expat 1.95.5. */
and defining it on the command line gives difficulties with MSVC. */ enum XML_FeatureEnum {
#define VERSION "1.95.2" XML_FEATURE_END = 0,
XML_FEATURE_UNICODE,
XML_FEATURE_UNICODE_WCHAR_T,
XML_FEATURE_DTD,
XML_FEATURE_CONTEXT_BYTES,
XML_FEATURE_MIN_SIZE,
XML_FEATURE_SIZEOF_XML_CHAR,
XML_FEATURE_SIZEOF_XML_LCHAR
/* Additional features must be added to the end of this enum. */
};
/* Element type of the array returned by XML_GetFeatureList. */
typedef struct {
enum XML_FeatureEnum feature; /* which XML_FEATURE_* constant this entry describes */
const XML_LChar *name; /* NOTE(review): presumably a printable name for the feature - confirm */
long int value; /* NOTE(review): meaning looks feature-specific (e.g. a size for the SIZEOF_* features) - confirm */
} XML_Feature;
/* Returns a pointer to XML_Feature entries describing this build.
   NOTE(review): the list is presumably terminated by an entry whose
   feature is XML_FEATURE_END - confirm against the implementation.
*/
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
/* Expat follows the GNU/Linux convention of odd number minor version for
beta/development releases and even number minor version for stable
releases. Micro is bumped with each release, and set to 0 with each
change to major or minor version.
*/
#define XML_MAJOR_VERSION 1 #define XML_MAJOR_VERSION 1
#define XML_MINOR_VERSION 95 #define XML_MINOR_VERSION 95
#define XML_MICRO_VERSION 2 #define XML_MICRO_VERSION 6
#ifdef __cplusplus #ifdef __cplusplus
} }
......
/* internal.h
Internal definitions used by Expat. This is not needed to compile
client code.
The following calling convention macros are defined for frequently
called functions:
FASTCALL - Used for those internal functions that have a simple
body and a low number of arguments and local variables.
PTRCALL - Used for functions called though function pointers.
PTRFASTCALL - Like PTRCALL, but for low number of arguments.
inline - Used for selected internal functions for which inlining
may improve performance on some platforms.
Note: Use of these macros is based on judgement, not hard rules,
and therefore subject to change.
*/
/* Calling-convention macros FASTCALL, PTRCALL and PTRFASTCALL.

   regparm is an x86-32 attribute.  On other targets GCC ignores it with
   a warning and some GCC-compatible compilers reject it outright, so it
   is only used when both __GNUC__ and __i386__ are defined.  Every other
   configuration ends up with empty definitions, either from the WIN32
   branch or from the #ifndef fallbacks at the end of this section.
*/
#if defined(__GNUC__) && defined(__i386__)
/* Instability reported with egcs on a RedHat Linux 7.3.
   Let's comment it out:
   #define FASTCALL __attribute__((stdcall, regparm(3)))
   and let's try this:
*/
#define FASTCALL __attribute__((regparm(3)))
#define PTRCALL
#define PTRFASTCALL __attribute__((regparm(3)))
#elif defined(WIN32)
/* Using __fastcall seems to have an unexpected negative effect under
   MS VC++, especially for function pointers, so we won't use it for
   now on that platform. It may be reconsidered for a future release
   if it can be made more effective.
   Likely reason: __fastcall on Windows is like stdcall, therefore
   the compiler cannot perform stack optimizations for call clusters.
*/
#define FASTCALL
#define PTRCALL
#define PTRFASTCALL
#endif
/* Fallbacks: make sure all three macros exist (as no-ops) on every
   platform not handled above. */
#ifndef FASTCALL
#define FASTCALL
#endif
#ifndef PTRCALL
#define PTRCALL
#endif
#ifndef PTRFASTCALL
#define PTRFASTCALL
#endif
/* Decide what `inline` means for the rest of the library.
   Unless XML_MIN_SIZE is defined, a C (not C++) build under GCC that has
   no `inline` macro yet maps `inline` to GCC's __inline.
*/
#ifndef XML_MIN_SIZE
#if !defined(__cplusplus) && !defined(inline)
#ifdef __GNUC__
#define inline __inline
#endif /* __GNUC__ */
#endif
#endif /* XML_MIN_SIZE */
/* C++ builds keep the real keyword (the macro expands to itself).
   Any C build that still has no `inline` macro at this point - an
   XML_MIN_SIZE build or a non-GCC compiler - gets an empty definition,
   so functions marked `inline` are compiled as ordinary functions.
*/
#ifdef __cplusplus
#define inline inline
#else
#ifndef inline
#define inline
#endif
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file COPYING for copying permission.
See the file COPYING for copying permission.
*/ */
#ifdef COMPILED_FROM_DSP #ifdef COMPILED_FROM_DSP
# include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
#else #else
#ifdef HAVE_CONFIG_H #include <expat_config.h>
# include <config.h>
#endif
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef COMPILED_FROM_DSP */
#include "internal.h"
#include "xmlrole.h" #include "xmlrole.h"
#include "ascii.h" #include "ascii.h"
...@@ -21,29 +21,56 @@ See the file COPYING for copying permission. ...@@ -21,29 +21,56 @@ See the file COPYING for copying permission.
*/ */
static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' }; static const char KW_ANY[] = {
static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; ASCII_A, ASCII_N, ASCII_Y, '\0' };
static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; static const char KW_ATTLIST[] = {
static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; static const char KW_CDATA[] = {
static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; static const char KW_DOCTYPE[] = {
static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; static const char KW_ELEMENT[] = {
static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' }; ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; static const char KW_EMPTY[] = {
static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; static const char KW_ENTITIES[] = {
static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; '\0' };
static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; static const char KW_ENTITY[] = {
static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; static const char KW_FIXED[] = {
static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' }; ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; static const char KW_ID[] = {
static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; ASCII_I, ASCII_D, '\0' };
static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' }; static const char KW_IDREF[] = {
static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
/* DTD keyword tables.  Each keyword is spelled with the ASCII_* code
   constants rather than character literals and ends with a '\0'
   terminator.
*/

/* "IDREFS" */
static const char KW_IDREFS[] = {
  ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S,
  '\0'
};

/* "IGNORE" */
static const char KW_IGNORE[] = {
  ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E,
  '\0'
};

/* "IMPLIED" */
static const char KW_IMPLIED[] = {
  ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D,
  '\0'
};

/* "INCLUDE" */
static const char KW_INCLUDE[] = {
  ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E,
  '\0'
};

/* "NDATA" */
static const char KW_NDATA[] = {
  ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A,
  '\0'
};

/* "NMTOKEN" */
static const char KW_NMTOKEN[] = {
  ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N,
  '\0'
};

/* "NMTOKENS" */
static const char KW_NMTOKENS[] = {
  ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
  '\0'
};

/* "NOTATION" */
static const char KW_NOTATION[] = {
  ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
  '\0'
};

/* "PCDATA" */
static const char KW_PCDATA[] = {
  ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A,
  '\0'
};

/* "PUBLIC" */
static const char KW_PUBLIC[] = {
  ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C,
  '\0'
};

/* "REQUIRED" */
static const char KW_REQUIRED[] = {
  ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
  '\0'
};

/* "SYSTEM" */
static const char KW_SYSTEM[] = {
  ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M,
  '\0'
};
#ifndef MIN_BYTES_PER_CHAR #ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
...@@ -58,7 +85,7 @@ static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, A ...@@ -58,7 +85,7 @@ static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, A
#define setTopLevel(state) ((state)->handler = internalSubset) #define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */ #endif /* not XML_DTD */
typedef int PROLOG_HANDLER(PROLOG_STATE *state, typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -69,7 +96,7 @@ static PROLOG_HANDLER ...@@ -69,7 +96,7 @@ static PROLOG_HANDLER
doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
internalSubset, internalSubset,
entity0, entity1, entity2, entity3, entity4, entity5, entity6, entity0, entity1, entity2, entity3, entity4, entity5, entity6,
entity7, entity8, entity9, entity7, entity8, entity9, entity10,
notation0, notation1, notation2, notation3, notation4, notation0, notation1, notation2, notation3, notation4,
attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
attlist7, attlist8, attlist9, attlist7, attlist8, attlist9,
...@@ -82,11 +109,10 @@ static PROLOG_HANDLER ...@@ -82,11 +109,10 @@ static PROLOG_HANDLER
declClose, declClose,
error; error;
static static int FASTCALL common(PROLOG_STATE *state, int tok);
int common(PROLOG_STATE *state, int tok);
static static int PTRCALL
int prolog0(PROLOG_STATE *state, prolog0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -101,9 +127,10 @@ int prolog0(PROLOG_STATE *state, ...@@ -101,9 +127,10 @@ int prolog0(PROLOG_STATE *state,
return XML_ROLE_XML_DECL; return XML_ROLE_XML_DECL;
case XML_TOK_PI: case XML_TOK_PI:
state->handler = prolog1; state->handler = prolog1;
return XML_ROLE_NONE; return XML_ROLE_PI;
case XML_TOK_COMMENT: case XML_TOK_COMMENT:
state->handler = prolog1; state->handler = prolog1;
return XML_ROLE_COMMENT;
case XML_TOK_BOM: case XML_TOK_BOM:
return XML_ROLE_NONE; return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN: case XML_TOK_DECL_OPEN:
...@@ -113,7 +140,7 @@ int prolog0(PROLOG_STATE *state, ...@@ -113,7 +140,7 @@ int prolog0(PROLOG_STATE *state,
KW_DOCTYPE)) KW_DOCTYPE))
break; break;
state->handler = doctype0; state->handler = doctype0;
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_INSTANCE_START: case XML_TOK_INSTANCE_START:
state->handler = error; state->handler = error;
return XML_ROLE_INSTANCE_START; return XML_ROLE_INSTANCE_START;
...@@ -121,8 +148,8 @@ int prolog0(PROLOG_STATE *state, ...@@ -121,8 +148,8 @@ int prolog0(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int prolog1(PROLOG_STATE *state, prolog1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -132,7 +159,9 @@ int prolog1(PROLOG_STATE *state, ...@@ -132,7 +159,9 @@ int prolog1(PROLOG_STATE *state,
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_NONE;
case XML_TOK_PI: case XML_TOK_PI:
return XML_ROLE_PI;
case XML_TOK_COMMENT: case XML_TOK_COMMENT:
return XML_ROLE_COMMENT;
case XML_TOK_BOM: case XML_TOK_BOM:
return XML_ROLE_NONE; return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN: case XML_TOK_DECL_OPEN:
...@@ -142,7 +171,7 @@ int prolog1(PROLOG_STATE *state, ...@@ -142,7 +171,7 @@ int prolog1(PROLOG_STATE *state,
KW_DOCTYPE)) KW_DOCTYPE))
break; break;
state->handler = doctype0; state->handler = doctype0;
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_INSTANCE_START: case XML_TOK_INSTANCE_START:
state->handler = error; state->handler = error;
return XML_ROLE_INSTANCE_START; return XML_ROLE_INSTANCE_START;
...@@ -150,8 +179,8 @@ int prolog1(PROLOG_STATE *state, ...@@ -150,8 +179,8 @@ int prolog1(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int prolog2(PROLOG_STATE *state, prolog2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -161,8 +190,9 @@ int prolog2(PROLOG_STATE *state, ...@@ -161,8 +190,9 @@ int prolog2(PROLOG_STATE *state,
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_NONE;
case XML_TOK_PI: case XML_TOK_PI:
return XML_ROLE_PI;
case XML_TOK_COMMENT: case XML_TOK_COMMENT:
return XML_ROLE_NONE; return XML_ROLE_COMMENT;
case XML_TOK_INSTANCE_START: case XML_TOK_INSTANCE_START:
state->handler = error; state->handler = error;
return XML_ROLE_INSTANCE_START; return XML_ROLE_INSTANCE_START;
...@@ -170,8 +200,8 @@ int prolog2(PROLOG_STATE *state, ...@@ -170,8 +200,8 @@ int prolog2(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int doctype0(PROLOG_STATE *state, doctype0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -179,7 +209,7 @@ int doctype0(PROLOG_STATE *state, ...@@ -179,7 +209,7 @@ int doctype0(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME: case XML_TOK_PREFIXED_NAME:
state->handler = doctype1; state->handler = doctype1;
...@@ -188,8 +218,8 @@ int doctype0(PROLOG_STATE *state, ...@@ -188,8 +218,8 @@ int doctype0(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int doctype1(PROLOG_STATE *state, doctype1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -197,7 +227,7 @@ int doctype1(PROLOG_STATE *state, ...@@ -197,7 +227,7 @@ int doctype1(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_OPEN_BRACKET: case XML_TOK_OPEN_BRACKET:
state->handler = internalSubset; state->handler = internalSubset;
return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
...@@ -207,19 +237,19 @@ int doctype1(PROLOG_STATE *state, ...@@ -207,19 +237,19 @@ int doctype1(PROLOG_STATE *state,
case XML_TOK_NAME: case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = doctype3; state->handler = doctype3;
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
} }
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = doctype2; state->handler = doctype2;
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
} }
break; break;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int doctype2(PROLOG_STATE *state, doctype2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -227,7 +257,7 @@ int doctype2(PROLOG_STATE *state, ...@@ -227,7 +257,7 @@ int doctype2(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = doctype3; state->handler = doctype3;
return XML_ROLE_DOCTYPE_PUBLIC_ID; return XML_ROLE_DOCTYPE_PUBLIC_ID;
...@@ -235,8 +265,8 @@ int doctype2(PROLOG_STATE *state, ...@@ -235,8 +265,8 @@ int doctype2(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int doctype3(PROLOG_STATE *state, doctype3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -244,7 +274,7 @@ int doctype3(PROLOG_STATE *state, ...@@ -244,7 +274,7 @@ int doctype3(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = doctype4; state->handler = doctype4;
return XML_ROLE_DOCTYPE_SYSTEM_ID; return XML_ROLE_DOCTYPE_SYSTEM_ID;
...@@ -252,8 +282,8 @@ int doctype3(PROLOG_STATE *state, ...@@ -252,8 +282,8 @@ int doctype3(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int doctype4(PROLOG_STATE *state, doctype4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -261,7 +291,7 @@ int doctype4(PROLOG_STATE *state, ...@@ -261,7 +291,7 @@ int doctype4(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_OPEN_BRACKET: case XML_TOK_OPEN_BRACKET:
state->handler = internalSubset; state->handler = internalSubset;
return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
...@@ -272,8 +302,8 @@ int doctype4(PROLOG_STATE *state, ...@@ -272,8 +302,8 @@ int doctype4(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int doctype5(PROLOG_STATE *state, doctype5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -281,7 +311,7 @@ int doctype5(PROLOG_STATE *state, ...@@ -281,7 +311,7 @@ int doctype5(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_DECL_CLOSE: case XML_TOK_DECL_CLOSE:
state->handler = prolog2; state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE; return XML_ROLE_DOCTYPE_CLOSE;
...@@ -289,8 +319,8 @@ int doctype5(PROLOG_STATE *state, ...@@ -289,8 +319,8 @@ int doctype5(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int internalSubset(PROLOG_STATE *state, internalSubset(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -305,46 +335,47 @@ int internalSubset(PROLOG_STATE *state, ...@@ -305,46 +335,47 @@ int internalSubset(PROLOG_STATE *state,
end, end,
KW_ENTITY)) { KW_ENTITY)) {
state->handler = entity0; state->handler = entity0;
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
} }
if (XmlNameMatchesAscii(enc, if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc), ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end, end,
KW_ATTLIST)) { KW_ATTLIST)) {
state->handler = attlist0; state->handler = attlist0;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
} }
if (XmlNameMatchesAscii(enc, if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc), ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end, end,
KW_ELEMENT)) { KW_ELEMENT)) {
state->handler = element0; state->handler = element0;
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
} }
if (XmlNameMatchesAscii(enc, if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc), ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end, end,
KW_NOTATION)) { KW_NOTATION)) {
state->handler = notation0; state->handler = notation0;
return XML_ROLE_NONE; return XML_ROLE_NOTATION_NONE;
} }
break; break;
case XML_TOK_PI: case XML_TOK_PI:
return XML_ROLE_PI;
case XML_TOK_COMMENT: case XML_TOK_COMMENT:
return XML_ROLE_NONE; return XML_ROLE_COMMENT;
case XML_TOK_PARAM_ENTITY_REF: case XML_TOK_PARAM_ENTITY_REF:
return XML_ROLE_PARAM_ENTITY_REF; return XML_ROLE_PARAM_ENTITY_REF;
case XML_TOK_CLOSE_BRACKET: case XML_TOK_CLOSE_BRACKET:
state->handler = doctype5; state->handler = doctype5;
return XML_ROLE_NONE; return XML_ROLE_DOCTYPE_NONE;
} }
return common(state, tok); return common(state, tok);
} }
#ifdef XML_DTD #ifdef XML_DTD
static static int PTRCALL
int externalSubset0(PROLOG_STATE *state, externalSubset0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -356,8 +387,8 @@ int externalSubset0(PROLOG_STATE *state, ...@@ -356,8 +387,8 @@ int externalSubset0(PROLOG_STATE *state,
return externalSubset1(state, tok, ptr, end, enc); return externalSubset1(state, tok, ptr, end, enc);
} }
static static int PTRCALL
int externalSubset1(PROLOG_STATE *state, externalSubset1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -388,8 +419,8 @@ int externalSubset1(PROLOG_STATE *state, ...@@ -388,8 +419,8 @@ int externalSubset1(PROLOG_STATE *state,
#endif /* XML_DTD */ #endif /* XML_DTD */
static static int PTRCALL
int entity0(PROLOG_STATE *state, entity0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -397,10 +428,10 @@ int entity0(PROLOG_STATE *state, ...@@ -397,10 +428,10 @@ int entity0(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_PERCENT: case XML_TOK_PERCENT:
state->handler = entity1; state->handler = entity1;
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
state->handler = entity2; state->handler = entity2;
return XML_ROLE_GENERAL_ENTITY_NAME; return XML_ROLE_GENERAL_ENTITY_NAME;
...@@ -408,8 +439,8 @@ int entity0(PROLOG_STATE *state, ...@@ -408,8 +439,8 @@ int entity0(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity1(PROLOG_STATE *state, entity1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -417,7 +448,7 @@ int entity1(PROLOG_STATE *state, ...@@ -417,7 +448,7 @@ int entity1(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
state->handler = entity7; state->handler = entity7;
return XML_ROLE_PARAM_ENTITY_NAME; return XML_ROLE_PARAM_ENTITY_NAME;
...@@ -425,8 +456,8 @@ int entity1(PROLOG_STATE *state, ...@@ -425,8 +456,8 @@ int entity1(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity2(PROLOG_STATE *state, entity2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -434,26 +465,27 @@ int entity2(PROLOG_STATE *state, ...@@ -434,26 +465,27 @@ int entity2(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = entity4; state->handler = entity4;
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
} }
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = entity3; state->handler = entity3;
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
} }
break; break;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ENTITY_NONE;
return XML_ROLE_ENTITY_VALUE; return XML_ROLE_ENTITY_VALUE;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity3(PROLOG_STATE *state, entity3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -461,7 +493,7 @@ int entity3(PROLOG_STATE *state, ...@@ -461,7 +493,7 @@ int entity3(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = entity4; state->handler = entity4;
return XML_ROLE_ENTITY_PUBLIC_ID; return XML_ROLE_ENTITY_PUBLIC_ID;
...@@ -469,9 +501,8 @@ int entity3(PROLOG_STATE *state, ...@@ -469,9 +501,8 @@ int entity3(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static int PTRCALL
static entity4(PROLOG_STATE *state,
int entity4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -479,7 +510,7 @@ int entity4(PROLOG_STATE *state, ...@@ -479,7 +510,7 @@ int entity4(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = entity5; state->handler = entity5;
return XML_ROLE_ENTITY_SYSTEM_ID; return XML_ROLE_ENTITY_SYSTEM_ID;
...@@ -487,8 +518,8 @@ int entity4(PROLOG_STATE *state, ...@@ -487,8 +518,8 @@ int entity4(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity5(PROLOG_STATE *state, entity5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -496,22 +527,22 @@ int entity5(PROLOG_STATE *state, ...@@ -496,22 +527,22 @@ int entity5(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_DECL_CLOSE: case XML_TOK_DECL_CLOSE:
setTopLevel(state); setTopLevel(state);
return XML_ROLE_ENTITY_COMPLETE; return XML_ROLE_ENTITY_COMPLETE;
case XML_TOK_NAME: case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
state->handler = entity6; state->handler = entity6;
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
} }
break; break;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity6(PROLOG_STATE *state, entity6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -519,16 +550,17 @@ int entity6(PROLOG_STATE *state, ...@@ -519,16 +550,17 @@ int entity6(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ENTITY_NONE;
return XML_ROLE_ENTITY_NOTATION_NAME; return XML_ROLE_ENTITY_NOTATION_NAME;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity7(PROLOG_STATE *state, entity7(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -536,26 +568,27 @@ int entity7(PROLOG_STATE *state, ...@@ -536,26 +568,27 @@ int entity7(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = entity9; state->handler = entity9;
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
} }
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = entity8; state->handler = entity8;
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
} }
break; break;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ENTITY_NONE;
return XML_ROLE_ENTITY_VALUE; return XML_ROLE_ENTITY_VALUE;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity8(PROLOG_STATE *state, entity8(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -563,7 +596,7 @@ int entity8(PROLOG_STATE *state, ...@@ -563,7 +596,7 @@ int entity8(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = entity9; state->handler = entity9;
return XML_ROLE_ENTITY_PUBLIC_ID; return XML_ROLE_ENTITY_PUBLIC_ID;
...@@ -571,8 +604,8 @@ int entity8(PROLOG_STATE *state, ...@@ -571,8 +604,8 @@ int entity8(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int entity9(PROLOG_STATE *state, entity9(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -580,16 +613,16 @@ int entity9(PROLOG_STATE *state, ...@@ -580,16 +613,16 @@ int entity9(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = declClose; state->handler = entity10;
return XML_ROLE_ENTITY_SYSTEM_ID; return XML_ROLE_ENTITY_SYSTEM_ID;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int notation0(PROLOG_STATE *state, entity10(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -597,7 +630,24 @@ int notation0(PROLOG_STATE *state, ...@@ -597,7 +630,24 @@ int notation0(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ENTITY_NONE;
case XML_TOK_DECL_CLOSE:
setTopLevel(state);
return XML_ROLE_ENTITY_COMPLETE;
}
return common(state, tok);
}
static int PTRCALL
notation0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NOTATION_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
state->handler = notation1; state->handler = notation1;
return XML_ROLE_NOTATION_NAME; return XML_ROLE_NOTATION_NAME;
...@@ -605,8 +655,8 @@ int notation0(PROLOG_STATE *state, ...@@ -605,8 +655,8 @@ int notation0(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int notation1(PROLOG_STATE *state, notation1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -614,23 +664,23 @@ int notation1(PROLOG_STATE *state, ...@@ -614,23 +664,23 @@ int notation1(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_NOTATION_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = notation3; state->handler = notation3;
return XML_ROLE_NONE; return XML_ROLE_NOTATION_NONE;
} }
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = notation2; state->handler = notation2;
return XML_ROLE_NONE; return XML_ROLE_NOTATION_NONE;
} }
break; break;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int notation2(PROLOG_STATE *state, notation2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -638,7 +688,7 @@ int notation2(PROLOG_STATE *state, ...@@ -638,7 +688,7 @@ int notation2(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_NOTATION_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = notation4; state->handler = notation4;
return XML_ROLE_NOTATION_PUBLIC_ID; return XML_ROLE_NOTATION_PUBLIC_ID;
...@@ -646,8 +696,8 @@ int notation2(PROLOG_STATE *state, ...@@ -646,8 +696,8 @@ int notation2(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int notation3(PROLOG_STATE *state, notation3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -655,16 +705,17 @@ int notation3(PROLOG_STATE *state, ...@@ -655,16 +705,17 @@ int notation3(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_NOTATION_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_NOTATION_NONE;
return XML_ROLE_NOTATION_SYSTEM_ID; return XML_ROLE_NOTATION_SYSTEM_ID;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int notation4(PROLOG_STATE *state, notation4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -672,9 +723,10 @@ int notation4(PROLOG_STATE *state, ...@@ -672,9 +723,10 @@ int notation4(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_NOTATION_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_NOTATION_NONE;
return XML_ROLE_NOTATION_SYSTEM_ID; return XML_ROLE_NOTATION_SYSTEM_ID;
case XML_TOK_DECL_CLOSE: case XML_TOK_DECL_CLOSE:
setTopLevel(state); setTopLevel(state);
...@@ -683,8 +735,8 @@ int notation4(PROLOG_STATE *state, ...@@ -683,8 +735,8 @@ int notation4(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist0(PROLOG_STATE *state, attlist0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -692,7 +744,7 @@ int attlist0(PROLOG_STATE *state, ...@@ -692,7 +744,7 @@ int attlist0(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME: case XML_TOK_PREFIXED_NAME:
state->handler = attlist1; state->handler = attlist1;
...@@ -701,8 +753,8 @@ int attlist0(PROLOG_STATE *state, ...@@ -701,8 +753,8 @@ int attlist0(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist1(PROLOG_STATE *state, attlist1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -710,10 +762,10 @@ int attlist1(PROLOG_STATE *state, ...@@ -710,10 +762,10 @@ int attlist1(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_DECL_CLOSE: case XML_TOK_DECL_CLOSE:
setTopLevel(state); setTopLevel(state);
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME: case XML_TOK_PREFIXED_NAME:
state->handler = attlist2; state->handler = attlist2;
...@@ -722,8 +774,8 @@ int attlist1(PROLOG_STATE *state, ...@@ -722,8 +774,8 @@ int attlist1(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist2(PROLOG_STATE *state, attlist2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -731,7 +783,7 @@ int attlist2(PROLOG_STATE *state, ...@@ -731,7 +783,7 @@ int attlist2(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
{ {
static const char *types[] = { static const char *types[] = {
...@@ -753,18 +805,18 @@ int attlist2(PROLOG_STATE *state, ...@@ -753,18 +805,18 @@ int attlist2(PROLOG_STATE *state,
} }
if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
state->handler = attlist5; state->handler = attlist5;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
} }
break; break;
case XML_TOK_OPEN_PAREN: case XML_TOK_OPEN_PAREN:
state->handler = attlist3; state->handler = attlist3;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist3(PROLOG_STATE *state, attlist3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -772,7 +824,7 @@ int attlist3(PROLOG_STATE *state, ...@@ -772,7 +824,7 @@ int attlist3(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NMTOKEN: case XML_TOK_NMTOKEN:
case XML_TOK_NAME: case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME: case XML_TOK_PREFIXED_NAME:
...@@ -782,8 +834,8 @@ int attlist3(PROLOG_STATE *state, ...@@ -782,8 +834,8 @@ int attlist3(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist4(PROLOG_STATE *state, attlist4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -791,19 +843,19 @@ int attlist4(PROLOG_STATE *state, ...@@ -791,19 +843,19 @@ int attlist4(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_CLOSE_PAREN: case XML_TOK_CLOSE_PAREN:
state->handler = attlist8; state->handler = attlist8;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_OR: case XML_TOK_OR:
state->handler = attlist3; state->handler = attlist3;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist5(PROLOG_STATE *state, attlist5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -811,17 +863,16 @@ int attlist5(PROLOG_STATE *state, ...@@ -811,17 +863,16 @@ int attlist5(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_OPEN_PAREN: case XML_TOK_OPEN_PAREN:
state->handler = attlist6; state->handler = attlist6;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
} }
return common(state, tok); return common(state, tok);
} }
static int PTRCALL
static attlist6(PROLOG_STATE *state,
int attlist6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -829,7 +880,7 @@ int attlist6(PROLOG_STATE *state, ...@@ -829,7 +880,7 @@ int attlist6(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
state->handler = attlist7; state->handler = attlist7;
return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
...@@ -837,8 +888,8 @@ int attlist6(PROLOG_STATE *state, ...@@ -837,8 +888,8 @@ int attlist6(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist7(PROLOG_STATE *state, attlist7(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -846,20 +897,20 @@ int attlist7(PROLOG_STATE *state, ...@@ -846,20 +897,20 @@ int attlist7(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_CLOSE_PAREN: case XML_TOK_CLOSE_PAREN:
state->handler = attlist8; state->handler = attlist8;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_OR: case XML_TOK_OR:
state->handler = attlist6; state->handler = attlist6;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
} }
return common(state, tok); return common(state, tok);
} }
/* default value */ /* default value */
static static int PTRCALL
int attlist8(PROLOG_STATE *state, attlist8(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -867,7 +918,7 @@ int attlist8(PROLOG_STATE *state, ...@@ -867,7 +918,7 @@ int attlist8(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_POUND_NAME: case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc, if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc), ptr + MIN_BYTES_PER_CHAR(enc),
...@@ -888,7 +939,7 @@ int attlist8(PROLOG_STATE *state, ...@@ -888,7 +939,7 @@ int attlist8(PROLOG_STATE *state,
end, end,
KW_FIXED)) { KW_FIXED)) {
state->handler = attlist9; state->handler = attlist9;
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
} }
break; break;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
...@@ -898,8 +949,8 @@ int attlist8(PROLOG_STATE *state, ...@@ -898,8 +949,8 @@ int attlist8(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int attlist9(PROLOG_STATE *state, attlist9(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -907,7 +958,7 @@ int attlist9(PROLOG_STATE *state, ...@@ -907,7 +958,7 @@ int attlist9(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ATTLIST_NONE;
case XML_TOK_LITERAL: case XML_TOK_LITERAL:
state->handler = attlist1; state->handler = attlist1;
return XML_ROLE_FIXED_ATTRIBUTE_VALUE; return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
...@@ -915,8 +966,8 @@ int attlist9(PROLOG_STATE *state, ...@@ -915,8 +966,8 @@ int attlist9(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element0(PROLOG_STATE *state, element0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -924,7 +975,7 @@ int element0(PROLOG_STATE *state, ...@@ -924,7 +975,7 @@ int element0(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME: case XML_TOK_PREFIXED_NAME:
state->handler = element1; state->handler = element1;
...@@ -933,8 +984,8 @@ int element0(PROLOG_STATE *state, ...@@ -933,8 +984,8 @@ int element0(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element1(PROLOG_STATE *state, element1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -942,14 +993,16 @@ int element1(PROLOG_STATE *state, ...@@ -942,14 +993,16 @@ int element1(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_CONTENT_EMPTY; return XML_ROLE_CONTENT_EMPTY;
} }
if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_CONTENT_ANY; return XML_ROLE_CONTENT_ANY;
} }
break; break;
...@@ -961,8 +1014,8 @@ int element1(PROLOG_STATE *state, ...@@ -961,8 +1014,8 @@ int element1(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element2(PROLOG_STATE *state, element2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -970,7 +1023,7 @@ int element2(PROLOG_STATE *state, ...@@ -970,7 +1023,7 @@ int element2(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_POUND_NAME: case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc, if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc), ptr + MIN_BYTES_PER_CHAR(enc),
...@@ -1001,8 +1054,8 @@ int element2(PROLOG_STATE *state, ...@@ -1001,8 +1054,8 @@ int element2(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element3(PROLOG_STATE *state, element3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1010,22 +1063,24 @@ int element3(PROLOG_STATE *state, ...@@ -1010,22 +1063,24 @@ int element3(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_CLOSE_PAREN: case XML_TOK_CLOSE_PAREN:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_GROUP_CLOSE; return XML_ROLE_GROUP_CLOSE;
case XML_TOK_CLOSE_PAREN_ASTERISK: case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_GROUP_CLOSE_REP; return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_OR: case XML_TOK_OR:
state->handler = element4; state->handler = element4;
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element4(PROLOG_STATE *state, element4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1033,7 +1088,7 @@ int element4(PROLOG_STATE *state, ...@@ -1033,7 +1088,7 @@ int element4(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_NAME: case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME: case XML_TOK_PREFIXED_NAME:
state->handler = element5; state->handler = element5;
...@@ -1042,8 +1097,8 @@ int element4(PROLOG_STATE *state, ...@@ -1042,8 +1097,8 @@ int element4(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element5(PROLOG_STATE *state, element5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1051,19 +1106,20 @@ int element5(PROLOG_STATE *state, ...@@ -1051,19 +1106,20 @@ int element5(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_CLOSE_PAREN_ASTERISK: case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_GROUP_CLOSE_REP; return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_OR: case XML_TOK_OR:
state->handler = element4; state->handler = element4;
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
} }
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element6(PROLOG_STATE *state, element6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1071,7 +1127,7 @@ int element6(PROLOG_STATE *state, ...@@ -1071,7 +1127,7 @@ int element6(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_OPEN_PAREN: case XML_TOK_OPEN_PAREN:
state->level += 1; state->level += 1;
return XML_ROLE_GROUP_OPEN; return XML_ROLE_GROUP_OPEN;
...@@ -1092,8 +1148,8 @@ int element6(PROLOG_STATE *state, ...@@ -1092,8 +1148,8 @@ int element6(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int element7(PROLOG_STATE *state, element7(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1101,26 +1157,34 @@ int element7(PROLOG_STATE *state, ...@@ -1101,26 +1157,34 @@ int element7(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return XML_ROLE_ELEMENT_NONE;
case XML_TOK_CLOSE_PAREN: case XML_TOK_CLOSE_PAREN:
state->level -= 1; state->level -= 1;
if (state->level == 0) if (state->level == 0) {
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE; return XML_ROLE_GROUP_CLOSE;
case XML_TOK_CLOSE_PAREN_ASTERISK: case XML_TOK_CLOSE_PAREN_ASTERISK:
state->level -= 1; state->level -= 1;
if (state->level == 0) if (state->level == 0) {
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE_REP; return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_CLOSE_PAREN_QUESTION: case XML_TOK_CLOSE_PAREN_QUESTION:
state->level -= 1; state->level -= 1;
if (state->level == 0) if (state->level == 0) {
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE_OPT; return XML_ROLE_GROUP_CLOSE_OPT;
case XML_TOK_CLOSE_PAREN_PLUS: case XML_TOK_CLOSE_PAREN_PLUS:
state->level -= 1; state->level -= 1;
if (state->level == 0) if (state->level == 0) {
state->handler = declClose; state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE_PLUS; return XML_ROLE_GROUP_CLOSE_PLUS;
case XML_TOK_COMMA: case XML_TOK_COMMA:
state->handler = element6; state->handler = element6;
...@@ -1134,8 +1198,8 @@ int element7(PROLOG_STATE *state, ...@@ -1134,8 +1198,8 @@ int element7(PROLOG_STATE *state,
#ifdef XML_DTD #ifdef XML_DTD
static static int PTRCALL
int condSect0(PROLOG_STATE *state, condSect0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1158,8 +1222,8 @@ int condSect0(PROLOG_STATE *state, ...@@ -1158,8 +1222,8 @@ int condSect0(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int condSect1(PROLOG_STATE *state, condSect1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1176,8 +1240,8 @@ int condSect1(PROLOG_STATE *state, ...@@ -1176,8 +1240,8 @@ int condSect1(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
static static int PTRCALL
int condSect2(PROLOG_STATE *state, condSect2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1195,8 +1259,8 @@ int condSect2(PROLOG_STATE *state, ...@@ -1195,8 +1259,8 @@ int condSect2(PROLOG_STATE *state,
#endif /* XML_DTD */ #endif /* XML_DTD */
static static int PTRCALL
int declClose(PROLOG_STATE *state, declClose(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1204,36 +1268,16 @@ int declClose(PROLOG_STATE *state, ...@@ -1204,36 +1268,16 @@ int declClose(PROLOG_STATE *state,
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
return XML_ROLE_NONE; return state->role_none;
case XML_TOK_DECL_CLOSE: case XML_TOK_DECL_CLOSE:
setTopLevel(state); setTopLevel(state);
return XML_ROLE_NONE; return state->role_none;
} }
return common(state, tok); return common(state, tok);
} }
#if 0 static int PTRCALL
error(PROLOG_STATE *state,
static
int ignore(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
return 0;
default:
return XML_ROLE_NONE;
}
return common(state, tok);
}
#endif
static
int error(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
...@@ -1242,8 +1286,8 @@ int error(PROLOG_STATE *state, ...@@ -1242,8 +1286,8 @@ int error(PROLOG_STATE *state,
return XML_ROLE_NONE; return XML_ROLE_NONE;
} }
static static int FASTCALL
int common(PROLOG_STATE *state, int tok) common(PROLOG_STATE *state, int tok)
{ {
#ifdef XML_DTD #ifdef XML_DTD
if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
...@@ -1253,18 +1297,21 @@ int common(PROLOG_STATE *state, int tok) ...@@ -1253,18 +1297,21 @@ int common(PROLOG_STATE *state, int tok)
return XML_ROLE_ERROR; return XML_ROLE_ERROR;
} }
void XmlPrologStateInit(PROLOG_STATE *state) void
XmlPrologStateInit(PROLOG_STATE *state)
{ {
state->handler = prolog0; state->handler = prolog0;
#ifdef XML_DTD #ifdef XML_DTD
state->documentEntity = 1; state->documentEntity = 1;
state->includeLevel = 0; state->includeLevel = 0;
state->inEntityValue = 0;
#endif /* XML_DTD */ #endif /* XML_DTD */
} }
#ifdef XML_DTD #ifdef XML_DTD
void XmlPrologStateInitExternalEntity(PROLOG_STATE *state) void
XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{ {
state->handler = externalSubset0; state->handler = externalSubset0;
state->documentEntity = 0; state->documentEntity = 0;
......
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file COPYING for copying permission.
See the file COPYING for copying permission.
*/ */
#ifndef XmlRole_INCLUDED #ifndef XmlRole_INCLUDED
#define XmlRole_INCLUDED 1 #define XmlRole_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt
#endif
#include "xmltok.h" #include "xmltok.h"
#ifdef __cplusplus #ifdef __cplusplus
...@@ -17,6 +22,7 @@ enum { ...@@ -17,6 +22,7 @@ enum {
XML_ROLE_NONE = 0, XML_ROLE_NONE = 0,
XML_ROLE_XML_DECL, XML_ROLE_XML_DECL,
XML_ROLE_INSTANCE_START, XML_ROLE_INSTANCE_START,
XML_ROLE_DOCTYPE_NONE,
XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_NAME,
XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_SYSTEM_ID,
XML_ROLE_DOCTYPE_PUBLIC_ID, XML_ROLE_DOCTYPE_PUBLIC_ID,
...@@ -24,11 +30,13 @@ enum { ...@@ -24,11 +30,13 @@ enum {
XML_ROLE_DOCTYPE_CLOSE, XML_ROLE_DOCTYPE_CLOSE,
XML_ROLE_GENERAL_ENTITY_NAME, XML_ROLE_GENERAL_ENTITY_NAME,
XML_ROLE_PARAM_ENTITY_NAME, XML_ROLE_PARAM_ENTITY_NAME,
XML_ROLE_ENTITY_NONE,
XML_ROLE_ENTITY_VALUE, XML_ROLE_ENTITY_VALUE,
XML_ROLE_ENTITY_SYSTEM_ID, XML_ROLE_ENTITY_SYSTEM_ID,
XML_ROLE_ENTITY_PUBLIC_ID, XML_ROLE_ENTITY_PUBLIC_ID,
XML_ROLE_ENTITY_COMPLETE, XML_ROLE_ENTITY_COMPLETE,
XML_ROLE_ENTITY_NOTATION_NAME, XML_ROLE_ENTITY_NOTATION_NAME,
XML_ROLE_NOTATION_NONE,
XML_ROLE_NOTATION_NAME, XML_ROLE_NOTATION_NAME,
XML_ROLE_NOTATION_SYSTEM_ID, XML_ROLE_NOTATION_SYSTEM_ID,
XML_ROLE_NOTATION_NO_SYSTEM_ID, XML_ROLE_NOTATION_NO_SYSTEM_ID,
...@@ -44,11 +52,13 @@ enum { ...@@ -44,11 +52,13 @@ enum {
XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS, XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS,
XML_ROLE_ATTRIBUTE_ENUM_VALUE, XML_ROLE_ATTRIBUTE_ENUM_VALUE,
XML_ROLE_ATTRIBUTE_NOTATION_VALUE, XML_ROLE_ATTRIBUTE_NOTATION_VALUE,
XML_ROLE_ATTLIST_NONE,
XML_ROLE_ATTLIST_ELEMENT_NAME, XML_ROLE_ATTLIST_ELEMENT_NAME,
XML_ROLE_IMPLIED_ATTRIBUTE_VALUE, XML_ROLE_IMPLIED_ATTRIBUTE_VALUE,
XML_ROLE_REQUIRED_ATTRIBUTE_VALUE, XML_ROLE_REQUIRED_ATTRIBUTE_VALUE,
XML_ROLE_DEFAULT_ATTRIBUTE_VALUE, XML_ROLE_DEFAULT_ATTRIBUTE_VALUE,
XML_ROLE_FIXED_ATTRIBUTE_VALUE, XML_ROLE_FIXED_ATTRIBUTE_VALUE,
XML_ROLE_ELEMENT_NONE,
XML_ROLE_ELEMENT_NAME, XML_ROLE_ELEMENT_NAME,
XML_ROLE_CONTENT_ANY, XML_ROLE_CONTENT_ANY,
XML_ROLE_CONTENT_EMPTY, XML_ROLE_CONTENT_EMPTY,
...@@ -64,6 +74,8 @@ enum { ...@@ -64,6 +74,8 @@ enum {
XML_ROLE_CONTENT_ELEMENT_REP, XML_ROLE_CONTENT_ELEMENT_REP,
XML_ROLE_CONTENT_ELEMENT_OPT, XML_ROLE_CONTENT_ELEMENT_OPT,
XML_ROLE_CONTENT_ELEMENT_PLUS, XML_ROLE_CONTENT_ELEMENT_PLUS,
XML_ROLE_PI,
XML_ROLE_COMMENT,
#ifdef XML_DTD #ifdef XML_DTD
XML_ROLE_TEXT_DECL, XML_ROLE_TEXT_DECL,
XML_ROLE_IGNORE_SECT, XML_ROLE_IGNORE_SECT,
...@@ -73,15 +85,17 @@ enum { ...@@ -73,15 +85,17 @@ enum {
}; };
typedef struct prolog_state { typedef struct prolog_state {
int (*handler)(struct prolog_state *state, int (PTRCALL *handler) (struct prolog_state *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
const ENCODING *enc); const ENCODING *enc);
unsigned level; unsigned level;
int role_none;
#ifdef XML_DTD #ifdef XML_DTD
unsigned includeLevel; unsigned includeLevel;
int documentEntity; int documentEntity;
int inEntityValue;
#endif /* XML_DTD */ #endif /* XML_DTD */
} PROLOG_STATE; } PROLOG_STATE;
......
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file COPYING for copying permission.
See the file COPYING for copying permission.
*/ */
#ifdef COMPILED_FROM_DSP #ifdef COMPILED_FROM_DSP
# include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
#else #else
#ifdef HAVE_CONFIG_H /* Unused - MvL
# include <config.h> #include <expat_config.h>
#endif */
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef COMPILED_FROM_DSP */
#include "internal.h"
#include "xmltok.h" #include "xmltok.h"
#include "nametab.h" #include "nametab.h"
...@@ -39,20 +41,21 @@ See the file COPYING for copying permission. ...@@ -39,20 +41,21 @@ See the file COPYING for copying permission.
#define UCS2_GET_NAMING(pages, hi, lo) \ #define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits /* A 2 byte UTF-8 representation splits the characters 11 bits between
between the bottom 5 and 6 bits of the bytes. the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
We need 8 bits to index into pages, 3 bits to add to that index and pages, 3 bits to add to that index and 5 bits to generate the mask.
5 bits to generate the mask. */ */
#define UTF8_GET_NAMING2(pages, byte) \ #define UTF8_GET_NAMING2(pages, byte) \
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \ + ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \ + ((((byte)[1]) >> 5) & 1)] \
& (1 << (((byte)[1]) & 0x1F))) & (1 << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits /* A 3 byte UTF-8 representation splits the characters 16 bits between
between the bottom 4, 6 and 6 bits of the bytes. the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
We need 8 bits to index into pages, 3 bits to add to that index and into pages, 3 bits to add to that index and 5 bits to generate the
5 bits to generate the mask. */ mask.
*/
#define UTF8_GET_NAMING3(pages, byte) \ #define UTF8_GET_NAMING3(pages, byte) \
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
+ ((((byte)[1]) >> 2) & 0xF)] \ + ((((byte)[1]) >> 2) & 0xF)] \
...@@ -68,59 +71,97 @@ We need 8 bits to index into pages, 3 bits to add to that index and ...@@ -68,59 +71,97 @@ We need 8 bits to index into pages, 3 bits to add to that index and
? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
: 0)) : 0))
#define UTF8_INVALID3(p) \ /* Detection of invalid UTF-8 sequences is based on Table 3.1B
((*p) == 0xED \ of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
? (((p)[1] & 0x20) != 0) \ with the additional restriction of not allowing the Unicode
: ((*p) == 0xEF \ code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \ Implementation details:
: 0)) (A & 0x80) == 0 means A < 0x80
and
(A & 0xC0) == 0xC0 means A > 0xBF
*/
#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) #define UTF8_INVALID2(p) \
((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
static #define UTF8_INVALID3(p) \
int isNever(const ENCODING *enc, const char *p) (((p)[2] & 0x80) == 0 \
|| \
((*p) == 0xEF && (p)[1] == 0xBF \
? \
(p)[2] > 0xBD \
: \
((p)[2] & 0xC0) == 0xC0) \
|| \
((*p) == 0xE0 \
? \
(p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
#define UTF8_INVALID4(p) \
(((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
|| \
((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
|| \
((*p) == 0xF0 \
? \
(p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static int PTRFASTCALL
isNever(const ENCODING *enc, const char *p)
{ {
return 0; return 0;
} }
static static int PTRFASTCALL
int utf8_isName2(const ENCODING *enc, const char *p) utf8_isName2(const ENCODING *enc, const char *p)
{ {
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
} }
static static int PTRFASTCALL
int utf8_isName3(const ENCODING *enc, const char *p) utf8_isName3(const ENCODING *enc, const char *p)
{ {
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
} }
#define utf8_isName4 isNever #define utf8_isName4 isNever
static static int PTRFASTCALL
int utf8_isNmstrt2(const ENCODING *enc, const char *p) utf8_isNmstrt2(const ENCODING *enc, const char *p)
{ {
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
} }
static static int PTRFASTCALL
int utf8_isNmstrt3(const ENCODING *enc, const char *p) utf8_isNmstrt3(const ENCODING *enc, const char *p)
{ {
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
} }
#define utf8_isNmstrt4 isNever #define utf8_isNmstrt4 isNever
#define utf8_isInvalid2 isNever static int PTRFASTCALL
utf8_isInvalid2(const ENCODING *enc, const char *p)
{
return UTF8_INVALID2((const unsigned char *)p);
}
static static int PTRFASTCALL
int utf8_isInvalid3(const ENCODING *enc, const char *p) utf8_isInvalid3(const ENCODING *enc, const char *p)
{ {
return UTF8_INVALID3((const unsigned char *)p); return UTF8_INVALID3((const unsigned char *)p);
} }
static static int PTRFASTCALL
int utf8_isInvalid4(const ENCODING *enc, const char *p) utf8_isInvalid4(const ENCODING *enc, const char *p)
{ {
return UTF8_INVALID4((const unsigned char *)p); return UTF8_INVALID4((const unsigned char *)p);
} }
...@@ -129,23 +170,25 @@ struct normal_encoding { ...@@ -129,23 +170,25 @@ struct normal_encoding {
ENCODING enc; ENCODING enc;
unsigned char type[256]; unsigned char type[256];
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
int (*byteType)(const ENCODING *, const char *); int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
int (*isNameMin)(const ENCODING *, const char *); int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
int (*isNmstrtMin)(const ENCODING *, const char *); int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
int (*byteToAscii)(const ENCODING *, const char *); int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
int (*charMatches)(const ENCODING *, const char *, int); int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
#endif /* XML_MIN_SIZE */ #endif /* XML_MIN_SIZE */
int (*isName2)(const ENCODING *, const char *); int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
int (*isName3)(const ENCODING *, const char *); int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
int (*isName4)(const ENCODING *, const char *); int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
int (*isNmstrt2)(const ENCODING *, const char *); int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
int (*isNmstrt3)(const ENCODING *, const char *); int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
int (*isNmstrt4)(const ENCODING *, const char *); int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
int (*isInvalid2)(const ENCODING *, const char *); int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
int (*isInvalid3)(const ENCODING *, const char *); int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
int (*isInvalid4)(const ENCODING *, const char *); int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
}; };
#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc))
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
#define STANDARD_VTABLE(E) \ #define STANDARD_VTABLE(E) \
...@@ -172,7 +215,7 @@ struct normal_encoding { ...@@ -172,7 +215,7 @@ struct normal_encoding {
E ## isInvalid3, \ E ## isInvalid3, \
E ## isInvalid4 E ## isInvalid4
static int checkCharRefNumber(int); static int FASTCALL checkCharRefNumber(int);
#include "xmltok_impl.h" #include "xmltok_impl.h"
#include "ascii.h" #include "ascii.h"
...@@ -193,22 +236,22 @@ static int checkCharRefNumber(int); ...@@ -193,22 +236,22 @@ static int checkCharRefNumber(int);
(((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
static static int PTRFASTCALL
int sb_byteType(const ENCODING *enc, const char *p) sb_byteType(const ENCODING *enc, const char *p)
{ {
return SB_BYTE_TYPE(enc, p); return SB_BYTE_TYPE(enc, p);
} }
#define BYTE_TYPE(enc, p) \ #define BYTE_TYPE(enc, p) \
(((const struct normal_encoding *)(enc))->byteType(enc, p)) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else #else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif #endif
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \ #define BYTE_TO_ASCII(enc, p) \
(((const struct normal_encoding *)(enc))->byteToAscii(enc, p)) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
static static int PTRFASTCALL
int sb_byteToAscii(const ENCODING *enc, const char *p) sb_byteToAscii(const ENCODING *enc, const char *p)
{ {
return *p; return *p;
} }
...@@ -217,17 +260,17 @@ int sb_byteToAscii(const ENCODING *enc, const char *p) ...@@ -217,17 +260,17 @@ int sb_byteToAscii(const ENCODING *enc, const char *p)
#endif #endif
#define IS_NAME_CHAR(enc, p, n) \ #define IS_NAME_CHAR(enc, p, n) \
(((const struct normal_encoding *)(enc))->isName ## n(enc, p)) (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \ #define IS_NMSTRT_CHAR(enc, p, n) \
(((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \ #define IS_INVALID_CHAR(enc, p, n) \
(((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \ #define IS_NAME_CHAR_MINBPC(enc, p) \
(((const struct normal_encoding *)(enc))->isNameMin(enc, p)) (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \ #define IS_NMSTRT_CHAR_MINBPC(enc, p) \
(((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p)) (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#else #else
#define IS_NAME_CHAR_MINBPC(enc, p) (0) #define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
...@@ -235,9 +278,9 @@ int sb_byteToAscii(const ENCODING *enc, const char *p) ...@@ -235,9 +278,9 @@ int sb_byteToAscii(const ENCODING *enc, const char *p)
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \ #define CHAR_MATCHES(enc, p, c) \
(((const struct normal_encoding *)(enc))->charMatches(enc, p, c)) (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
static static int PTRCALL
int sb_charMatches(const ENCODING *enc, const char *p, int c) sb_charMatches(const ENCODING *enc, const char *p, int c)
{ {
return *p == c; return *p == c;
} }
...@@ -266,8 +309,8 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ ...@@ -266,8 +309,8 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0 UTF8_cval4 = 0xf0
}; };
static static void PTRCALL
void utf8_toUtf8(const ENCODING *enc, utf8_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
...@@ -285,8 +328,8 @@ void utf8_toUtf8(const ENCODING *enc, ...@@ -285,8 +328,8 @@ void utf8_toUtf8(const ENCODING *enc,
*toP = to; *toP = to;
} }
static static void PTRCALL
void utf8_toUtf16(const ENCODING *enc, utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
...@@ -295,19 +338,21 @@ void utf8_toUtf16(const ENCODING *enc, ...@@ -295,19 +338,21 @@ void utf8_toUtf16(const ENCODING *enc,
while (from != fromLim && to != toLim) { while (from != fromLim && to != toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2: case BT_LEAD2:
*to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f); *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
from += 2; from += 2;
break; break;
case BT_LEAD3: case BT_LEAD3:
*to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f); *to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
from += 3; from += 3;
break; break;
case BT_LEAD4: case BT_LEAD4:
{ {
unsigned long n; unsigned long n;
if (to + 1 == toLim) if (to + 1 == toLim)
break; goto after;
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000; n -= 0x10000;
to[0] = (unsigned short)((n >> 10) | 0xD800); to[0] = (unsigned short)((n >> 10) | 0xD800);
to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
...@@ -320,6 +365,7 @@ void utf8_toUtf16(const ENCODING *enc, ...@@ -320,6 +365,7 @@ void utf8_toUtf16(const ENCODING *enc,
break; break;
} }
} }
after:
*fromP = from; *fromP = from;
*toP = to; *toP = to;
} }
...@@ -370,8 +416,8 @@ static const struct normal_encoding internal_utf8_encoding = { ...@@ -370,8 +416,8 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
}; };
static static void PTRCALL
void latin1_toUtf8(const ENCODING *enc, latin1_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
...@@ -383,8 +429,8 @@ void latin1_toUtf8(const ENCODING *enc, ...@@ -383,8 +429,8 @@ void latin1_toUtf8(const ENCODING *enc,
if (c & 0x80) { if (c & 0x80) {
if (toLim - *toP < 2) if (toLim - *toP < 2)
break; break;
*(*toP)++ = ((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = ((c & 0x3f) | 0x80); *(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++; (*fromP)++;
} }
else { else {
...@@ -395,8 +441,8 @@ void latin1_toUtf8(const ENCODING *enc, ...@@ -395,8 +441,8 @@ void latin1_toUtf8(const ENCODING *enc,
} }
} }
static static void PTRCALL
void latin1_toUtf16(const ENCODING *enc, latin1_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
...@@ -428,8 +474,8 @@ static const struct normal_encoding latin1_encoding = { ...@@ -428,8 +474,8 @@ static const struct normal_encoding latin1_encoding = {
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_)
}; };
static static void PTRCALL
void ascii_toUtf8(const ENCODING *enc, ascii_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
...@@ -461,7 +507,8 @@ static const struct normal_encoding ascii_encoding = { ...@@ -461,7 +507,8 @@ static const struct normal_encoding ascii_encoding = {
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_)
}; };
static int unicode_byte_type(char hi, char lo) static int PTRFASTCALL
unicode_byte_type(char hi, char lo)
{ {
switch ((unsigned char)hi) { switch ((unsigned char)hi) {
case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xD8: case 0xD9: case 0xDA: case 0xDB:
...@@ -480,8 +527,8 @@ static int unicode_byte_type(char hi, char lo) ...@@ -480,8 +527,8 @@ static int unicode_byte_type(char hi, char lo)
} }
#define DEFINE_UTF16_TO_UTF8(E) \ #define DEFINE_UTF16_TO_UTF8(E) \
static \ static void PTRCALL \
void E ## toUtf8(const ENCODING *enc, \ E ## toUtf8(const ENCODING *enc, \
const char **fromP, const char *fromLim, \ const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \ char **toP, const char *toLim) \
{ \ { \
...@@ -543,8 +590,8 @@ void E ## toUtf8(const ENCODING *enc, \ ...@@ -543,8 +590,8 @@ void E ## toUtf8(const ENCODING *enc, \
} }
#define DEFINE_UTF16_TO_UTF16(E) \ #define DEFINE_UTF16_TO_UTF16(E) \
static \ static void PTRCALL \
void E ## toUtf16(const ENCODING *enc, \ E ## toUtf16(const ENCODING *enc, \
const char **fromP, const char *fromLim, \ const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \ unsigned short **toP, const unsigned short *toLim) \
{ \ { \
...@@ -593,32 +640,32 @@ DEFINE_UTF16_TO_UTF16(big2_) ...@@ -593,32 +640,32 @@ DEFINE_UTF16_TO_UTF16(big2_)
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
static static int PTRFASTCALL
int little2_byteType(const ENCODING *enc, const char *p) little2_byteType(const ENCODING *enc, const char *p)
{ {
return LITTLE2_BYTE_TYPE(enc, p); return LITTLE2_BYTE_TYPE(enc, p);
} }
static static int PTRFASTCALL
int little2_byteToAscii(const ENCODING *enc, const char *p) little2_byteToAscii(const ENCODING *enc, const char *p)
{ {
return LITTLE2_BYTE_TO_ASCII(enc, p); return LITTLE2_BYTE_TO_ASCII(enc, p);
} }
static static int PTRCALL
int little2_charMatches(const ENCODING *enc, const char *p, int c) little2_charMatches(const ENCODING *enc, const char *p, int c)
{ {
return LITTLE2_CHAR_MATCHES(enc, p, c); return LITTLE2_CHAR_MATCHES(enc, p, c);
} }
static static int PTRFASTCALL
int little2_isNameMin(const ENCODING *enc, const char *p) little2_isNameMin(const ENCODING *enc, const char *p)
{ {
return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p); return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
} }
static static int PTRFASTCALL
int little2_isNmstrtMin(const ENCODING *enc, const char *p) little2_isNmstrtMin(const ENCODING *enc, const char *p)
{ {
return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p); return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
} }
...@@ -658,7 +705,7 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p) ...@@ -658,7 +705,7 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p)
static const struct normal_encoding little2_encoding_ns = { static const struct normal_encoding little2_encoding_ns = {
{ VTABLE, 2, 0, { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 12 #if BYTEORDER == 1234
1 1
#else #else
0 0
...@@ -675,7 +722,7 @@ static const struct normal_encoding little2_encoding_ns = { ...@@ -675,7 +722,7 @@ static const struct normal_encoding little2_encoding_ns = {
static const struct normal_encoding little2_encoding = { static const struct normal_encoding little2_encoding = {
{ VTABLE, 2, 0, { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 12 #if BYTEORDER == 1234
1 1
#else #else
0 0
...@@ -690,7 +737,7 @@ static const struct normal_encoding little2_encoding = { ...@@ -690,7 +737,7 @@ static const struct normal_encoding little2_encoding = {
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_)
}; };
#if XML_BYTE_ORDER != 21 #if BYTEORDER != 4321
#ifdef XML_NS #ifdef XML_NS
...@@ -732,32 +779,32 @@ static const struct normal_encoding internal_little2_encoding = { ...@@ -732,32 +779,32 @@ static const struct normal_encoding internal_little2_encoding = {
#ifdef XML_MIN_SIZE #ifdef XML_MIN_SIZE
static static int PTRFASTCALL
int big2_byteType(const ENCODING *enc, const char *p) big2_byteType(const ENCODING *enc, const char *p)
{ {
return BIG2_BYTE_TYPE(enc, p); return BIG2_BYTE_TYPE(enc, p);
} }
static static int PTRFASTCALL
int big2_byteToAscii(const ENCODING *enc, const char *p) big2_byteToAscii(const ENCODING *enc, const char *p)
{ {
return BIG2_BYTE_TO_ASCII(enc, p); return BIG2_BYTE_TO_ASCII(enc, p);
} }
static static int PTRCALL
int big2_charMatches(const ENCODING *enc, const char *p, int c) big2_charMatches(const ENCODING *enc, const char *p, int c)
{ {
return BIG2_CHAR_MATCHES(enc, p, c); return BIG2_CHAR_MATCHES(enc, p, c);
} }
static static int PTRFASTCALL
int big2_isNameMin(const ENCODING *enc, const char *p) big2_isNameMin(const ENCODING *enc, const char *p)
{ {
return BIG2_IS_NAME_CHAR_MINBPC(enc, p); return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
} }
static static int PTRFASTCALL
int big2_isNmstrtMin(const ENCODING *enc, const char *p) big2_isNmstrtMin(const ENCODING *enc, const char *p)
{ {
return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p); return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
} }
...@@ -797,7 +844,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p) ...@@ -797,7 +844,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p)
static const struct normal_encoding big2_encoding_ns = { static const struct normal_encoding big2_encoding_ns = {
{ VTABLE, 2, 0, { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 21 #if BYTEORDER == 4321
1 1
#else #else
0 0
...@@ -814,7 +861,7 @@ static const struct normal_encoding big2_encoding_ns = { ...@@ -814,7 +861,7 @@ static const struct normal_encoding big2_encoding_ns = {
static const struct normal_encoding big2_encoding = { static const struct normal_encoding big2_encoding = {
{ VTABLE, 2, 0, { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 21 #if BYTEORDER == 4321
1 1
#else #else
0 0
...@@ -829,7 +876,7 @@ static const struct normal_encoding big2_encoding = { ...@@ -829,7 +876,7 @@ static const struct normal_encoding big2_encoding = {
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_)
}; };
#if XML_BYTE_ORDER != 12 #if BYTEORDER != 1234
#ifdef XML_NS #ifdef XML_NS
...@@ -859,8 +906,8 @@ static const struct normal_encoding internal_big2_encoding = { ...@@ -859,8 +906,8 @@ static const struct normal_encoding internal_big2_encoding = {
#undef PREFIX #undef PREFIX
static static int FASTCALL
int streqci(const char *s1, const char *s2) streqci(const char *s1, const char *s2)
{ {
for (;;) { for (;;) {
char c1 = *s1++; char c1 = *s1++;
...@@ -877,15 +924,15 @@ int streqci(const char *s1, const char *s2) ...@@ -877,15 +924,15 @@ int streqci(const char *s1, const char *s2)
return 1; return 1;
} }
static static void PTRCALL
void initUpdatePosition(const ENCODING *enc, const char *ptr, initUpdatePosition(const ENCODING *enc, const char *ptr,
const char *end, POSITION *pos) const char *end, POSITION *pos)
{ {
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
} }
static static int
int toAscii(const ENCODING *enc, const char *ptr, const char *end) toAscii(const ENCODING *enc, const char *ptr, const char *end)
{ {
char buf[1]; char buf[1];
char *p = buf; char *p = buf;
...@@ -896,8 +943,8 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end) ...@@ -896,8 +943,8 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end)
return buf[0]; return buf[0];
} }
static static int FASTCALL
int isSpace(int c) isSpace(int c)
{ {
switch (c) { switch (c) {
case 0x20: case 0x20:
...@@ -909,10 +956,11 @@ int isSpace(int c) ...@@ -909,10 +956,11 @@ int isSpace(int c)
return 0; return 0;
} }
/* Return 1 if there's just optional white space /* Return 1 if there's just optional white space or there's an S
or there's an S followed by name=val. */ followed by name=val.
static */
int parsePseudoAttribute(const ENCODING *enc, static int
parsePseudoAttribute(const ENCODING *enc,
const char *ptr, const char *ptr,
const char *end, const char *end,
const char **namePtr, const char **namePtr,
...@@ -923,7 +971,7 @@ int parsePseudoAttribute(const ENCODING *enc, ...@@ -923,7 +971,7 @@ int parsePseudoAttribute(const ENCODING *enc,
int c; int c;
char open; char open;
if (ptr == end) { if (ptr == end) {
*namePtr = 0; *namePtr = NULL;
return 1; return 1;
} }
if (!isSpace(toAscii(enc, ptr, end))) { if (!isSpace(toAscii(enc, ptr, end))) {
...@@ -934,7 +982,7 @@ int parsePseudoAttribute(const ENCODING *enc, ...@@ -934,7 +982,7 @@ int parsePseudoAttribute(const ENCODING *enc,
ptr += enc->minBytesPerChar; ptr += enc->minBytesPerChar;
} while (isSpace(toAscii(enc, ptr, end))); } while (isSpace(toAscii(enc, ptr, end)));
if (ptr == end) { if (ptr == end) {
*namePtr = 0; *namePtr = NULL;
return 1; return 1;
} }
*namePtr = ptr; *namePtr = ptr;
...@@ -975,7 +1023,7 @@ int parsePseudoAttribute(const ENCODING *enc, ...@@ -975,7 +1023,7 @@ int parsePseudoAttribute(const ENCODING *enc,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return 0; return 0;
} }
open = c; open = (char)c;
ptr += enc->minBytesPerChar; ptr += enc->minBytesPerChar;
*valPtr = ptr; *valPtr = ptr;
for (;; ptr += enc->minBytesPerChar) { for (;; ptr += enc->minBytesPerChar) {
...@@ -1005,7 +1053,8 @@ static const char KW_encoding[] = { ...@@ -1005,7 +1053,8 @@ static const char KW_encoding[] = {
}; };
static const char KW_standalone[] = { static const char KW_standalone[] = {
ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0' ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
ASCII_n, ASCII_e, '\0'
}; };
static const char KW_yes[] = { static const char KW_yes[] = {
...@@ -1016,8 +1065,8 @@ static const char KW_no[] = { ...@@ -1016,8 +1065,8 @@ static const char KW_no[] = {
ASCII_n, ASCII_o, '\0' ASCII_n, ASCII_o, '\0'
}; };
static static int
int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const char *, const char *,
const char *), const char *),
int isGeneralTextEntity, int isGeneralTextEntity,
...@@ -1031,12 +1080,13 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, ...@@ -1031,12 +1080,13 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const ENCODING **encoding, const ENCODING **encoding,
int *standalone) int *standalone)
{ {
const char *val = 0; const char *val = NULL;
const char *name = 0; const char *name = NULL;
const char *nameEnd = 0; const char *nameEnd = NULL;
ptr += 5 * enc->minBytesPerChar; ptr += 5 * enc->minBytesPerChar;
end -= 2 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar;
if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) { if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
|| !name) {
*badPtr = ptr; *badPtr = ptr;
return 0; return 0;
} }
...@@ -1081,7 +1131,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, ...@@ -1081,7 +1131,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
if (!name) if (!name)
return 1; return 1;
} }
if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
|| isGeneralTextEntity) {
*badPtr = name; *badPtr = name;
return 0; return 0;
} }
...@@ -1106,8 +1157,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, ...@@ -1106,8 +1157,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
return 1; return 1;
} }
static static int FASTCALL
int checkCharRefNumber(int result) checkCharRefNumber(int result)
{ {
switch (result >> 8) { switch (result >> 8) {
case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xD8: case 0xD9: case 0xDA: case 0xDB:
...@@ -1125,7 +1176,8 @@ int checkCharRefNumber(int result) ...@@ -1125,7 +1176,8 @@ int checkCharRefNumber(int result)
return result; return result;
} }
int XmlUtf8Encode(int c, char *buf) int FASTCALL
XmlUtf8Encode(int c, char *buf)
{ {
enum { enum {
/* minN is minimum legal resulting value for N byte sequence */ /* minN is minimum legal resulting value for N byte sequence */
...@@ -1137,42 +1189,43 @@ int XmlUtf8Encode(int c, char *buf) ...@@ -1137,42 +1189,43 @@ int XmlUtf8Encode(int c, char *buf)
if (c < 0) if (c < 0)
return 0; return 0;
if (c < min2) { if (c < min2) {
buf[0] = (c | UTF8_cval1); buf[0] = (char)(c | UTF8_cval1);
return 1; return 1;
} }
if (c < min3) { if (c < min3) {
buf[0] = ((c >> 6) | UTF8_cval2); buf[0] = (char)((c >> 6) | UTF8_cval2);
buf[1] = ((c & 0x3f) | 0x80); buf[1] = (char)((c & 0x3f) | 0x80);
return 2; return 2;
} }
if (c < min4) { if (c < min4) {
buf[0] = ((c >> 12) | UTF8_cval3); buf[0] = (char)((c >> 12) | UTF8_cval3);
buf[1] = (((c >> 6) & 0x3f) | 0x80); buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
buf[2] = ((c & 0x3f) | 0x80); buf[2] = (char)((c & 0x3f) | 0x80);
return 3; return 3;
} }
if (c < 0x110000) { if (c < 0x110000) {
buf[0] = ((c >> 18) | UTF8_cval4); buf[0] = (char)((c >> 18) | UTF8_cval4);
buf[1] = (((c >> 12) & 0x3f) | 0x80); buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
buf[2] = (((c >> 6) & 0x3f) | 0x80); buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
buf[3] = ((c & 0x3f) | 0x80); buf[3] = (char)((c & 0x3f) | 0x80);
return 4; return 4;
} }
return 0; return 0;
} }
int XmlUtf16Encode(int charNum, unsigned short *buf) int FASTCALL
XmlUtf16Encode(int charNum, unsigned short *buf)
{ {
if (charNum < 0) if (charNum < 0)
return 0; return 0;
if (charNum < 0x10000) { if (charNum < 0x10000) {
buf[0] = charNum; buf[0] = (unsigned short)charNum;
return 1; return 1;
} }
if (charNum < 0x110000) { if (charNum < 0x110000) {
charNum -= 0x10000; charNum -= 0x10000;
buf[0] = (charNum >> 10) + 0xD800; buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
buf[1] = (charNum & 0x3FF) + 0xDC00; buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
return 2; return 2;
} }
return 0; return 0;
...@@ -1186,61 +1239,64 @@ struct unknown_encoding { ...@@ -1186,61 +1239,64 @@ struct unknown_encoding {
char utf8[256][4]; char utf8[256][4];
}; };
int XmlSizeOfUnknownEncoding(void) #define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc))
int
XmlSizeOfUnknownEncoding(void)
{ {
return sizeof(struct unknown_encoding); return sizeof(struct unknown_encoding);
} }
static static int PTRFASTCALL
int unknown_isName(const ENCODING *enc, const char *p) unknown_isName(const ENCODING *enc, const char *p)
{ {
int c = ((const struct unknown_encoding *)enc) const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
->convert(((const struct unknown_encoding *)enc)->userData, p); int c = uenc->convert(uenc->userData, p);
if (c & ~0xFFFF) if (c & ~0xFFFF)
return 0; return 0;
return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
} }
static static int PTRFASTCALL
int unknown_isNmstrt(const ENCODING *enc, const char *p) unknown_isNmstrt(const ENCODING *enc, const char *p)
{ {
int c = ((const struct unknown_encoding *)enc) const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
->convert(((const struct unknown_encoding *)enc)->userData, p); int c = uenc->convert(uenc->userData, p);
if (c & ~0xFFFF) if (c & ~0xFFFF)
return 0; return 0;
return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
} }
static static int PTRFASTCALL
int unknown_isInvalid(const ENCODING *enc, const char *p) unknown_isInvalid(const ENCODING *enc, const char *p)
{ {
int c = ((const struct unknown_encoding *)enc) const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
->convert(((const struct unknown_encoding *)enc)->userData, p); int c = uenc->convert(uenc->userData, p);
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
} }
static static void PTRCALL
void unknown_toUtf8(const ENCODING *enc, unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
char buf[XML_UTF8_ENCODE_MAX]; char buf[XML_UTF8_ENCODE_MAX];
for (;;) { for (;;) {
const char *utf8; const char *utf8;
int n; int n;
if (*fromP == fromLim) if (*fromP == fromLim)
break; break;
utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; utf8 = uenc->utf8[(unsigned char)**fromP];
n = *utf8++; n = *utf8++;
if (n == 0) { if (n == 0) {
int c = ((const struct unknown_encoding *)enc) int c = uenc->convert(uenc->userData, *fromP);
->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
n = XmlUtf8Encode(c, buf); n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP) if (n > toLim - *toP)
break; break;
utf8 = buf; utf8 = buf;
*fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2); - (BT_LEAD2 - 2));
} }
else { else {
if (n > toLim - *toP) if (n > toLim - *toP)
...@@ -1253,19 +1309,19 @@ void unknown_toUtf8(const ENCODING *enc, ...@@ -1253,19 +1309,19 @@ void unknown_toUtf8(const ENCODING *enc,
} }
} }
static static void PTRCALL
void unknown_toUtf16(const ENCODING *enc, unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
while (*fromP != fromLim && *toP != toLim) { while (*fromP != fromLim && *toP != toLim) {
unsigned short c unsigned short c = uenc->utf16[(unsigned char)**fromP];
= ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
if (c == 0) { if (c == 0) {
c = (unsigned short)((const struct unknown_encoding *)enc) c = (unsigned short)
->convert(((const struct unknown_encoding *)enc)->userData, *fromP); uenc->convert(uenc->userData, *fromP);
*fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2); - (BT_LEAD2 - 2));
} }
else else
(*fromP)++; (*fromP)++;
...@@ -1276,11 +1332,11 @@ void unknown_toUtf16(const ENCODING *enc, ...@@ -1276,11 +1332,11 @@ void unknown_toUtf16(const ENCODING *enc,
ENCODING * ENCODING *
XmlInitUnknownEncoding(void *mem, XmlInitUnknownEncoding(void *mem,
int *table, int *table,
int (*convert)(void *userData, const char *p), CONVERTER convert,
void *userData) void *userData)
{ {
int i; int i;
struct unknown_encoding *e = mem; struct unknown_encoding *e = (struct unknown_encoding *)mem;
for (i = 0; i < (int)sizeof(struct normal_encoding); i++) for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
((char *)mem)[i] = ((char *)&latin1_encoding)[i]; ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
for (i = 0; i < 128; i++) for (i = 0; i < 128; i++)
...@@ -1300,7 +1356,7 @@ XmlInitUnknownEncoding(void *mem, ...@@ -1300,7 +1356,7 @@ XmlInitUnknownEncoding(void *mem,
else if (c < 0) { else if (c < 0) {
if (c < -4) if (c < -4)
return 0; return 0;
e->normal.type[i] = BT_LEAD2 - (c + 2); e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
e->utf8[i][0] = 0; e->utf8[i][0] = 0;
e->utf16[i] = 0; e->utf16[i] = 0;
} }
...@@ -1312,7 +1368,7 @@ XmlInitUnknownEncoding(void *mem, ...@@ -1312,7 +1368,7 @@ XmlInitUnknownEncoding(void *mem,
e->normal.type[i] = latin1_encoding.type[c]; e->normal.type[i] = latin1_encoding.type[c];
e->utf8[i][0] = 1; e->utf8[i][0] = 1;
e->utf8[i][1] = (char)c; e->utf8[i][1] = (char)c;
e->utf16[i] = c == 0 ? 0xFFFF : c; e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
} }
else if (checkCharRefNumber(c) < 0) { else if (checkCharRefNumber(c) < 0) {
e->normal.type[i] = BT_NONXML; e->normal.type[i] = BT_NONXML;
...@@ -1331,7 +1387,7 @@ XmlInitUnknownEncoding(void *mem, ...@@ -1331,7 +1387,7 @@ XmlInitUnknownEncoding(void *mem,
else else
e->normal.type[i] = BT_OTHER; e->normal.type[i] = BT_OTHER;
e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
e->utf16[i] = c; e->utf16[i] = (unsigned short)c;
} }
} }
e->userData = userData; e->userData = userData;
...@@ -1367,10 +1423,12 @@ enum { ...@@ -1367,10 +1423,12 @@ enum {
}; };
static const char KW_ISO_8859_1[] = { static const char KW_ISO_8859_1[] = {
ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0' ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
ASCII_MINUS, ASCII_1, '\0'
}; };
static const char KW_US_ASCII[] = { static const char KW_US_ASCII[] = {
ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0' ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
'\0'
}; };
static const char KW_UTF_8[] = { static const char KW_UTF_8[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
...@@ -1379,14 +1437,16 @@ static const char KW_UTF_16[] = { ...@@ -1379,14 +1437,16 @@ static const char KW_UTF_16[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
}; };
static const char KW_UTF_16BE[] = { static const char KW_UTF_16BE[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0' ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
'\0'
}; };
static const char KW_UTF_16LE[] = { static const char KW_UTF_16LE[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0' ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
'\0'
}; };
static static int FASTCALL
int getEncodingIndex(const char *name) getEncodingIndex(const char *name)
{ {
static const char *encodingNames[] = { static const char *encodingNames[] = {
KW_ISO_8859_1, KW_ISO_8859_1,
...@@ -1397,7 +1457,7 @@ int getEncodingIndex(const char *name) ...@@ -1397,7 +1457,7 @@ int getEncodingIndex(const char *name)
KW_UTF_16LE, KW_UTF_16LE,
}; };
int i; int i;
if (name == 0) if (name == NULL)
return NO_ENC; return NO_ENC;
for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
if (streqci(name, encodingNames[i])) if (streqci(name, encodingNames[i]))
...@@ -1405,22 +1465,23 @@ int getEncodingIndex(const char *name) ...@@ -1405,22 +1465,23 @@ int getEncodingIndex(const char *name)
return UNKNOWN_ENC; return UNKNOWN_ENC;
} }
/* For binary compatibility, we store the index of the encoding specified /* For binary compatibility, we store the index of the encoding
at initialization in the isUtf16 member. */ specified at initialization in the isUtf16 member.
*/
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i)
/* This is what detects the encoding. /* This is what detects the encoding. encodingTable maps from
encodingTable maps from encoding indices to encodings; encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding; the external (protocol) specified encoding; state is
state is XML_CONTENT_STATE if we're parsing an external text entity, XML_CONTENT_STATE if we're parsing an external text entity, and
and XML_PROLOG_STATE otherwise. XML_PROLOG_STATE otherwise.
*/ */
static static int
int initScan(const ENCODING **encodingTable, initScan(const ENCODING **encodingTable,
const INIT_ENCODING *enc, const INIT_ENCODING *enc,
int state, int state,
const char *ptr, const char *ptr,
...@@ -1490,10 +1551,12 @@ int initScan(const ENCODING **encodingTable, ...@@ -1490,10 +1551,12 @@ int initScan(const ENCODING **encodingTable,
of ISO-8859-1 or some flavour of UTF-16 of ISO-8859-1 or some flavour of UTF-16
and this is an external text entity, and this is an external text entity,
don't look for the BOM, don't look for the BOM,
because it might be a legal data. */ because it might be a legal data.
*/
if (state == XML_CONTENT_STATE) { if (state == XML_CONTENT_STATE) {
int e = INIT_ENC_INDEX(enc); int e = INIT_ENC_INDEX(enc);
if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC) if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
|| e == UTF_16LE_ENC || e == UTF_16_ENC)
break; break;
} }
if (ptr + 2 == end) if (ptr + 2 == end)
...@@ -1506,10 +1569,12 @@ int initScan(const ENCODING **encodingTable, ...@@ -1506,10 +1569,12 @@ int initScan(const ENCODING **encodingTable,
break; break;
default: default:
if (ptr[0] == '\0') { if (ptr[0] == '\0') {
/* 0 isn't a legal data character. Furthermore a document entity can only /* 0 isn't a legal data character. Furthermore a document
start with ASCII characters. So the only way this can fail to be big-endian entity can only start with ASCII characters. So the only
UTF-16 if it it's an external parsed general entity that's labelled as way this can fail to be big-endian UTF-16 if it it's an
UTF-16LE. */ external parsed general entity that's labelled as
UTF-16LE.
*/
if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
break; break;
*encPtr = encodingTable[UTF_16BE_ENC]; *encPtr = encodingTable[UTF_16BE_ENC];
...@@ -1523,7 +1588,8 @@ int initScan(const ENCODING **encodingTable, ...@@ -1523,7 +1588,8 @@ int initScan(const ENCODING **encodingTable,
- no encoding declaration - no encoding declaration
by assuming UTF-16LE. But we don't, because this would mean when by assuming UTF-16LE. But we don't, because this would mean when
presented just with a single byte, we couldn't reliably determine presented just with a single byte, we couldn't reliably determine
whether we needed further bytes. */ whether we needed further bytes.
*/
if (state == XML_CONTENT_STATE) if (state == XML_CONTENT_STATE)
break; break;
*encPtr = encodingTable[UTF_16LE_ENC]; *encPtr = encodingTable[UTF_16LE_ENC];
...@@ -1556,7 +1622,7 @@ int initScan(const ENCODING **encodingTable, ...@@ -1556,7 +1622,7 @@ int initScan(const ENCODING **encodingTable,
ENCODING * ENCODING *
XmlInitUnknownEncodingNS(void *mem, XmlInitUnknownEncodingNS(void *mem,
int *table, int *table,
int (*convert)(void *userData, const char *p), CONVERTER convert,
void *userData) void *userData)
{ {
ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
......
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file COPYING for copying permission.
See the file COPYING for copying permission.
*/ */
#ifndef XmlTok_INCLUDED #ifndef XmlTok_INCLUDED
...@@ -11,9 +10,11 @@ extern "C" { ...@@ -11,9 +10,11 @@ extern "C" {
#endif #endif
/* The following token may be returned by XmlContentTok */ /* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
illegal ]]> sequence */ start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ /* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_NONE -4 /* The string to be scanned is empty */ #define XML_TOK_NONE -4 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; #define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
might be part of CRLF sequence */ might be part of CRLF sequence */
...@@ -22,8 +23,8 @@ extern "C" { ...@@ -22,8 +23,8 @@ extern "C" {
#define XML_TOK_INVALID 0 #define XML_TOK_INVALID 0
/* The following tokens are returned by XmlContentTok; some are also /* The following tokens are returned by XmlContentTok; some are also
returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */ returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok.
*/
#define XML_TOK_START_TAG_WITH_ATTS 1 #define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2 #define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
...@@ -35,7 +36,9 @@ extern "C" { ...@@ -35,7 +36,9 @@ extern "C" {
#define XML_TOK_ENTITY_REF 9 #define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10 /* numeric character reference */ #define XML_TOK_CHAR_REF 10 /* numeric character reference */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ /* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_PI 11 /* processing instruction */ #define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_COMMENT 13 #define XML_TOK_COMMENT 13
...@@ -75,8 +78,9 @@ extern "C" { ...@@ -75,8 +78,9 @@ extern "C" {
/* The following token is returned only by XmlCdataSectionTok */ /* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40 #define XML_TOK_CDATA_SECT_CLOSE 40
/* With namespace processing this is returned by XmlPrologTok /* With namespace processing this is returned by XmlPrologTok for a
for a name with a colon. */ name with a colon.
*/
#define XML_TOK_PREFIXED_NAME 41 #define XML_TOK_PREFIXED_NAME 41
#ifdef XML_DTD #ifdef XML_DTD
...@@ -121,37 +125,45 @@ typedef struct { ...@@ -121,37 +125,45 @@ typedef struct {
struct encoding; struct encoding;
typedef struct encoding ENCODING; typedef struct encoding ENCODING;
struct encoding { typedef int (PTRCALL *SCANNER)(const ENCODING *,
int (*scanners[XML_N_STATES])(const ENCODING *,
const char *, const char *,
const char *, const char *,
const char **); const char **);
int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *,
struct encoding {
SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES];
int (PTRCALL *sameName)(const ENCODING *,
const char *, const char *,
const char *);
int (PTRCALL *nameMatchesAscii)(const ENCODING *,
const char *, const char *,
const char **); const char *,
int (*sameName)(const ENCODING *, const char *);
const char *, const char *); int (PTRFASTCALL *nameLength)(const ENCODING *, const char *);
int (*nameMatchesAscii)(const ENCODING *, const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *);
const char *, const char *, const char *); int (PTRCALL *getAtts)(const ENCODING *enc,
int (*nameLength)(const ENCODING *, const char *); const char *ptr,
const char *(*skipS)(const ENCODING *, const char *); int attsMax,
int (*getAtts)(const ENCODING *enc, const char *ptr, ATTRIBUTE *atts);
int attsMax, ATTRIBUTE *atts); int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);
int (*charRefNumber)(const ENCODING *enc, const char *ptr); int (PTRCALL *predefinedEntityName)(const ENCODING *,
int (*predefinedEntityName)(const ENCODING *, const char *, const char *); const char *,
void (*updatePosition)(const ENCODING *, const char *);
void (PTRCALL *updatePosition)(const ENCODING *,
const char *ptr, const char *ptr,
const char *end, const char *end,
POSITION *); POSITION *);
int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end, int (PTRCALL *isPublicId)(const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr); const char **badPtr);
void (*utf8Convert)(const ENCODING *enc, void (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
char **toP, char **toP,
const char *toLim); const char *toLim);
void (*utf16Convert)(const ENCODING *enc, void (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
unsigned short **toP, unsigned short **toP,
...@@ -161,24 +173,25 @@ struct encoding { ...@@ -161,24 +173,25 @@ struct encoding {
char isUtf16; char isUtf16;
}; };
/* /* Scan the string starting at ptr until the end of the next complete
Scan the string starting at ptr until the end of the next complete token, token, but do not scan past eptr. Return an integer giving the
but do not scan past eptr. Return an integer giving the type of token. type of token.
Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
Return XML_TOK_PARTIAL when the string does not contain a complete token; Return XML_TOK_PARTIAL when the string does not contain a complete
nextTokPtr will not be set. token; nextTokPtr will not be set.
Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr Return XML_TOK_INVALID when the string does not start a valid
will be set to point to the character which made the token invalid. token; nextTokPtr will be set to point to the character which made
the token invalid.
Otherwise the string starts with a valid token; nextTokPtr will be set to point Otherwise the string starts with a valid token; nextTokPtr will be
to the character following the end of that token. set to point to the character following the end of that token.
Each data character counts as a single token, but adjacent data characters Each data character counts as a single token, but adjacent data
may be returned together. Similarly for characters in the prolog outside characters may be returned together. Similarly for characters in
literals, comments and processing instructions. the prolog outside literals, comments and processing instructions.
*/ */
...@@ -201,9 +214,9 @@ literals, comments and processing instructions. ...@@ -201,9 +214,9 @@ literals, comments and processing instructions.
#endif /* XML_DTD */ #endif /* XML_DTD */
/* This is used for performing a 2nd-level tokenization on /* This is used for performing a 2nd-level tokenization on the content
the content of a literal that has already been returned by XmlTok. */ of a literal that has already been returned by XmlTok.
*/
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
(((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
...@@ -264,14 +277,16 @@ int XmlParseXmlDecl(int isGeneralTextEntity, ...@@ -264,14 +277,16 @@ int XmlParseXmlDecl(int isGeneralTextEntity,
int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void); const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void); const ENCODING *XmlGetUtf16InternalEncoding(void);
int XmlUtf8Encode(int charNumber, char *buf); int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
int XmlUtf16Encode(int charNumber, unsigned short *buf); int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf);
int XmlSizeOfUnknownEncoding(void); int XmlSizeOfUnknownEncoding(void);
typedef int (*CONVERTER)(void *userData, const char *p);
ENCODING * ENCODING *
XmlInitUnknownEncoding(void *mem, XmlInitUnknownEncoding(void *mem,
int *table, int *table,
int (*conv)(void *userData, const char *p), CONVERTER convert,
void *userData); void *userData);
int XmlParseXmlDeclNS(int isGeneralTextEntity, int XmlParseXmlDeclNS(int isGeneralTextEntity,
...@@ -284,13 +299,14 @@ int XmlParseXmlDeclNS(int isGeneralTextEntity, ...@@ -284,13 +299,14 @@ int XmlParseXmlDeclNS(int isGeneralTextEntity,
const char **encodingNamePtr, const char **encodingNamePtr,
const ENCODING **namedEncodingPtr, const ENCODING **namedEncodingPtr,
int *standalonePtr); int *standalonePtr);
int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING * ENCODING *
XmlInitUnknownEncodingNS(void *mem, XmlInitUnknownEncodingNS(void *mem,
int *table, int *table,
int (*conv)(void *userData, const char *p), CONVERTER convert,
void *userData); void *userData);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file COPYING for copying permission.
See the file COPYING for copying permission.
*/ */
#ifndef IS_INVALID_CHAR #ifndef IS_INVALID_CHAR
...@@ -87,9 +86,9 @@ See the file COPYING for copying permission. ...@@ -87,9 +86,9 @@ See the file COPYING for copying permission.
/* ptr points to character following "<!-" */ /* ptr points to character following "<!-" */
static static int PTRCALL
int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (ptr != end) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
...@@ -125,9 +124,9 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -125,9 +124,9 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<!" */ /* ptr points to character following "<!" */
static static int PTRCALL
int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
...@@ -172,8 +171,9 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -172,8 +171,9 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
const char *end, int *tokPtr)
{ {
int upper = 0; int upper = 0;
*tokPtr = XML_TOK_PI; *tokPtr = XML_TOK_PI;
...@@ -216,9 +216,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -216,9 +216,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<?" */ /* ptr points to character following "<?" */
static static int PTRCALL
int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
int tok; int tok;
const char *target = ptr; const char *target = ptr;
...@@ -278,12 +278,12 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -278,12 +278,12 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static int PTRCALL
static PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char *end, const char **nextTokPtr)
const char **nextTokPtr)
{ {
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB };
int i; int i;
/* CDATA[ */ /* CDATA[ */
if (end - ptr < 6 * MINBPC(enc)) if (end - ptr < 6 * MINBPC(enc))
...@@ -298,9 +298,9 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e ...@@ -298,9 +298,9 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e
return XML_TOK_CDATA_SECT_OPEN; return XML_TOK_CDATA_SECT_OPEN;
} }
static static int PTRCALL
int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
return XML_TOK_NONE; return XML_TOK_NONE;
...@@ -376,9 +376,9 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en ...@@ -376,9 +376,9 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
/* ptr points to character following "</" */ /* ptr points to character following "</" */
static static int PTRCALL
int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
...@@ -407,7 +407,8 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -407,7 +407,8 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
#ifdef XML_NS #ifdef XML_NS
case BT_COLON: case BT_COLON:
/* no need to check qname syntax here, since end-tag must match exactly */ /* no need to check qname syntax here,
since end-tag must match exactly */
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
#endif #endif
...@@ -424,9 +425,9 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -424,9 +425,9 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&#X" */ /* ptr points to character following "&#X" */
static static int PTRCALL
int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
...@@ -456,9 +457,9 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end ...@@ -456,9 +457,9 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end
/* ptr points to character following "&#" */ /* ptr points to character following "&#" */
static static int PTRCALL
int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (ptr != end) {
if (CHAR_MATCHES(enc, ptr, ASCII_x)) if (CHAR_MATCHES(enc, ptr, ASCII_x))
...@@ -488,8 +489,8 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -488,8 +489,8 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&" */ /* ptr points to character following "&" */
static static int PTRCALL
int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
...@@ -518,8 +519,8 @@ int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -518,8 +519,8 @@ int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following first character of attribute name */ /* ptr points to character following first character of attribute name */
static static int PTRCALL
int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
#ifdef XML_NS #ifdef XML_NS
...@@ -574,7 +575,6 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -574,7 +575,6 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
hadColon = 0; hadColon = 0;
#endif #endif
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (ptr == end)
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
...@@ -678,8 +678,8 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -678,8 +678,8 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<" */ /* ptr points to character following "<" */
static static int PTRCALL
int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
#ifdef XML_NS #ifdef XML_NS
...@@ -696,7 +696,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -696,7 +696,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_MINUS: case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_LSQB: case BT_LSQB:
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
end, nextTokPtr);
} }
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
...@@ -777,8 +778,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -777,8 +778,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
...@@ -876,8 +877,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -876,8 +877,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "%" */ /* ptr points to character following "%" */
static static int PTRCALL
int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
...@@ -905,8 +906,8 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -905,8 +906,8 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
...@@ -932,8 +933,8 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -932,8 +933,8 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
return -XML_TOK_POUND_NAME; return -XML_TOK_POUND_NAME;
} }
static static int PTRCALL
int PREFIX(scanLit)(int open, const ENCODING *enc, PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
...@@ -964,8 +965,8 @@ int PREFIX(scanLit)(int open, const ENCODING *enc, ...@@ -964,8 +965,8 @@ int PREFIX(scanLit)(int open, const ENCODING *enc,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
int tok; int tok;
...@@ -1008,8 +1009,11 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -1008,8 +1009,11 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
case BT_CR: case BT_CR:
if (ptr + MINBPC(enc) == end) if (ptr + MINBPC(enc) == end) {
*nextTokPtr = end;
/* indicate that this might be part of a CR/LF pair */
return -XML_TOK_PROLOG_S; return -XML_TOK_PROLOG_S;
}
/* fall through */ /* fall through */
case BT_S: case BT_LF: case BT_S: case BT_LF:
for (;;) { for (;;) {
...@@ -1192,9 +1196,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -1192,9 +1196,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return -tok; return -tok;
} }
static static int PTRCALL
int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr == end)
...@@ -1250,9 +1254,9 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * ...@@ -1250,9 +1254,9 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
return XML_TOK_DATA_CHARS; return XML_TOK_DATA_CHARS;
} }
static static int PTRCALL
int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr == end)
...@@ -1307,9 +1311,9 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end ...@@ -1307,9 +1311,9 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end
#ifdef XML_DTD #ifdef XML_DTD
static static int PTRCALL
int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
int level = 0; int level = 0;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
...@@ -1360,8 +1364,8 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e ...@@ -1360,8 +1364,8 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
#endif /* XML_DTD */ #endif /* XML_DTD */
static static int PTRCALL
int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
const char **badPtr) const char **badPtr)
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
...@@ -1415,12 +1419,13 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, ...@@ -1415,12 +1419,13 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
return 1; return 1;
} }
/* This must only be called for a well-formed start-tag or empty element tag. /* This must only be called for a well-formed start-tag or empty
Returns the number of attributes. Pointers to the first attsMax attributes element tag. Returns the number of attributes. Pointers to the
are stored in atts. */ first attsMax attributes are stored in atts.
*/
static static int PTRCALL
int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
int attsMax, ATTRIBUTE *atts) int attsMax, ATTRIBUTE *atts)
{ {
enum { other, inName, inValue } state = inName; enum { other, inName, inValue } state = inName;
...@@ -1512,14 +1517,16 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, ...@@ -1512,14 +1517,16 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
/* not reached */ /* not reached */
} }
static static int PTRFASTCALL
int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
{ {
int result = 0; int result = 0;
/* skip &# */ /* skip &# */
ptr += 2*MINBPC(enc); ptr += 2*MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_x)) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { for (ptr += MINBPC(enc);
!CHAR_MATCHES(enc, ptr, ASCII_SEMI);
ptr += MINBPC(enc)) {
int c = BYTE_TO_ASCII(enc, ptr); int c = BYTE_TO_ASCII(enc, ptr);
switch (c) { switch (c) {
case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
...@@ -1527,11 +1534,13 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) ...@@ -1527,11 +1534,13 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
result <<= 4; result <<= 4;
result |= (c - ASCII_0); result |= (c - ASCII_0);
break; break;
case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: case ASCII_A: case ASCII_B: case ASCII_C:
case ASCII_D: case ASCII_E: case ASCII_F:
result <<= 4; result <<= 4;
result += 10 + (c - ASCII_A); result += 10 + (c - ASCII_A);
break; break;
case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: case ASCII_a: case ASCII_b: case ASCII_c:
case ASCII_d: case ASCII_e: case ASCII_f:
result <<= 4; result <<= 4;
result += 10 + (c - ASCII_a); result += 10 + (c - ASCII_a);
break; break;
...@@ -1552,8 +1561,9 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) ...@@ -1552,8 +1561,9 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
return checkCharRefNumber(result); return checkCharRefNumber(result);
} }
static static int PTRCALL
int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
const char *end)
{ {
switch ((end - ptr)/MINBPC(enc)) { switch ((end - ptr)/MINBPC(enc)) {
case 2: case 2:
...@@ -1605,8 +1615,8 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha ...@@ -1605,8 +1615,8 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
return 0; return 0;
} }
static static int PTRCALL
int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
{ {
for (;;) { for (;;) {
switch (BYTE_TYPE(enc, ptr1)) { switch (BYTE_TYPE(enc, ptr1)) {
...@@ -1669,8 +1679,8 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) ...@@ -1669,8 +1679,8 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
/* not reached */ /* not reached */
} }
static static int PTRCALL
int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
const char *end1, const char *ptr2) const char *end1, const char *ptr2)
{ {
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
...@@ -1682,8 +1692,8 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, ...@@ -1682,8 +1692,8 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
return ptr1 == end1; return ptr1 == end1;
} }
static static int PTRFASTCALL
int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
{ {
const char *start = ptr; const char *start = ptr;
for (;;) { for (;;) {
...@@ -1709,8 +1719,8 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) ...@@ -1709,8 +1719,8 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
} }
} }
static static const char * PTRFASTCALL
const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) PREFIX(skipS)(const ENCODING *enc, const char *ptr)
{ {
for (;;) { for (;;) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
...@@ -1725,8 +1735,8 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) ...@@ -1725,8 +1735,8 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
} }
} }
static static void PTRCALL
void PREFIX(updatePosition)(const ENCODING *enc, PREFIX(updatePosition)(const ENCODING *enc,
const char *ptr, const char *ptr,
const char *end, const char *end,
POSITION *pos) POSITION *pos)
...@@ -1766,3 +1776,4 @@ void PREFIX(updatePosition)(const ENCODING *enc, ...@@ -1766,3 +1776,4 @@ void PREFIX(updatePosition)(const ENCODING *enc,
#undef CHECK_NAME_CASES #undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE #undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES #undef CHECK_NMSTRT_CASES
const ENCODING *NS(XmlGetUtf8InternalEncoding)(void) const ENCODING *
NS(XmlGetUtf8InternalEncoding)(void)
{ {
return &ns(internal_utf8_encoding).enc; return &ns(internal_utf8_encoding).enc;
} }
const ENCODING *NS(XmlGetUtf16InternalEncoding)(void) const ENCODING *
NS(XmlGetUtf16InternalEncoding)(void)
{ {
#if XML_BYTE_ORDER == 12 #if BYTEORDER == 1234
return &ns(internal_little2_encoding).enc; return &ns(internal_little2_encoding).enc;
#elif XML_BYTE_ORDER == 21 #elif BYTEORDER == 4321
return &ns(internal_big2_encoding).enc; return &ns(internal_big2_encoding).enc;
#else #else
const short n = 1; const short n = 1;
return *(const char *)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc; return (*(const char *)&n
? &ns(internal_little2_encoding).enc
: &ns(internal_big2_encoding).enc);
#endif #endif
} }
static static const ENCODING *NS(encodings)[] = {
const ENCODING *NS(encodings)[] = {
&ns(latin1_encoding).enc, &ns(latin1_encoding).enc,
&ns(ascii_encoding).enc, &ns(ascii_encoding).enc,
&ns(utf8_encoding).enc, &ns(utf8_encoding).enc,
...@@ -26,21 +29,25 @@ const ENCODING *NS(encodings)[] = { ...@@ -26,21 +29,25 @@ const ENCODING *NS(encodings)[] = {
&ns(utf8_encoding).enc /* NO_ENC */ &ns(utf8_encoding).enc /* NO_ENC */
}; };
static static int PTRCALL
int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_PROLOG_STATE, ptr, end, nextTokPtr);
} }
static static int PTRCALL
int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_CONTENT_STATE, ptr, end, nextTokPtr);
} }
int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) int
NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
const char *name)
{ {
int i = getEncodingIndex(name); int i = getEncodingIndex(name);
if (i == UNKNOWN_ENC) if (i == UNKNOWN_ENC)
...@@ -54,8 +61,8 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n ...@@ -54,8 +61,8 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n
return 1; return 1;
} }
static static const ENCODING *
const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
{ {
#define ENCODING_MAX 128 #define ENCODING_MAX 128
char buf[ENCODING_MAX]; char buf[ENCODING_MAX];
...@@ -73,7 +80,8 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha ...@@ -73,7 +80,8 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha
return NS(encodings)[i]; return NS(encodings)[i];
} }
int NS(XmlParseXmlDecl)(int isGeneralTextEntity, int
NS(XmlParseXmlDecl)(int isGeneralTextEntity,
const ENCODING *enc, const ENCODING *enc,
const char *ptr, const char *ptr,
const char *end, const char *end,
......
...@@ -690,9 +690,9 @@ class PyBuildExt(build_ext): ...@@ -690,9 +690,9 @@ class PyBuildExt(build_ext):
# More information on Expat can be found at www.libexpat.org. # More information on Expat can be found at www.libexpat.org.
# #
if sys.byteorder == "little": if sys.byteorder == "little":
xmlbo = "12" xmlbo = "1234"
else: else:
xmlbo = "21" xmlbo = "4321"
expatinc = os.path.join(os.getcwd(), srcdir, 'Modules', 'expat') expatinc = os.path.join(os.getcwd(), srcdir, 'Modules', 'expat')
exts.append(Extension('pyexpat', exts.append(Extension('pyexpat',
sources = [ sources = [
...@@ -702,10 +702,9 @@ class PyBuildExt(build_ext): ...@@ -702,10 +702,9 @@ class PyBuildExt(build_ext):
'expat/xmltok.c', 'expat/xmltok.c',
], ],
define_macros = [ define_macros = [
('HAVE_EXPAT_H',None),
('XML_NS', '1'), ('XML_NS', '1'),
('XML_DTD', '1'), ('XML_DTD', '1'),
('XML_BYTE_ORDER', xmlbo), ('BYTEORDER', xmlbo),
('XML_CONTEXT_BYTES','1024'), ('XML_CONTEXT_BYTES','1024'),
], ],
include_dirs = [expatinc] include_dirs = [expatinc]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment