Commit fc03a94a authored by Martin v. Löwis's avatar Martin v. Löwis

Incorporate Expat 1.95.6.

parent 5a772d32
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#define ASCII_A 0x41
......@@ -69,7 +68,7 @@ See the file COPYING for copying permission.
#define ASCII_9 0x39
#define ASCII_TAB 0x09
#define ASCII_SPACE 0x20
#define ASCII_SPACE 0x20
#define ASCII_EXCL 0x21
#define ASCII_QUOT 0x22
#define ASCII_AMP 0x26
......
/*
Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlParse_INCLUDED
#define XmlParse_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler
#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler
#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler
#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg
#endif
#include <stdlib.h>
#ifndef XMLPARSEAPI
# if defined(__declspec) && !defined(__BEOS__) && !defined(__CYGWIN__)
# define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl
# else
# define XMLPARSEAPI(type) type
# endif
#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
#ifdef XML_STATIC
#define XMLPARSEAPI(type) type __cdecl
#else
#define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl
#endif
#else
#define XMLPARSEAPI(type) type
#endif
#endif /* not defined XMLPARSEAPI */
#ifdef __cplusplus
extern "C" {
#endif
typedef void *XML_Parser;
#ifdef XML_UNICODE_WCHAR_T
#define XML_UNICODE
#endif
struct XML_ParserStruct;
typedef struct XML_ParserStruct *XML_Parser;
/* Information is UTF-8 encoded. */
#ifdef XML_UNICODE /* Information is UTF-16 encoded. */
#ifdef XML_UNICODE_WCHAR_T
typedef wchar_t XML_Char;
typedef wchar_t XML_LChar;
#else
typedef unsigned short XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE_WCHAR_T */
#else /* Information is UTF-8 encoded. */
typedef char XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE */
/* Should this be defined using stdbool.h when C99 is available? */
typedef unsigned char XML_Bool;
#define XML_TRUE ((XML_Bool) 1)
#define XML_FALSE ((XML_Bool) 0)
enum XML_Error {
XML_ERROR_NONE,
XML_ERROR_NO_MEMORY,
XML_ERROR_SYNTAX,
XML_ERROR_NO_ELEMENTS,
XML_ERROR_INVALID_TOKEN,
XML_ERROR_UNCLOSED_TOKEN,
XML_ERROR_PARTIAL_CHAR,
XML_ERROR_TAG_MISMATCH,
XML_ERROR_DUPLICATE_ATTRIBUTE,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
XML_ERROR_PARAM_ENTITY_REF,
XML_ERROR_UNDEFINED_ENTITY,
XML_ERROR_RECURSIVE_ENTITY_REF,
XML_ERROR_ASYNC_ENTITY,
XML_ERROR_BAD_CHAR_REF,
XML_ERROR_BINARY_ENTITY_REF,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
XML_ERROR_MISPLACED_XML_PI,
XML_ERROR_UNKNOWN_ENCODING,
XML_ERROR_INCORRECT_ENCODING,
XML_ERROR_UNCLOSED_CDATA_SECTION,
XML_ERROR_EXTERNAL_ENTITY_HANDLING,
XML_ERROR_NOT_STANDALONE,
XML_ERROR_UNEXPECTED_STATE,
XML_ERROR_ENTITY_DECLARED_IN_PE,
XML_ERROR_FEATURE_REQUIRES_XML_DTD,
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
};
enum XML_Content_Type {
XML_CTYPE_EMPTY = 1,
......@@ -63,11 +124,11 @@ enum XML_Content_Quant {
typedef struct XML_cp XML_Content;
struct XML_cp {
enum XML_Content_Type type;
enum XML_Content_Quant quant;
XML_Char * name;
unsigned int numchildren;
XML_Content * children;
enum XML_Content_Type type;
enum XML_Content_Quant quant;
XML_Char * name;
unsigned int numchildren;
XML_Content * children;
};
......@@ -75,53 +136,49 @@ struct XML_cp {
description of the model argument. It's the caller's responsibility
to free model when finished with it.
*/
typedef void (*XML_ElementDeclHandler) (void *userData,
const XML_Char *name,
XML_Content *model);
XMLPARSEAPI(void)
XML_SetElementDeclHandler(XML_Parser parser,
XML_ElementDeclHandler eldecl);
/*
The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will
generate multiple calls to this handler. The "default" parameter
may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword.
The "isrequired" parameter will be true and the default value will
be NULL in the case of "#REQUIRED". If "isrequired" is true and
default is non-NULL, then this is a "#FIXED" default.
*/
typedef void (*XML_AttlistDeclHandler) (void *userData,
XML_ElementDeclHandler eldecl);
/* The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will
generate multiple calls to this handler. The "default" parameter
may be NULL in the case of the "#IMPLIED" or "#REQUIRED"
keyword. The "isrequired" parameter will be true and the default
value will be NULL in the case of "#REQUIRED". If "isrequired" is
true and default is non-NULL, then this is a "#FIXED" default.
*/
typedef void (*XML_AttlistDeclHandler) (void *userData,
const XML_Char *elname,
const XML_Char *attname,
const XML_Char *att_type,
const XML_Char *dflt,
int isrequired);
int isrequired);
XMLPARSEAPI(void)
XML_SetAttlistDeclHandler(XML_Parser parser,
XML_AttlistDeclHandler attdecl);
/* The XML declaration handler is called for *both* XML declarations and
text declarations. The way to distinguish is that the version parameter
will be null for text declarations. The encoding parameter may be null
for XML declarations. The standalone parameter will be -1, 0, or 1
indicating respectively that there was no standalone parameter in
the declaration, that it was given as no, or that it was given as yes.
*/
typedef void (*XML_XmlDeclHandler) (void *userData,
const XML_Char *version,
const XML_Char *encoding,
int standalone);
XML_AttlistDeclHandler attdecl);
/* The XML declaration handler is called for *both* XML declarations
and text declarations. The way to distinguish is that the version
parameter will be NULL for text declarations. The encoding
parameter may be NULL for XML declarations. The standalone
parameter will be -1, 0, or 1 indicating respectively that there
was no standalone parameter in the declaration, that it was given
as no, or that it was given as yes.
*/
typedef void (*XML_XmlDeclHandler) (void *userData,
const XML_Char *version,
const XML_Char *encoding,
int standalone);
XMLPARSEAPI(void)
XML_SetXmlDeclHandler(XML_Parser parser,
XML_XmlDeclHandler xmldecl);
XML_XmlDeclHandler xmldecl);
typedef struct {
......@@ -131,26 +188,27 @@ typedef struct {
} XML_Memory_Handling_Suite;
/* Constructs a new parser; encoding is the encoding specified by the
external protocol or null if there is none specified. */
external protocol or NULL if there is none specified.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreate(const XML_Char *encoding);
/* Constructs a new parser and namespace processor. Element type
names and attribute names that belong to a namespace will be expanded;
unprefixed attribute names are never expanded; unprefixed element type
names are expanded only if there is a default namespace. The expanded
name is the concatenation of the namespace URI, the namespace
separator character, and the local part of the name. If the namespace
separator is '\0' then the namespace URI and the local part will be
concatenated without any separator. When a namespace is not declared,
the name and prefix will be passed through without expansion. */
names and attribute names that belong to a namespace will be
expanded; unprefixed attribute names are never expanded; unprefixed
element type names are expanded only if there is a default
namespace. The expanded name is the concatenation of the namespace
URI, the namespace separator character, and the local part of the
name. If the namespace separator is '\0' then the namespace URI
and the local part will be concatenated without any separator.
When a namespace is not declared, the name and prefix will be
passed through without expansion.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
/* Constructs a new parser using the memory management suit referred to
/* Constructs a new parser using the memory management suite referred to
by memsuite. If memsuite is NULL, then use the standard library memory
suite. If namespaceSeparator is non-NULL it creates a parser with
namespace processing as described above. The character pointed at
......@@ -159,15 +217,26 @@ XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
All further memory operations used for the created parser will come from
the given suite.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreate_MM(const XML_Char *encoding,
const XML_Memory_Handling_Suite *memsuite,
const XML_Char *namespaceSeparator);
const XML_Memory_Handling_Suite *memsuite,
const XML_Char *namespaceSeparator);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated. */
/* Prepare a parser object to be re-used. This is particularly
valuable when memory allocation overhead is disproportionatly high,
such as when a large number of small documnents need to be parsed.
All handlers are cleared from the parser, except for the
unknownEncodingHandler. The parser's external state is re-initialized
except for the values of ns and ns_triplets.
Added in Expat 1.95.3.
*/
XMLPARSEAPI(XML_Bool)
XML_ParserReset(XML_Parser parser, const XML_Char *encoding);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated.
*/
typedef void (*XML_StartElementHandler)(void *userData,
const XML_Char *name,
const XML_Char **atts);
......@@ -192,26 +261,26 @@ typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data);
typedef void (*XML_StartCdataSectionHandler)(void *userData);
typedef void (*XML_EndCdataSectionHandler)(void *userData);
/* This is called for any characters in the XML document for
which there is no applicable handler. This includes both
characters that are part of markup which is of a kind that is
not reported (comments, markup declarations), or characters
that are part of a construct which could be reported but
for which no handler has been supplied. The characters are passed
exactly as they were in the XML document except that
they will be encoded in UTF-8. Line boundaries are not normalized.
Note that a byte order mark character is not passed to the default handler.
There are no guarantees about how characters are divided between calls
to the default handler: for example, a comment might be split between
multiple calls. */
/* This is called for any characters in the XML document for which
there is no applicable handler. This includes both characters that
are part of markup which is of a kind that is not reported
(comments, markup declarations), or characters that are part of a
construct which could be reported but for which no handler has been
supplied. The characters are passed exactly as they were in the XML
document except that they will be encoded in UTF-8 or UTF-16.
Line boundaries are not normalized. Note that a byte order mark
character is not passed to the default handler. There are no
guarantees about how characters are divided between calls to the
default handler: for example, a comment might be split between
multiple calls.
*/
typedef void (*XML_DefaultHandler)(void *userData,
const XML_Char *s,
int len);
/* This is called for the start of the DOCTYPE declaration, before
any DTD or internal subset is parsed. */
any DTD or internal subset is parsed.
*/
typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *doctypeName,
const XML_Char *sysid,
......@@ -219,7 +288,9 @@ typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
int has_internal_subset);
/* This is called for the start of the DOCTYPE declaration when the
closing > is encountered, but after processing any external subset. */
closing > is encountered, but after processing any external
subset.
*/
typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
/* This is called for entity declarations. The is_parameter_entity
......@@ -227,17 +298,19 @@ typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
otherwise.
For internal entities (<!ENTITY foo "bar">), value will
be non-null and systemId, publicID, and notationName will be null.
The value string is NOT null terminated; the length is provided in
be non-NULL and systemId, publicID, and notationName will be NULL.
The value string is NOT nul-terminated; the length is provided in
the value_length argument. Since it is legal to have zero-length
values, do not use this argument to test for internal entities.
For external entities, value will be null and systemId will be non-null.
The publicId argument will be null unless a public identifier was
provided. The notationName argument will have a non-null value only
for unparsed entity declarations.
*/
For external entities, value will be NULL and systemId will be
non-NULL. The publicId argument will be NULL unless a public
identifier was provided. The notationName argument will have a
non-NULL value only for unparsed entity declarations.
Note that is_parameter_entity can't be changed to XML_Bool, since
that would break binary compatibility.
*/
typedef void (*XML_EntityDeclHandler) (void *userData,
const XML_Char *entityName,
int is_parameter_entity,
......@@ -247,19 +320,20 @@ typedef void (*XML_EntityDeclHandler) (void *userData,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName);
XMLPARSEAPI(void)
XML_SetEntityDeclHandler(XML_Parser parser,
XML_EntityDeclHandler handler);
XML_EntityDeclHandler handler);
/* OBSOLETE -- OBSOLETE -- OBSOLETE
This handler has been superceded by the EntityDeclHandler above.
It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA)
entity. The base argument is whatever was set by XML_SetBase.
The entityName, systemId and notationName arguments will never be null.
The other arguments may be. */
This is called for a declaration of an unparsed (NDATA) entity.
The base argument is whatever was set by XML_SetBase. The
entityName, systemId and notationName arguments will never be
NULL. The other arguments may be.
*/
typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
const XML_Char *entityName,
const XML_Char *base,
......@@ -267,10 +341,10 @@ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
const XML_Char *publicId,
const XML_Char *notationName);
/* This is called for a declaration of notation.
The base argument is whatever was set by XML_SetBase.
The notationName will never be null. The other arguments can be. */
/* This is called for a declaration of notation. The base argument is
whatever was set by XML_SetBase. The notationName will never be
NULL. The other arguments can be.
*/
typedef void (*XML_NotationDeclHandler)(void *userData,
const XML_Char *notationName,
const XML_Char *base,
......@@ -278,11 +352,11 @@ typedef void (*XML_NotationDeclHandler)(void *userData,
const XML_Char *publicId);
/* When namespace processing is enabled, these are called once for
each namespace declaration. The call to the start and end element
handlers occur between the calls to the start and end namespace
declaration handlers. For an xmlns attribute, prefix will be null.
For an xmlns="" attribute, uri will be null. */
each namespace declaration. The call to the start and end element
handlers occur between the calls to the start and end namespace
declaration handlers. For an xmlns attribute, prefix will be
NULL. For an xmlns="" attribute, uri will be NULL.
*/
typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
const XML_Char *prefix,
const XML_Char *uri);
......@@ -290,87 +364,123 @@ typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
typedef void (*XML_EndNamespaceDeclHandler)(void *userData,
const XML_Char *prefix);
/* This is called if the document is not standalone (it has an
external subset or a reference to a parameter entity, but does not
have standalone="yes"). If this handler returns 0, then processing
will not continue, and the parser will return a
XML_ERROR_NOT_STANDALONE error. */
/* This is called if the document is not standalone, that is, it has an
external subset or a reference to a parameter entity, but does not
have standalone="yes". If this handler returns XML_STATUS_ERROR,
then processing will not continue, and the parser will return a
XML_ERROR_NOT_STANDALONE error.
If parameter entity parsing is enabled, then in addition to the
conditions above this handler will only be called if the referenced
entity was actually read.
*/
typedef int (*XML_NotStandaloneHandler)(void *userData);
/* This is called for a reference to an external parsed general entity.
The referenced entity is not automatically parsed.
The application can parse it immediately or later using
XML_ExternalEntityParserCreate.
The parser argument is the parser parsing the entity containing the reference;
it can be passed as the parser argument to XML_ExternalEntityParserCreate.
The systemId argument is the system identifier as specified in the entity
declaration; it will not be null.
The base argument is the system identifier that should be used as the base for
resolving systemId if systemId was relative; this is set by XML_SetBase;
it may be null.
The publicId argument is the public identifier as specified in the entity
declaration, or null if none was specified; the whitespace in the public
identifier will have been normalized as required by the XML spec.
The context argument specifies the parsing context in the format
expected by the context argument to
XML_ExternalEntityParserCreate; context is valid only until the handler
returns, so if the referenced entity is to be parsed later, it must be copied.
The handler should return 0 if processing should not continue because of
a fatal error in the handling of the external entity.
In this case the calling parser will return an
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
Note that unlike other handlers the first argument is the parser, not
userData. */
/* This is called for a reference to an external parsed general
entity. The referenced entity is not automatically parsed. The
application can parse it immediately or later using
XML_ExternalEntityParserCreate.
The parser argument is the parser parsing the entity containing the
reference; it can be passed as the parser argument to
XML_ExternalEntityParserCreate. The systemId argument is the
system identifier as specified in the entity declaration; it will
not be NULL.
The base argument is the system identifier that should be used as
the base for resolving systemId if systemId was relative; this is
set by XML_SetBase; it may be NULL.
The publicId argument is the public identifier as specified in the
entity declaration, or NULL if none was specified; the whitespace
in the public identifier will have been normalized as required by
the XML spec.
The context argument specifies the parsing context in the format
expected by the context argument to XML_ExternalEntityParserCreate;
context is valid only until the handler returns, so if the
referenced entity is to be parsed later, it must be copied.
context is NULL only when the entity is a parameter entity.
The handler should return XML_STATUS_ERROR if processing should not
continue because of a fatal error in the handling of the external
entity. In this case the calling parser will return an
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
Note that unlike other handlers the first argument is the parser,
not userData.
*/
typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser,
const XML_Char *context,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
/* This structure is filled in by the XML_UnknownEncodingHandler
to provide information to the parser about encodings that are unknown
to the parser.
The map[b] member gives information about byte sequences
whose first byte is b.
If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar
value c.
If map[b] is -1, then the byte sequence is malformed.
If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
sequence that encodes a single Unicode scalar value.
The data member will be passed as the first argument to the convert function.
The convert function is used to convert multibyte sequences;
s will point to a n-byte sequence where map[(unsigned char)*s] == -n.
The convert function must return the Unicode scalar value
represented by this byte sequence or -1 if the byte sequence is malformed.
The convert function may be null if the encoding is a single-byte encoding,
that is if map[b] >= -1 for all bytes b.
When the parser is finished with the encoding, then if release is not null,
it will call release passing it the data member;
once release has been called, the convert function will not be called again.
Expat places certain restrictions on the encodings that are supported
using this mechanism.
1. Every ASCII character that can appear in a well-formed XML document,
other than the characters
$@\^`{}~
must be represented by a single byte, and that byte must be the
same byte that represents that character in ASCII.
2. No character may require more than 4 bytes to encode.
3. All characters encoded must have Unicode scalar values <= 0xFFFF, (i.e.,
characters that would be encoded by surrogates in UTF-16 are not
allowed). Note that this restriction doesn't apply to the built-in
support for UTF-8 and UTF-16.
4. No Unicode character may be encoded by more than one distinct sequence
of bytes. */
/* This is called in two situations:
1) An entity reference is encountered for which no declaration
has been read *and* this is not an error.
2) An internal entity reference is read, but not expanded, because
XML_SetDefaultHandler has been called.
Note: skipped parameter entities in declarations and skipped general
entities in attribute values cannot be reported, because
the event would be out of sync with the reporting of the
declarations or attribute values
*/
typedef void (*XML_SkippedEntityHandler)(void *userData,
const XML_Char *entityName,
int is_parameter_entity);
/* This structure is filled in by the XML_UnknownEncodingHandler to
provide information to the parser about encodings that are unknown
to the parser.
The map[b] member gives information about byte sequences whose
first byte is b.
If map[b] is c where c is >= 0, then b by itself encodes the
Unicode scalar value c.
If map[b] is -1, then the byte sequence is malformed.
If map[b] is -n, where n >= 2, then b is the first byte of an
n-byte sequence that encodes a single Unicode scalar value.
The data member will be passed as the first argument to the convert
function.
The convert function is used to convert multibyte sequences; s will
point to a n-byte sequence where map[(unsigned char)*s] == -n. The
convert function must return the Unicode scalar value represented
by this byte sequence or -1 if the byte sequence is malformed.
The convert function may be NULL if the encoding is a single-byte
encoding, that is if map[b] >= -1 for all bytes b.
When the parser is finished with the encoding, then if release is
not NULL, it will call release passing it the data member; once
release has been called, the convert function will not be called
again.
Expat places certain restrictions on the encodings that are supported
using this mechanism.
1. Every ASCII character that can appear in a well-formed XML document,
other than the characters
$@\^`{}~
must be represented by a single byte, and that byte must be the
same byte that represents that character in ASCII.
2. No character may require more than 4 bytes to encode.
3. All characters encoded must have Unicode scalar values <=
0xFFFF, (i.e., characters that would be encoded by surrogates in
UTF-16 are not allowed). Note that this restriction doesn't
apply to the built-in support for UTF-8 and UTF-16.
4. No Unicode character may be encoded by more than one distinct
sequence of bytes.
*/
typedef struct {
int map[256];
void *data;
......@@ -379,24 +489,28 @@ typedef struct {
} XML_Encoding;
/* This is called for an encoding that is unknown to the parser.
The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in
the encoding declaration.
If the callback can provide information about the encoding,
it must fill in the XML_Encoding structure, and return 1.
Otherwise it must return 0.
If info does not describe a suitable encoding,
then the parser will return an XML_UNKNOWN_ENCODING error. */
The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in
the encoding declaration.
If the callback can provide information about the encoding, it must
fill in the XML_Encoding structure, and return XML_STATUS_OK.
Otherwise it must return XML_STATUS_ERROR.
If info does not describe a suitable encoding, then the parser will
return an XML_UNKNOWN_ENCODING error.
*/
typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData,
const XML_Char *name,
XML_Encoding *info);
XMLPARSEAPI(void)
XML_SetElementHandler(XML_Parser parser,
XML_StartElementHandler start,
XML_EndElementHandler end);
XML_StartElementHandler start,
XML_EndElementHandler end);
XMLPARSEAPI(void)
XML_SetStartElementHandler(XML_Parser, XML_StartElementHandler);
......@@ -406,19 +520,19 @@ XML_SetEndElementHandler(XML_Parser, XML_EndElementHandler);
XMLPARSEAPI(void)
XML_SetCharacterDataHandler(XML_Parser parser,
XML_CharacterDataHandler handler);
XML_CharacterDataHandler handler);
XMLPARSEAPI(void)
XML_SetProcessingInstructionHandler(XML_Parser parser,
XML_ProcessingInstructionHandler handler);
XML_ProcessingInstructionHandler handler);
XMLPARSEAPI(void)
XML_SetCommentHandler(XML_Parser parser,
XML_CommentHandler handler);
XMLPARSEAPI(void)
XML_SetCdataSectionHandler(XML_Parser parser,
XML_StartCdataSectionHandler start,
XML_EndCdataSectionHandler end);
XML_StartCdataSectionHandler start,
XML_EndCdataSectionHandler end);
XMLPARSEAPI(void)
XML_SetStartCdataSectionHandler(XML_Parser parser,
......@@ -429,89 +543,98 @@ XML_SetEndCdataSectionHandler(XML_Parser parser,
XML_EndCdataSectionHandler end);
/* This sets the default handler and also inhibits expansion of
internal entities. The entity reference will be passed to the default
handler. */
internal entities. These entity references will be passed to the
default handler, or to the skipped entity handler, if one is set.
*/
XMLPARSEAPI(void)
XML_SetDefaultHandler(XML_Parser parser,
XML_DefaultHandler handler);
XML_DefaultHandler handler);
/* This sets the default handler but does not inhibit expansion of
internal entities. The entity reference will not be passed to the
default handler. */
internal entities. The entity reference will not be passed to the
default handler.
*/
XMLPARSEAPI(void)
XML_SetDefaultHandlerExpand(XML_Parser parser,
XML_DefaultHandler handler);
XML_DefaultHandler handler);
XMLPARSEAPI(void)
XML_SetDoctypeDeclHandler(XML_Parser parser,
XML_StartDoctypeDeclHandler start,
XML_EndDoctypeDeclHandler end);
XML_StartDoctypeDeclHandler start,
XML_EndDoctypeDeclHandler end);
XMLPARSEAPI(void)
XML_SetStartDoctypeDeclHandler(XML_Parser parser,
XML_StartDoctypeDeclHandler start);
XML_StartDoctypeDeclHandler start);
XMLPARSEAPI(void)
XML_SetEndDoctypeDeclHandler(XML_Parser parser,
XML_EndDoctypeDeclHandler end);
XML_EndDoctypeDeclHandler end);
XMLPARSEAPI(void)
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
XML_UnparsedEntityDeclHandler handler);
XML_UnparsedEntityDeclHandler handler);
XMLPARSEAPI(void)
XML_SetNotationDeclHandler(XML_Parser parser,
XML_NotationDeclHandler handler);
XML_NotationDeclHandler handler);
XMLPARSEAPI(void)
XML_SetNamespaceDeclHandler(XML_Parser parser,
XML_StartNamespaceDeclHandler start,
XML_EndNamespaceDeclHandler end);
XML_StartNamespaceDeclHandler start,
XML_EndNamespaceDeclHandler end);
XMLPARSEAPI(void)
XML_SetStartNamespaceDeclHandler(XML_Parser parser,
XML_StartNamespaceDeclHandler start);
XML_StartNamespaceDeclHandler start);
XMLPARSEAPI(void)
XML_SetEndNamespaceDeclHandler(XML_Parser parser,
XML_EndNamespaceDeclHandler end);
XML_EndNamespaceDeclHandler end);
XMLPARSEAPI(void)
XML_SetNotStandaloneHandler(XML_Parser parser,
XML_NotStandaloneHandler handler);
XML_NotStandaloneHandler handler);
XMLPARSEAPI(void)
XML_SetExternalEntityRefHandler(XML_Parser parser,
XML_ExternalEntityRefHandler handler);
XML_ExternalEntityRefHandler handler);
/* If a non-null value for arg is specified here, then it will be passed
as the first argument to the external entity ref handler instead
of the parser object. */
/* If a non-NULL value for arg is specified here, then it will be
passed as the first argument to the external entity ref handler
instead of the parser object.
*/
XMLPARSEAPI(void)
XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg);
XMLPARSEAPI(void)
XML_SetSkippedEntityHandler(XML_Parser parser,
XML_SkippedEntityHandler handler);
XMLPARSEAPI(void)
XML_SetUnknownEncodingHandler(XML_Parser parser,
XML_UnknownEncodingHandler handler,
void *encodingHandlerData);
XML_UnknownEncodingHandler handler,
void *encodingHandlerData);
/* This can be called within a handler for a start element, end element,
processing instruction or character data. It causes the corresponding
markup to be passed to the default handler. */
/* This can be called within a handler for a start element, end
element, processing instruction or character data. It causes the
corresponding markup to be passed to the default handler.
*/
XMLPARSEAPI(void)
XML_DefaultCurrent(XML_Parser parser);
/* If do_nst is non-zero, and namespace processing is in effect, and
a name has a prefix (i.e. an explicit namespace qualifier) then
that name is returned as a triplet in a single
string separated by the separator character specified when the parser
was created: URI + sep + local_name + sep + prefix.
that name is returned as a triplet in a single string separated by
the separator character specified when the parser was created: URI
+ sep + local_name + sep + prefix.
If do_nst is zero, then namespace information is returned in the
default manner (URI + sep + local_name) whether or not the names
default manner (URI + sep + local_name) whether or not the name
has a prefix.
Note: Calling XML_SetReturnNSTriplet after XML_Parse or
XML_ParseBuffer has no effect.
*/
XMLPARSEAPI(void)
......@@ -521,84 +644,130 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst);
XMLPARSEAPI(void)
XML_SetUserData(XML_Parser parser, void *userData);
/* Returns the last value set by XML_SetUserData or null. */
/* Returns the last value set by XML_SetUserData or NULL. */
#define XML_GetUserData(parser) (*(void **)(parser))
/* This is equivalent to supplying an encoding argument
to XML_ParserCreate. It must not be called after XML_Parse
or XML_ParseBuffer. */
XMLPARSEAPI(int)
/* This is equivalent to supplying an encoding argument to
XML_ParserCreate. On success XML_SetEncoding returns non-zero,
zero otherwise.
Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer
has no effect and returns XML_STATUS_ERROR.
*/
XMLPARSEAPI(enum XML_Status)
XML_SetEncoding(XML_Parser parser, const XML_Char *encoding);
/* If this function is called, then the parser will be passed
as the first argument to callbacks instead of userData.
The userData will still be accessible using XML_GetUserData. */
/* If this function is called, then the parser will be passed as the
first argument to callbacks instead of userData. The userData will
still be accessible using XML_GetUserData.
*/
XMLPARSEAPI(void)
XML_UseParserAsHandlerArg(XML_Parser parser);
/* Sets the base to be used for resolving relative URIs in system
identifiers in declarations. Resolving relative identifiers is left
to the application: this value will be passed through as the base
argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler
and XML_UnparsedEntityDeclHandler. The base argument will be copied.
Returns zero if out of memory, non-zero otherwise. */
/* If useDTD == XML_TRUE is passed to this function, then the parser
will assume that there is an external subset, even if none is
specified in the document. In such a case the parser will call the
externalEntityRefHandler with a value of NULL for the systemId
argument (the publicId and context arguments will be NULL as well).
Note: If this function is called, then this must be done before
the first call to XML_Parse or XML_ParseBuffer, since it will
have no effect after that. Returns
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING.
Note: If the document does not have a DOCTYPE declaration at all,
then startDoctypeDeclHandler and endDoctypeDeclHandler will not
be called, despite an external subset being parsed.
Note: If XML_DTD is not defined when Expat is compiled, returns
XML_ERROR_FEATURE_REQUIRES_XML_DTD.
*/
XMLPARSEAPI(enum XML_Error)
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
XMLPARSEAPI(int)
/* Sets the base to be used for resolving relative URIs in system
identifiers in declarations. Resolving relative identifiers is
left to the application: this value will be passed through as the
base argument to the XML_ExternalEntityRefHandler,
XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base
argument will be copied. Returns XML_STATUS_ERROR if out of memory,
XML_STATUS_OK otherwise.
*/
XMLPARSEAPI(enum XML_Status)
XML_SetBase(XML_Parser parser, const XML_Char *base);
XMLPARSEAPI(const XML_Char *)
XML_GetBase(XML_Parser parser);
/* Returns the number of the attribute/value pairs passed in last call
to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus
this correspondds to an index into the atts array passed to the
XML_StartElementHandler. */
to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus
this correspondds to an index into the atts array passed to the
XML_StartElementHandler.
*/
XMLPARSEAPI(int)
XML_GetSpecifiedAttributeCount(XML_Parser parser);
/* Returns the index of the ID attribute passed in the last call to
XML_StartElementHandler, or -1 if there is no ID attribute. Each
attribute/value pair counts as 2; thus this correspondds to an index
into the atts array passed to the XML_StartElementHandler. */
XML_StartElementHandler, or -1 if there is no ID attribute. Each
attribute/value pair counts as 2; thus this correspondds to an
index into the atts array passed to the XML_StartElementHandler.
*/
XMLPARSEAPI(int)
XML_GetIdAttributeIndex(XML_Parser parser);
/* Parses some input. Returns 0 if a fatal error is detected.
The last call to XML_Parse must have isFinal true;
len may be zero for this call (or any other). */
XMLPARSEAPI(int)
/* Parses some input. Returns XML_STATUS_ERROR if a fatal error is
detected. The last call to XML_Parse must have isFinal true; len
may be zero for this call (or any other).
The XML_Status enum gives the possible return values for the
XML_Parse and XML_ParseBuffer functions. Though the return values
for these functions has always been described as a Boolean value,
the implementation, at least for the 1.95.x series, has always
returned exactly one of these values. The preprocessor #defines
are included so this stanza can be added to code that still needs
to support older versions of Expat 1.95.x:
#ifndef XML_STATUS_OK
#define XML_STATUS_OK 1
#define XML_STATUS_ERROR 0
#endif
Otherwise, the #define hackery is quite ugly and would have been dropped.
*/
enum XML_Status {
XML_STATUS_ERROR = 0,
#define XML_STATUS_ERROR XML_STATUS_ERROR
XML_STATUS_OK = 1
#define XML_STATUS_OK XML_STATUS_OK
};
XMLPARSEAPI(enum XML_Status)
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal);
XMLPARSEAPI(void *)
XML_GetBuffer(XML_Parser parser, int len);
XMLPARSEAPI(int)
XMLPARSEAPI(enum XML_Status)
XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
/* Creates an XML_Parser object that can parse an external general
entity; context is a '\0'-terminated string specifying the parse
context; encoding is a '\0'-terminated string giving the name of the
externally specified encoding, or null if there is no externally
specified encoding. The context string consists of a sequence of
tokens separated by formfeeds (\f); a token consisting of a name
specifies that the general entity of the name is open; a token of the
form prefix=uri specifies the namespace for a particular prefix; a
token of the form =uri specifies the default namespace. This can be
called at any point after the first call to an
ExternalEntityRefHandler so longer as the parser has not yet been
freed. The new parser is completely independent and may safely be
used in a separate thread. The handlers and userData are initialized
from the parser argument. Returns 0 if out of memory. Otherwise
returns a new XML_Parser object. */
entity; context is a '\0'-terminated string specifying the parse
context; encoding is a '\0'-terminated string giving the name of
the externally specified encoding, or NULL if there is no
externally specified encoding. The context string consists of a
sequence of tokens separated by formfeeds (\f); a token consisting
of a name specifies that the general entity of the name is open; a
token of the form prefix=uri specifies the namespace for a
particular prefix; a token of the form =uri specifies the default
namespace. This can be called at any point after the first call to
an ExternalEntityRefHandler so longer as the parser has not yet
been freed. The new parser is completely independent and may
safely be used in a separate thread. The handlers and userData are
initialized from the parser argument. Returns NULL if out of memory.
Otherwise returns a new XML_Parser object.
*/
XMLPARSEAPI(XML_Parser)
XML_ExternalEntityParserCreate(XML_Parser parser,
const XML_Char *context,
const XML_Char *encoding);
const XML_Char *context,
const XML_Char *encoding);
enum XML_ParamEntityParsing {
XML_PARAM_ENTITY_PARSING_NEVER,
......@@ -607,76 +776,56 @@ enum XML_ParamEntityParsing {
};
/* Controls parsing of parameter entities (including the external DTD
subset). If parsing of parameter entities is enabled, then references
to external parameter entities (including the external DTD subset)
will be passed to the handler set with
XML_SetExternalEntityRefHandler. The context passed will be 0.
Unlike external general entities, external parameter entities can only
be parsed synchronously. If the external parameter entity is to be
parsed, it must be parsed during the call to the external entity ref
handler: the complete sequence of XML_ExternalEntityParserCreate,
XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during
this call. After XML_ExternalEntityParserCreate has been called to
create the parser for the external parameter entity (context must be 0
for this call), it is illegal to make any calls on the old parser
until XML_ParserFree has been called on the newly created parser. If
the library has been compiled without support for parameter entity
parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero. */
subset). If parsing of parameter entities is enabled, then
references to external parameter entities (including the external
DTD subset) will be passed to the handler set with
XML_SetExternalEntityRefHandler. The context passed will be 0.
Unlike external general entities, external parameter entities can
only be parsed synchronously. If the external parameter entity is
to be parsed, it must be parsed during the call to the external
entity ref handler: the complete sequence of
XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and
XML_ParserFree calls must be made during this call. After
XML_ExternalEntityParserCreate has been called to create the parser
for the external parameter entity (context must be 0 for this
call), it is illegal to make any calls on the old parser until
XML_ParserFree has been called on the newly created parser.
If the library has been compiled without support for parameter
entity parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero.
Note: If XML_SetParamEntityParsing is called after XML_Parse or
XML_ParseBuffer, then it has no effect and will always return 0.
*/
XMLPARSEAPI(int)
XML_SetParamEntityParsing(XML_Parser parser,
enum XML_ParamEntityParsing parsing);
enum XML_Error {
XML_ERROR_NONE,
XML_ERROR_NO_MEMORY,
XML_ERROR_SYNTAX,
XML_ERROR_NO_ELEMENTS,
XML_ERROR_INVALID_TOKEN,
XML_ERROR_UNCLOSED_TOKEN,
XML_ERROR_PARTIAL_CHAR,
XML_ERROR_TAG_MISMATCH,
XML_ERROR_DUPLICATE_ATTRIBUTE,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
XML_ERROR_PARAM_ENTITY_REF,
XML_ERROR_UNDEFINED_ENTITY,
XML_ERROR_RECURSIVE_ENTITY_REF,
XML_ERROR_ASYNC_ENTITY,
XML_ERROR_BAD_CHAR_REF,
XML_ERROR_BINARY_ENTITY_REF,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
XML_ERROR_MISPLACED_XML_PI,
XML_ERROR_UNKNOWN_ENCODING,
XML_ERROR_INCORRECT_ENCODING,
XML_ERROR_UNCLOSED_CDATA_SECTION,
XML_ERROR_EXTERNAL_ENTITY_HANDLING,
XML_ERROR_NOT_STANDALONE,
XML_ERROR_UNEXPECTED_STATE
};
/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode
returns information about the error. */
enum XML_ParamEntityParsing parsing);
/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
XML_GetErrorCode returns information about the error.
*/
XMLPARSEAPI(enum XML_Error)
XML_GetErrorCode(XML_Parser parser);
/* These functions return information about the current parse location.
They may be called when XML_Parse or XML_ParseBuffer return 0;
in this case the location is the location of the character at which
the error was detected.
They may also be called from any other callback called to report
some parse event; in this the location is the location of the first
of the sequence of characters that generated the event. */
/* These functions return information about the current parse
location. They may be called from any callback called to report
some parse event; in this case the location is the location of
the first of the sequence of characters that generated the event.
They may also be called after returning from a call to XML_Parse
or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then
the location is the location of the character at which the error
was detected; otherwise the location is the location of the last
parse event, as described above.
*/
XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser);
XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser);
XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser);
/* Return the number of bytes in the current event.
Returns 0 if the event is in an internal entity. */
Returns 0 if the event is in an internal entity.
*/
XMLPARSEAPI(int)
XML_GetCurrentByteCount(XML_Parser parser);
......@@ -684,21 +833,35 @@ XML_GetCurrentByteCount(XML_Parser parser);
the integer pointed to by offset to the offset within this buffer
of the current parse position, and sets the integer pointed to by size
to the size of this buffer (the number of input bytes). Otherwise
returns a null pointer. Also returns a null pointer if a parse isn't
returns a NULL pointer. Also returns a NULL pointer if a parse isn't
active.
NOTE: The character pointer returned should not be used outside
the handler that makes the call. */
the handler that makes the call.
*/
XMLPARSEAPI(const char *)
XML_GetInputContext(XML_Parser parser,
int *offset,
int *size);
int *offset,
int *size);
/* For backwards compatibility with previous versions. */
#define XML_GetErrorLineNumber XML_GetCurrentLineNumber
#define XML_GetErrorLineNumber XML_GetCurrentLineNumber
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex
/* Frees the content model passed to the element declaration handler */
XMLPARSEAPI(void)
XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *)
XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void)
XML_MemFree(XML_Parser parser, void *ptr);
/* Frees memory used by the parser. */
XMLPARSEAPI(void)
......@@ -706,7 +869,7 @@ XML_ParserFree(XML_Parser parser);
/* Returns a string describing the error. */
XMLPARSEAPI(const XML_LChar *)
XML_ErrorString(int code);
XML_ErrorString(enum XML_Error code);
/* Return a string containing the version number of this expat */
XMLPARSEAPI(const XML_LChar *)
......@@ -719,18 +882,42 @@ typedef struct {
} XML_Expat_Version;
/* Return an XML_Expat_Version structure containing numeric version
number information for this version of expat */
number information for this version of expat.
*/
XMLPARSEAPI(XML_Expat_Version)
XML_ExpatVersionInfo(void);
/* VERSION is not defined in expat.h.in, but it really belongs here,
and defining it on the command line gives difficulties with MSVC. */
#define VERSION "1.95.2"
/* Added in Expat 1.95.5. */
enum XML_FeatureEnum {
XML_FEATURE_END = 0,
XML_FEATURE_UNICODE,
XML_FEATURE_UNICODE_WCHAR_T,
XML_FEATURE_DTD,
XML_FEATURE_CONTEXT_BYTES,
XML_FEATURE_MIN_SIZE,
XML_FEATURE_SIZEOF_XML_CHAR,
XML_FEATURE_SIZEOF_XML_LCHAR
/* Additional features must be added to the end of this enum. */
};
typedef struct {
enum XML_FeatureEnum feature;
const XML_LChar *name;
long int value;
} XML_Feature;
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
/* Expat follows the GNU/Linux convention of odd number minor version for
beta/development releases and even number minor version for stable
releases. Micro is bumped with each release, and set to 0 with each
change to major or minor version.
*/
#define XML_MAJOR_VERSION 1
#define XML_MINOR_VERSION 95
#define XML_MICRO_VERSION 2
#define XML_MICRO_VERSION 6
#ifdef __cplusplus
}
......
/* internal.h
Internal definitions used by Expat. This is not needed to compile
client code.
The following calling convention macros are defined for frequently
called functions:
FASTCALL - Used for those internal functions that have a simple
body and a low number of arguments and local variables.
PTRCALL - Used for functions called though function pointers.
PTRFASTCALL - Like PTRCALL, but for low number of arguments.
inline - Used for selected internal functions for which inlining
may improve performance on some platforms.
Note: Use of these macros is based on judgement, not hard rules,
and therefore subject to change.
*/
#if defined(__GNUC__)
/* Instability reported with egcs on a RedHat Linux 7.3.
Let's comment it out:
#define FASTCALL __attribute__((stdcall, regparm(3)))
and let's try this:
*/
#define FASTCALL __attribute__((regparm(3)))
#define PTRCALL
#define PTRFASTCALL __attribute__((regparm(3)))
#elif defined(WIN32)
/* Using __fastcall seems to have an unexpected negative effect under
MS VC++, especially for function pointers, so we won't use it for
now on that platform. It may be reconsidered for a future release
if it can be made more effective.
Likely reason: __fastcall on Windows is like stdcall, therefore
the compiler cannot perform stack optimizations for call clusters.
*/
#define FASTCALL
#define PTRCALL
#define PTRFASTCALL
#endif
#ifndef FASTCALL
#define FASTCALL
#endif
#ifndef PTRCALL
#define PTRCALL
#endif
#ifndef PTRFASTCALL
#define PTRFASTCALL
#endif
#ifndef XML_MIN_SIZE
#if !defined(__cplusplus) && !defined(inline)
#ifdef __GNUC__
#define inline __inline
#endif /* __GNUC__ */
#endif
#endif /* XML_MIN_SIZE */
#ifdef __cplusplus
#define inline inline
#else
#ifndef inline
#define inline
#endif
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifdef COMPILED_FROM_DSP
# include "winconfig.h"
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
#else
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <expat_config.h>
#endif /* ndef COMPILED_FROM_DSP */
#include "internal.h"
#include "xmlrole.h"
#include "ascii.h"
......@@ -21,29 +21,56 @@ See the file COPYING for copying permission.
*/
static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
static const char KW_ANY[] = {
ASCII_A, ASCII_N, ASCII_Y, '\0' };
static const char KW_ATTLIST[] = {
ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
static const char KW_CDATA[] = {
ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_DOCTYPE[] = {
ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
static const char KW_ELEMENT[] = {
ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
static const char KW_EMPTY[] = {
ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
static const char KW_ENTITIES[] = {
ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
'\0' };
static const char KW_ENTITY[] = {
ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
static const char KW_FIXED[] = {
ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
static const char KW_ID[] = {
ASCII_I, ASCII_D, '\0' };
static const char KW_IDREF[] = {
ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
static const char KW_IDREFS[] = {
ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
static const char KW_IGNORE[] = {
ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
static const char KW_IMPLIED[] = {
ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
static const char KW_INCLUDE[] = {
ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
static const char KW_NDATA[] = {
ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_NMTOKEN[] = {
ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
static const char KW_NMTOKENS[] = {
ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
'\0' };
static const char KW_NOTATION[] =
{ ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
'\0' };
static const char KW_PCDATA[] = {
ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_PUBLIC[] = {
ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
static const char KW_REQUIRED[] = {
ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
'\0' };
static const char KW_SYSTEM[] = {
ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
......@@ -58,18 +85,18 @@ static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, A
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
typedef int PROLOG_HANDLER(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc);
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc);
static PROLOG_HANDLER
prolog0, prolog1, prolog2,
doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
internalSubset,
entity0, entity1, entity2, entity3, entity4, entity5, entity6,
entity7, entity8, entity9,
entity7, entity8, entity9, entity10,
notation0, notation1, notation2, notation3, notation4,
attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
attlist7, attlist8, attlist9,
......@@ -82,15 +109,14 @@ static PROLOG_HANDLER
declClose,
error;
static
int common(PROLOG_STATE *state, int tok);
static int FASTCALL common(PROLOG_STATE *state, int tok);
static
int prolog0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
prolog0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
......@@ -101,19 +127,20 @@ int prolog0(PROLOG_STATE *state,
return XML_ROLE_XML_DECL;
case XML_TOK_PI:
state->handler = prolog1;
return XML_ROLE_NONE;
return XML_ROLE_PI;
case XML_TOK_COMMENT:
state->handler = prolog1;
return XML_ROLE_COMMENT;
case XML_TOK_BOM:
return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN:
if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_DOCTYPE))
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_DOCTYPE))
break;
state->handler = doctype0;
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_INSTANCE_START:
state->handler = error;
return XML_ROLE_INSTANCE_START;
......@@ -121,28 +148,30 @@ int prolog0(PROLOG_STATE *state,
return common(state, tok);
}
static
int prolog1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
prolog1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_PI:
return XML_ROLE_PI;
case XML_TOK_COMMENT:
return XML_ROLE_COMMENT;
case XML_TOK_BOM:
return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN:
if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_DOCTYPE))
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_DOCTYPE))
break;
state->handler = doctype0;
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_INSTANCE_START:
state->handler = error;
return XML_ROLE_INSTANCE_START;
......@@ -150,19 +179,20 @@ int prolog1(PROLOG_STATE *state,
return common(state, tok);
}
static
int prolog2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
prolog2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_PI:
return XML_ROLE_PI;
case XML_TOK_COMMENT:
return XML_ROLE_NONE;
return XML_ROLE_COMMENT;
case XML_TOK_INSTANCE_START:
state->handler = error;
return XML_ROLE_INSTANCE_START;
......@@ -170,16 +200,16 @@ int prolog2(PROLOG_STATE *state,
return common(state, tok);
}
static
int doctype0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
doctype0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = doctype1;
......@@ -188,16 +218,16 @@ int doctype0(PROLOG_STATE *state,
return common(state, tok);
}
static
int doctype1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
doctype1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_OPEN_BRACKET:
state->handler = internalSubset;
return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
......@@ -207,27 +237,27 @@ int doctype1(PROLOG_STATE *state,
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = doctype3;
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = doctype2;
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
}
break;
}
return common(state, tok);
}
static
int doctype2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
doctype2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_LITERAL:
state->handler = doctype3;
return XML_ROLE_DOCTYPE_PUBLIC_ID;
......@@ -235,16 +265,16 @@ int doctype2(PROLOG_STATE *state,
return common(state, tok);
}
static
int doctype3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
doctype3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_LITERAL:
state->handler = doctype4;
return XML_ROLE_DOCTYPE_SYSTEM_ID;
......@@ -252,16 +282,16 @@ int doctype3(PROLOG_STATE *state,
return common(state, tok);
}
static
int doctype4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
doctype4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_OPEN_BRACKET:
state->handler = internalSubset;
return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
......@@ -272,16 +302,16 @@ int doctype4(PROLOG_STATE *state,
return common(state, tok);
}
static
int doctype5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
doctype5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
case XML_TOK_DECL_CLOSE:
state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
......@@ -289,66 +319,67 @@ int doctype5(PROLOG_STATE *state,
return common(state, tok);
}
static
int internalSubset(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
internalSubset(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN:
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_ENTITY)) {
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_ENTITY)) {
state->handler = entity0;
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_ATTLIST)) {
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_ATTLIST)) {
state->handler = attlist0;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_ELEMENT)) {
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_ELEMENT)) {
state->handler = element0;
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_NOTATION)) {
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
end,
KW_NOTATION)) {
state->handler = notation0;
return XML_ROLE_NONE;
return XML_ROLE_NOTATION_NONE;
}
break;
case XML_TOK_PI:
return XML_ROLE_PI;
case XML_TOK_COMMENT:
return XML_ROLE_NONE;
return XML_ROLE_COMMENT;
case XML_TOK_PARAM_ENTITY_REF:
return XML_ROLE_PARAM_ENTITY_REF;
case XML_TOK_CLOSE_BRACKET:
state->handler = doctype5;
return XML_ROLE_NONE;
return XML_ROLE_DOCTYPE_NONE;
}
return common(state, tok);
}
#ifdef XML_DTD
static
int externalSubset0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
externalSubset0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
state->handler = externalSubset1;
if (tok == XML_TOK_XML_DECL)
......@@ -356,12 +387,12 @@ int externalSubset0(PROLOG_STATE *state,
return externalSubset1(state, tok, ptr, end, enc);
}
static
int externalSubset1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
externalSubset1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_COND_SECT_OPEN:
......@@ -388,19 +419,19 @@ int externalSubset1(PROLOG_STATE *state,
#endif /* XML_DTD */
static
int entity0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_PERCENT:
state->handler = entity1;
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME:
state->handler = entity2;
return XML_ROLE_GENERAL_ENTITY_NAME;
......@@ -408,16 +439,16 @@ int entity0(PROLOG_STATE *state,
return common(state, tok);
}
static
int entity1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME:
state->handler = entity7;
return XML_ROLE_PARAM_ENTITY_NAME;
......@@ -425,43 +456,44 @@ int entity1(PROLOG_STATE *state,
return common(state, tok);
}
static
int entity2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = entity4;
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = entity3;
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
}
break;
case XML_TOK_LITERAL:
state->handler = declClose;
state->role_none = XML_ROLE_ENTITY_NONE;
return XML_ROLE_ENTITY_VALUE;
}
return common(state, tok);
}
static
int entity3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL:
state->handler = entity4;
return XML_ROLE_ENTITY_PUBLIC_ID;
......@@ -469,17 +501,16 @@ int entity3(PROLOG_STATE *state,
return common(state, tok);
}
static
int entity4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL:
state->handler = entity5;
return XML_ROLE_ENTITY_SYSTEM_ID;
......@@ -487,83 +518,85 @@ int entity4(PROLOG_STATE *state,
return common(state, tok);
}
static
int entity5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_DECL_CLOSE:
setTopLevel(state);
return XML_ROLE_ENTITY_COMPLETE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
state->handler = entity6;
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
}
break;
}
return common(state, tok);
}
static
int entity6(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity6(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME:
state->handler = declClose;
state->role_none = XML_ROLE_ENTITY_NONE;
return XML_ROLE_ENTITY_NOTATION_NAME;
}
return common(state, tok);
}
static
int entity7(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity7(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = entity9;
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = entity8;
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
}
break;
case XML_TOK_LITERAL:
state->handler = declClose;
state->role_none = XML_ROLE_ENTITY_NONE;
return XML_ROLE_ENTITY_VALUE;
}
return common(state, tok);
}
static
int entity8(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity8(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL:
state->handler = entity9;
return XML_ROLE_ENTITY_PUBLIC_ID;
......@@ -571,33 +604,50 @@ int entity8(PROLOG_STATE *state,
return common(state, tok);
}
static
int entity9(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity9(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_LITERAL:
state->handler = declClose;
state->handler = entity10;
return XML_ROLE_ENTITY_SYSTEM_ID;
}
return common(state, tok);
}
static
int notation0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
entity10(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ENTITY_NONE;
case XML_TOK_DECL_CLOSE:
setTopLevel(state);
return XML_ROLE_ENTITY_COMPLETE;
}
return common(state, tok);
}
static int PTRCALL
notation0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NOTATION_NONE;
case XML_TOK_NAME:
state->handler = notation1;
return XML_ROLE_NOTATION_NAME;
......@@ -605,40 +655,40 @@ int notation0(PROLOG_STATE *state,
return common(state, tok);
}
static
int notation1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
notation1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_NOTATION_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
state->handler = notation3;
return XML_ROLE_NONE;
return XML_ROLE_NOTATION_NONE;
}
if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
state->handler = notation2;
return XML_ROLE_NONE;
return XML_ROLE_NOTATION_NONE;
}
break;
}
return common(state, tok);
}
static
int notation2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
notation2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_NOTATION_NONE;
case XML_TOK_LITERAL:
state->handler = notation4;
return XML_ROLE_NOTATION_PUBLIC_ID;
......@@ -646,35 +696,37 @@ int notation2(PROLOG_STATE *state,
return common(state, tok);
}
static
int notation3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
notation3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_NOTATION_NONE;
case XML_TOK_LITERAL:
state->handler = declClose;
state->role_none = XML_ROLE_NOTATION_NONE;
return XML_ROLE_NOTATION_SYSTEM_ID;
}
return common(state, tok);
}
static
int notation4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
notation4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_NOTATION_NONE;
case XML_TOK_LITERAL:
state->handler = declClose;
state->role_none = XML_ROLE_NOTATION_NONE;
return XML_ROLE_NOTATION_SYSTEM_ID;
case XML_TOK_DECL_CLOSE:
setTopLevel(state);
......@@ -683,16 +735,16 @@ int notation4(PROLOG_STATE *state,
return common(state, tok);
}
static
int attlist0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = attlist1;
......@@ -701,19 +753,19 @@ int attlist0(PROLOG_STATE *state,
return common(state, tok);
}
static
int attlist1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_DECL_CLOSE:
setTopLevel(state);
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = attlist2;
......@@ -722,20 +774,20 @@ int attlist1(PROLOG_STATE *state,
return common(state, tok);
}
static
int attlist2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME:
{
static const char *types[] = {
KW_CDATA,
KW_CDATA,
KW_ID,
KW_IDREF,
KW_IDREFS,
......@@ -746,33 +798,33 @@ int attlist2(PROLOG_STATE *state,
};
int i;
for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
state->handler = attlist8;
return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
}
if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
state->handler = attlist8;
return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
}
}
if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
state->handler = attlist5;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
}
break;
case XML_TOK_OPEN_PAREN:
state->handler = attlist3;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
}
return common(state, tok);
}
static
int attlist3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NMTOKEN:
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
......@@ -782,54 +834,53 @@ int attlist3(PROLOG_STATE *state,
return common(state, tok);
}
static
int attlist4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_CLOSE_PAREN:
state->handler = attlist8;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_OR:
state->handler = attlist3;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
}
return common(state, tok);
}
static
int attlist5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_OPEN_PAREN:
state->handler = attlist6;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
}
return common(state, tok);
}
static
int attlist6(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist6(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_NAME:
state->handler = attlist7;
return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
......@@ -837,58 +888,58 @@ int attlist6(PROLOG_STATE *state,
return common(state, tok);
}
static
int attlist7(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist7(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_CLOSE_PAREN:
state->handler = attlist8;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_OR:
state->handler = attlist6;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
}
return common(state, tok);
}
/* default value */
static
int attlist8(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist8(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_IMPLIED)) {
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_IMPLIED)) {
state->handler = attlist1;
return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
}
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_REQUIRED)) {
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_REQUIRED)) {
state->handler = attlist1;
return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
}
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_FIXED)) {
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_FIXED)) {
state->handler = attlist9;
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
}
break;
case XML_TOK_LITERAL:
......@@ -898,16 +949,16 @@ int attlist8(PROLOG_STATE *state,
return common(state, tok);
}
static
int attlist9(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
attlist9(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ATTLIST_NONE;
case XML_TOK_LITERAL:
state->handler = attlist1;
return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
......@@ -915,16 +966,16 @@ int attlist9(PROLOG_STATE *state,
return common(state, tok);
}
static
int element0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = element1;
......@@ -933,23 +984,25 @@ int element0(PROLOG_STATE *state,
return common(state, tok);
}
static
int element1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_CONTENT_EMPTY;
}
if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_CONTENT_ANY;
}
break;
......@@ -961,21 +1014,21 @@ int element1(PROLOG_STATE *state,
return common(state, tok);
}
static
int element2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_POUND_NAME:
if (XmlNameMatchesAscii(enc,
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_PCDATA)) {
ptr + MIN_BYTES_PER_CHAR(enc),
end,
KW_PCDATA)) {
state->handler = element3;
return XML_ROLE_CONTENT_PCDATA;
}
......@@ -1001,39 +1054,41 @@ int element2(PROLOG_STATE *state,
return common(state, tok);
}
static
int element3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element3(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_CLOSE_PAREN:
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_GROUP_CLOSE;
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_OR:
state->handler = element4;
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
}
return common(state, tok);
}
static
int element4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element4(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_NAME:
case XML_TOK_PREFIXED_NAME:
state->handler = element5;
......@@ -1042,36 +1097,37 @@ int element4(PROLOG_STATE *state,
return common(state, tok);
}
static
int element5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element5(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_OR:
state->handler = element4;
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
}
return common(state, tok);
}
static
int element6(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element6(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_OPEN_PAREN:
state->level += 1;
return XML_ROLE_GROUP_OPEN;
......@@ -1092,35 +1148,43 @@ int element6(PROLOG_STATE *state,
return common(state, tok);
}
static
int element7(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
element7(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return XML_ROLE_ELEMENT_NONE;
case XML_TOK_CLOSE_PAREN:
state->level -= 1;
if (state->level == 0)
if (state->level == 0) {
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE;
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->level -= 1;
if (state->level == 0)
if (state->level == 0) {
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_CLOSE_PAREN_QUESTION:
state->level -= 1;
if (state->level == 0)
if (state->level == 0) {
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE_OPT;
case XML_TOK_CLOSE_PAREN_PLUS:
state->level -= 1;
if (state->level == 0)
if (state->level == 0) {
state->handler = declClose;
state->role_none = XML_ROLE_ELEMENT_NONE;
}
return XML_ROLE_GROUP_CLOSE_PLUS;
case XML_TOK_COMMA:
state->handler = element6;
......@@ -1134,12 +1198,12 @@ int element7(PROLOG_STATE *state,
#ifdef XML_DTD
static
int condSect0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
condSect0(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
......@@ -1158,12 +1222,12 @@ int condSect0(PROLOG_STATE *state,
return common(state, tok);
}
static
int condSect1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
condSect1(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
......@@ -1176,12 +1240,12 @@ int condSect1(PROLOG_STATE *state,
return common(state, tok);
}
static
int condSect2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
condSect2(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
......@@ -1195,55 +1259,35 @@ int condSect2(PROLOG_STATE *state,
#endif /* XML_DTD */
static
int declClose(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
declClose(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_PROLOG_S:
return XML_ROLE_NONE;
return state->role_none;
case XML_TOK_DECL_CLOSE:
setTopLevel(state);
return XML_ROLE_NONE;
return state->role_none;
}
return common(state, tok);
}
#if 0
static
int ignore(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
return 0;
default:
return XML_ROLE_NONE;
}
return common(state, tok);
}
#endif
static
int error(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
static int PTRCALL
error(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
return XML_ROLE_NONE;
}
static
int common(PROLOG_STATE *state, int tok)
static int FASTCALL
common(PROLOG_STATE *state, int tok)
{
#ifdef XML_DTD
if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
......@@ -1253,18 +1297,21 @@ int common(PROLOG_STATE *state, int tok)
return XML_ROLE_ERROR;
}
void XmlPrologStateInit(PROLOG_STATE *state)
void
XmlPrologStateInit(PROLOG_STATE *state)
{
state->handler = prolog0;
#ifdef XML_DTD
state->documentEntity = 1;
state->includeLevel = 0;
state->inEntityValue = 0;
#endif /* XML_DTD */
}
#ifdef XML_DTD
void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
void
XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
state->handler = externalSubset0;
state->documentEntity = 0;
......
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlRole_INCLUDED
#define XmlRole_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt
#endif
#include "xmltok.h"
#ifdef __cplusplus
......@@ -17,6 +22,7 @@ enum {
XML_ROLE_NONE = 0,
XML_ROLE_XML_DECL,
XML_ROLE_INSTANCE_START,
XML_ROLE_DOCTYPE_NONE,
XML_ROLE_DOCTYPE_NAME,
XML_ROLE_DOCTYPE_SYSTEM_ID,
XML_ROLE_DOCTYPE_PUBLIC_ID,
......@@ -24,11 +30,13 @@ enum {
XML_ROLE_DOCTYPE_CLOSE,
XML_ROLE_GENERAL_ENTITY_NAME,
XML_ROLE_PARAM_ENTITY_NAME,
XML_ROLE_ENTITY_NONE,
XML_ROLE_ENTITY_VALUE,
XML_ROLE_ENTITY_SYSTEM_ID,
XML_ROLE_ENTITY_PUBLIC_ID,
XML_ROLE_ENTITY_COMPLETE,
XML_ROLE_ENTITY_NOTATION_NAME,
XML_ROLE_NOTATION_NONE,
XML_ROLE_NOTATION_NAME,
XML_ROLE_NOTATION_SYSTEM_ID,
XML_ROLE_NOTATION_NO_SYSTEM_ID,
......@@ -44,11 +52,13 @@ enum {
XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS,
XML_ROLE_ATTRIBUTE_ENUM_VALUE,
XML_ROLE_ATTRIBUTE_NOTATION_VALUE,
XML_ROLE_ATTLIST_NONE,
XML_ROLE_ATTLIST_ELEMENT_NAME,
XML_ROLE_IMPLIED_ATTRIBUTE_VALUE,
XML_ROLE_REQUIRED_ATTRIBUTE_VALUE,
XML_ROLE_DEFAULT_ATTRIBUTE_VALUE,
XML_ROLE_FIXED_ATTRIBUTE_VALUE,
XML_ROLE_ELEMENT_NONE,
XML_ROLE_ELEMENT_NAME,
XML_ROLE_CONTENT_ANY,
XML_ROLE_CONTENT_EMPTY,
......@@ -64,6 +74,8 @@ enum {
XML_ROLE_CONTENT_ELEMENT_REP,
XML_ROLE_CONTENT_ELEMENT_OPT,
XML_ROLE_CONTENT_ELEMENT_PLUS,
XML_ROLE_PI,
XML_ROLE_COMMENT,
#ifdef XML_DTD
XML_ROLE_TEXT_DECL,
XML_ROLE_IGNORE_SECT,
......@@ -73,15 +85,17 @@ enum {
};
typedef struct prolog_state {
int (*handler)(struct prolog_state *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc);
int (PTRCALL *handler) (struct prolog_state *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc);
unsigned level;
int role_none;
#ifdef XML_DTD
unsigned includeLevel;
int documentEntity;
int inEntityValue;
#endif /* XML_DTD */
} PROLOG_STATE;
......
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifdef COMPILED_FROM_DSP
# include "winconfig.h"
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
#else
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
/* Unused - MvL
#include <expat_config.h>
*/
#endif /* ndef COMPILED_FROM_DSP */
#include "internal.h"
#include "xmltok.h"
#include "nametab.h"
......@@ -39,24 +41,25 @@ See the file COPYING for copying permission.
#define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits
between the bottom 5 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask. */
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
pages, 3 bits to add to that index and 5 bits to generate the mask.
*/
#define UTF8_GET_NAMING2(pages, byte) \
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \
& (1 << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits
between the bottom 4, 6 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask. */
/* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
into pages, 3 bits to add to that index and 5 bits to generate the
mask.
*/
#define UTF8_GET_NAMING3(pages, byte) \
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
+ ((((byte)[1]) >> 2) & 0xF)] \
<< 3) \
<< 3) \
+ ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \
& (1 << (((byte)[2]) & 0x1F)))
......@@ -68,59 +71,97 @@ We need 8 bits to index into pages, 3 bits to add to that index and
? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
: 0))
#define UTF8_INVALID3(p) \
((*p) == 0xED \
? (((p)[1] & 0x20) != 0) \
: ((*p) == 0xEF \
? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
: 0))
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
with the additional restriction of not allowing the Unicode
code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
Implementation details:
(A & 0x80) == 0 means A < 0x80
and
(A & 0xC0) == 0xC0 means A > 0xBF
*/
#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
#define UTF8_INVALID2(p) \
((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
static
int isNever(const ENCODING *enc, const char *p)
#define UTF8_INVALID3(p) \
(((p)[2] & 0x80) == 0 \
|| \
((*p) == 0xEF && (p)[1] == 0xBF \
? \
(p)[2] > 0xBD \
: \
((p)[2] & 0xC0) == 0xC0) \
|| \
((*p) == 0xE0 \
? \
(p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
#define UTF8_INVALID4(p) \
(((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
|| \
((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
|| \
((*p) == 0xF0 \
? \
(p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static int PTRFASTCALL
isNever(const ENCODING *enc, const char *p)
{
return 0;
}
static
int utf8_isName2(const ENCODING *enc, const char *p)
static int PTRFASTCALL
utf8_isName2(const ENCODING *enc, const char *p)
{
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
}
static
int utf8_isName3(const ENCODING *enc, const char *p)
static int PTRFASTCALL
utf8_isName3(const ENCODING *enc, const char *p)
{
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
}
#define utf8_isName4 isNever
static
int utf8_isNmstrt2(const ENCODING *enc, const char *p)
static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING *enc, const char *p)
{
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
}
static
int utf8_isNmstrt3(const ENCODING *enc, const char *p)
static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING *enc, const char *p)
{
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
}
#define utf8_isNmstrt4 isNever
#define utf8_isInvalid2 isNever
static int PTRFASTCALL
utf8_isInvalid2(const ENCODING *enc, const char *p)
{
return UTF8_INVALID2((const unsigned char *)p);
}
static
int utf8_isInvalid3(const ENCODING *enc, const char *p)
static int PTRFASTCALL
utf8_isInvalid3(const ENCODING *enc, const char *p)
{
return UTF8_INVALID3((const unsigned char *)p);
}
static
int utf8_isInvalid4(const ENCODING *enc, const char *p)
static int PTRFASTCALL
utf8_isInvalid4(const ENCODING *enc, const char *p)
{
return UTF8_INVALID4((const unsigned char *)p);
}
......@@ -129,23 +170,25 @@ struct normal_encoding {
ENCODING enc;
unsigned char type[256];
#ifdef XML_MIN_SIZE
int (*byteType)(const ENCODING *, const char *);
int (*isNameMin)(const ENCODING *, const char *);
int (*isNmstrtMin)(const ENCODING *, const char *);
int (*byteToAscii)(const ENCODING *, const char *);
int (*charMatches)(const ENCODING *, const char *, int);
int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
#endif /* XML_MIN_SIZE */
int (*isName2)(const ENCODING *, const char *);
int (*isName3)(const ENCODING *, const char *);
int (*isName4)(const ENCODING *, const char *);
int (*isNmstrt2)(const ENCODING *, const char *);
int (*isNmstrt3)(const ENCODING *, const char *);
int (*isNmstrt4)(const ENCODING *, const char *);
int (*isInvalid2)(const ENCODING *, const char *);
int (*isInvalid3)(const ENCODING *, const char *);
int (*isInvalid4)(const ENCODING *, const char *);
int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
};
#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc))
#ifdef XML_MIN_SIZE
#define STANDARD_VTABLE(E) \
......@@ -172,7 +215,7 @@ struct normal_encoding {
E ## isInvalid3, \
E ## isInvalid4
static int checkCharRefNumber(int);
static int FASTCALL checkCharRefNumber(int);
#include "xmltok_impl.h"
#include "ascii.h"
......@@ -193,22 +236,22 @@ static int checkCharRefNumber(int);
(((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#ifdef XML_MIN_SIZE
static
int sb_byteType(const ENCODING *enc, const char *p)
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p)
{
return SB_BYTE_TYPE(enc, p);
}
#define BYTE_TYPE(enc, p) \
(((const struct normal_encoding *)(enc))->byteType(enc, p))
(AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
(((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
static
int sb_byteToAscii(const ENCODING *enc, const char *p)
(AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p)
{
return *p;
}
......@@ -217,17 +260,17 @@ int sb_byteToAscii(const ENCODING *enc, const char *p)
#endif
#define IS_NAME_CHAR(enc, p, n) \
(((const struct normal_encoding *)(enc))->isName ## n(enc, p))
(AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
(((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
(AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
(((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
(AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
(((const struct normal_encoding *)(enc))->isNameMin(enc, p))
(AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
(((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
(AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
......@@ -235,9 +278,9 @@ int sb_byteToAscii(const ENCODING *enc, const char *p)
#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
(((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
static
int sb_charMatches(const ENCODING *enc, const char *p, int c)
(AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c)
{
return *p == c;
}
......@@ -266,10 +309,10 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0
};
static
void utf8_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
static void PTRCALL
utf8_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
char *to;
const char *from;
......@@ -277,7 +320,7 @@ void utf8_toUtf8(const ENCODING *enc,
/* Avoid copying partial characters. */
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
break;
break;
}
for (to = *toP, from = *fromP; from != fromLim; from++, to++)
*to = *from;
......@@ -285,34 +328,36 @@ void utf8_toUtf8(const ENCODING *enc,
*toP = to;
}
static
void utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
static void PTRCALL
utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
unsigned short *to = *toP;
const char *from = *fromP;
while (from != fromLim && to != toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2:
*to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
from += 2;
break;
case BT_LEAD3:
*to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
from += 3;
break;
case BT_LEAD4:
{
unsigned long n;
if (to + 1 == toLim)
break;
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000;
to[0] = (unsigned short)((n >> 10) | 0xD800);
to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
to += 2;
from += 4;
unsigned long n;
if (to + 1 == toLim)
goto after;
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000;
to[0] = (unsigned short)((n >> 10) | 0xD800);
to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
to += 2;
from += 4;
}
break;
default:
......@@ -320,6 +365,7 @@ void utf8_toUtf16(const ENCODING *enc,
break;
}
}
after:
*fromP = from;
*toP = to;
}
......@@ -370,10 +416,10 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
static
void latin1_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
static void PTRCALL
latin1_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
for (;;) {
unsigned char c;
......@@ -382,23 +428,23 @@ void latin1_toUtf8(const ENCODING *enc,
c = (unsigned char)**fromP;
if (c & 0x80) {
if (toLim - *toP < 2)
break;
*(*toP)++ = ((c >> 6) | UTF8_cval2);
*(*toP)++ = ((c & 0x3f) | 0x80);
break;
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++;
}
else {
if (*toP == toLim)
break;
break;
*(*toP)++ = *(*fromP)++;
}
}
}
static
void latin1_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
static void PTRCALL
latin1_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
while (*fromP != fromLim && *toP != toLim)
*(*toP)++ = (unsigned char)*(*fromP)++;
......@@ -428,10 +474,10 @@ static const struct normal_encoding latin1_encoding = {
STANDARD_VTABLE(sb_)
};
static
void ascii_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
static void PTRCALL
ascii_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
while (*fromP != fromLim && *toP != toLim)
*(*toP)++ = *(*fromP)++;
......@@ -461,7 +507,8 @@ static const struct normal_encoding ascii_encoding = {
STANDARD_VTABLE(sb_)
};
static int unicode_byte_type(char hi, char lo)
static int PTRFASTCALL
unicode_byte_type(char hi, char lo)
{
switch ((unsigned char)hi) {
case 0xD8: case 0xD9: case 0xDA: case 0xDB:
......@@ -480,10 +527,10 @@ static int unicode_byte_type(char hi, char lo)
}
#define DEFINE_UTF16_TO_UTF8(E) \
static \
void E ## toUtf8(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
static void PTRCALL \
E ## toUtf8(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
{ \
const char *from; \
for (from = *fromP; from != fromLim; from += 2) { \
......@@ -496,7 +543,7 @@ void E ## toUtf8(const ENCODING *enc, \
if (lo < 0x80) { \
if (*toP == toLim) { \
*fromP = from; \
return; \
return; \
} \
*(*toP)++ = lo; \
break; \
......@@ -506,7 +553,7 @@ void E ## toUtf8(const ENCODING *enc, \
case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \
*fromP = from; \
return; \
return; \
} \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
......@@ -514,7 +561,7 @@ void E ## toUtf8(const ENCODING *enc, \
default: \
if (toLim - *toP < 3) { \
*fromP = from; \
return; \
return; \
} \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
......@@ -523,8 +570,8 @@ void E ## toUtf8(const ENCODING *enc, \
break; \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \
*fromP = from; \
return; \
*fromP = from; \
return; \
} \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
......@@ -532,9 +579,9 @@ void E ## toUtf8(const ENCODING *enc, \
from += 2; \
lo2 = GET_LO(from); \
*(*toP)++ = (((lo & 0x3) << 4) \
| ((GET_HI(from) & 0x3) << 2) \
| (lo2 >> 6) \
| 0x80); \
| ((GET_HI(from) & 0x3) << 2) \
| (lo2 >> 6) \
| 0x80); \
*(*toP)++ = ((lo2 & 0x3f) | 0x80); \
break; \
} \
......@@ -543,10 +590,10 @@ void E ## toUtf8(const ENCODING *enc, \
}
#define DEFINE_UTF16_TO_UTF16(E) \
static \
void E ## toUtf16(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
static void PTRCALL \
E ## toUtf16(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
{ \
/* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \
......@@ -593,32 +640,32 @@ DEFINE_UTF16_TO_UTF16(big2_)
#ifdef XML_MIN_SIZE
static
int little2_byteType(const ENCODING *enc, const char *p)
static int PTRFASTCALL
little2_byteType(const ENCODING *enc, const char *p)
{
return LITTLE2_BYTE_TYPE(enc, p);
}
static
int little2_byteToAscii(const ENCODING *enc, const char *p)
static int PTRFASTCALL
little2_byteToAscii(const ENCODING *enc, const char *p)
{
return LITTLE2_BYTE_TO_ASCII(enc, p);
}
static
int little2_charMatches(const ENCODING *enc, const char *p, int c)
static int PTRCALL
little2_charMatches(const ENCODING *enc, const char *p, int c)
{
return LITTLE2_CHAR_MATCHES(enc, p, c);
}
static
int little2_isNameMin(const ENCODING *enc, const char *p)
static int PTRFASTCALL
little2_isNameMin(const ENCODING *enc, const char *p)
{
return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
}
static
int little2_isNmstrtMin(const ENCODING *enc, const char *p)
static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING *enc, const char *p)
{
return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
}
......@@ -633,7 +680,7 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p)
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
......@@ -656,9 +703,9 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p)
#ifdef XML_NS
static const struct normal_encoding little2_encoding_ns = {
static const struct normal_encoding little2_encoding_ns = {
{ VTABLE, 2, 0,
#if XML_BYTE_ORDER == 12
#if BYTEORDER == 1234
1
#else
0
......@@ -673,9 +720,9 @@ static const struct normal_encoding little2_encoding_ns = {
#endif
static const struct normal_encoding little2_encoding = {
static const struct normal_encoding little2_encoding = {
{ VTABLE, 2, 0,
#if XML_BYTE_ORDER == 12
#if BYTEORDER == 1234
1
#else
0
......@@ -690,11 +737,11 @@ static const struct normal_encoding little2_encoding = {
STANDARD_VTABLE(little2_)
};
#if XML_BYTE_ORDER != 21
#if BYTEORDER != 4321
#ifdef XML_NS
static const struct normal_encoding internal_little2_encoding_ns = {
static const struct normal_encoding internal_little2_encoding_ns = {
{ VTABLE, 2, 0, 1 },
{
#include "iasciitab.h"
......@@ -705,7 +752,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
#endif
static const struct normal_encoding internal_little2_encoding = {
static const struct normal_encoding internal_little2_encoding = {
{ VTABLE, 2, 0, 1 },
{
#define BT_COLON BT_NMSTRT
......@@ -732,32 +779,32 @@ static const struct normal_encoding internal_little2_encoding = {
#ifdef XML_MIN_SIZE
static
int big2_byteType(const ENCODING *enc, const char *p)
static int PTRFASTCALL
big2_byteType(const ENCODING *enc, const char *p)
{
return BIG2_BYTE_TYPE(enc, p);
}
static
int big2_byteToAscii(const ENCODING *enc, const char *p)
static int PTRFASTCALL
big2_byteToAscii(const ENCODING *enc, const char *p)
{
return BIG2_BYTE_TO_ASCII(enc, p);
}
static
int big2_charMatches(const ENCODING *enc, const char *p, int c)
static int PTRCALL
big2_charMatches(const ENCODING *enc, const char *p, int c)
{
return BIG2_CHAR_MATCHES(enc, p, c);
}
static
int big2_isNameMin(const ENCODING *enc, const char *p)
static int PTRFASTCALL
big2_isNameMin(const ENCODING *enc, const char *p)
{
return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
}
static
int big2_isNmstrtMin(const ENCODING *enc, const char *p)
static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING *enc, const char *p)
{
return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
}
......@@ -772,7 +819,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p)
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
......@@ -797,7 +844,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p)
static const struct normal_encoding big2_encoding_ns = {
{ VTABLE, 2, 0,
#if XML_BYTE_ORDER == 21
#if BYTEORDER == 4321
1
#else
0
......@@ -814,7 +861,7 @@ static const struct normal_encoding big2_encoding_ns = {
static const struct normal_encoding big2_encoding = {
{ VTABLE, 2, 0,
#if XML_BYTE_ORDER == 21
#if BYTEORDER == 4321
1
#else
0
......@@ -829,7 +876,7 @@ static const struct normal_encoding big2_encoding = {
STANDARD_VTABLE(big2_)
};
#if XML_BYTE_ORDER != 12
#if BYTEORDER != 1234
#ifdef XML_NS
......@@ -859,8 +906,8 @@ static const struct normal_encoding internal_big2_encoding = {
#undef PREFIX
static
int streqci(const char *s1, const char *s2)
static int FASTCALL
streqci(const char *s1, const char *s2)
{
for (;;) {
char c1 = *s1++;
......@@ -877,15 +924,15 @@ int streqci(const char *s1, const char *s2)
return 1;
}
static
void initUpdatePosition(const ENCODING *enc, const char *ptr,
const char *end, POSITION *pos)
static void PTRCALL
initUpdatePosition(const ENCODING *enc, const char *ptr,
const char *end, POSITION *pos)
{
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
}
static
int toAscii(const ENCODING *enc, const char *ptr, const char *end)
static int
toAscii(const ENCODING *enc, const char *ptr, const char *end)
{
char buf[1];
char *p = buf;
......@@ -896,34 +943,35 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end)
return buf[0];
}
static
int isSpace(int c)
static int FASTCALL
isSpace(int c)
{
switch (c) {
case 0x20:
case 0xD:
case 0xA:
case 0x9:
case 0x9:
return 1;
}
return 0;
}
/* Return 1 if there's just optional white space
or there's an S followed by name=val. */
static
int parsePseudoAttribute(const ENCODING *enc,
const char *ptr,
const char *end,
const char **namePtr,
const char **nameEndPtr,
const char **valPtr,
const char **nextTokPtr)
/* Return 1 if there's just optional white space or there's an S
followed by name=val.
*/
static int
parsePseudoAttribute(const ENCODING *enc,
const char *ptr,
const char *end,
const char **namePtr,
const char **nameEndPtr,
const char **valPtr,
const char **nextTokPtr)
{
int c;
char open;
if (ptr == end) {
*namePtr = 0;
*namePtr = NULL;
return 1;
}
if (!isSpace(toAscii(enc, ptr, end))) {
......@@ -934,7 +982,7 @@ int parsePseudoAttribute(const ENCODING *enc,
ptr += enc->minBytesPerChar;
} while (isSpace(toAscii(enc, ptr, end)));
if (ptr == end) {
*namePtr = 0;
*namePtr = NULL;
return 1;
}
*namePtr = ptr;
......@@ -951,11 +999,11 @@ int parsePseudoAttribute(const ENCODING *enc,
if (isSpace(c)) {
*nameEndPtr = ptr;
do {
ptr += enc->minBytesPerChar;
ptr += enc->minBytesPerChar;
} while (isSpace(c = toAscii(enc, ptr, end)));
if (c != ASCII_EQUALS) {
*nextTokPtr = ptr;
return 0;
*nextTokPtr = ptr;
return 0;
}
break;
}
......@@ -975,7 +1023,7 @@ int parsePseudoAttribute(const ENCODING *enc,
*nextTokPtr = ptr;
return 0;
}
open = c;
open = (char)c;
ptr += enc->minBytesPerChar;
*valPtr = ptr;
for (;; ptr += enc->minBytesPerChar) {
......@@ -983,11 +1031,11 @@ int parsePseudoAttribute(const ENCODING *enc,
if (c == open)
break;
if (!(ASCII_a <= c && c <= ASCII_z)
&& !(ASCII_A <= c && c <= ASCII_Z)
&& !(ASCII_0 <= c && c <= ASCII_9)
&& c != ASCII_PERIOD
&& c != ASCII_MINUS
&& c != ASCII_UNDERSCORE) {
&& !(ASCII_A <= c && c <= ASCII_Z)
&& !(ASCII_0 <= c && c <= ASCII_9)
&& c != ASCII_PERIOD
&& c != ASCII_MINUS
&& c != ASCII_UNDERSCORE) {
*nextTokPtr = ptr;
return 0;
}
......@@ -1005,7 +1053,8 @@ static const char KW_encoding[] = {
};
static const char KW_standalone[] = {
ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'
ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
ASCII_n, ASCII_e, '\0'
};
static const char KW_yes[] = {
......@@ -1016,27 +1065,28 @@ static const char KW_no[] = {
ASCII_n, ASCII_o, '\0'
};
static
int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const char *,
const char *),
int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
static int
doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
const char *,
const char *),
int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
{
const char *val = 0;
const char *name = 0;
const char *nameEnd = 0;
const char *val = NULL;
const char *name = NULL;
const char *nameEnd = NULL;
ptr += 5 * enc->minBytesPerChar;
end -= 2 * enc->minBytesPerChar;
if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) {
if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
|| !name) {
*badPtr = ptr;
return 0;
}
......@@ -1057,9 +1107,9 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
}
if (!name) {
if (isGeneralTextEntity) {
/* a TextDecl must have an EncodingDecl */
*badPtr = ptr;
return 0;
/* a TextDecl must have an EncodingDecl */
*badPtr = ptr;
return 0;
}
return 1;
}
......@@ -1081,7 +1131,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
if (!name)
return 1;
}
if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) {
if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
|| isGeneralTextEntity) {
*badPtr = name;
return 0;
}
......@@ -1106,8 +1157,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
return 1;
}
static
int checkCharRefNumber(int result)
static int FASTCALL
checkCharRefNumber(int result)
{
switch (result >> 8) {
case 0xD8: case 0xD9: case 0xDA: case 0xDB:
......@@ -1125,7 +1176,8 @@ int checkCharRefNumber(int result)
return result;
}
int XmlUtf8Encode(int c, char *buf)
int FASTCALL
XmlUtf8Encode(int c, char *buf)
{
enum {
/* minN is minimum legal resulting value for N byte sequence */
......@@ -1137,42 +1189,43 @@ int XmlUtf8Encode(int c, char *buf)
if (c < 0)
return 0;
if (c < min2) {
buf[0] = (c | UTF8_cval1);
buf[0] = (char)(c | UTF8_cval1);
return 1;
}
if (c < min3) {
buf[0] = ((c >> 6) | UTF8_cval2);
buf[1] = ((c & 0x3f) | 0x80);
buf[0] = (char)((c >> 6) | UTF8_cval2);
buf[1] = (char)((c & 0x3f) | 0x80);
return 2;
}
if (c < min4) {
buf[0] = ((c >> 12) | UTF8_cval3);
buf[1] = (((c >> 6) & 0x3f) | 0x80);
buf[2] = ((c & 0x3f) | 0x80);
buf[0] = (char)((c >> 12) | UTF8_cval3);
buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
buf[2] = (char)((c & 0x3f) | 0x80);
return 3;
}
if (c < 0x110000) {
buf[0] = ((c >> 18) | UTF8_cval4);
buf[1] = (((c >> 12) & 0x3f) | 0x80);
buf[2] = (((c >> 6) & 0x3f) | 0x80);
buf[3] = ((c & 0x3f) | 0x80);
buf[0] = (char)((c >> 18) | UTF8_cval4);
buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
buf[3] = (char)((c & 0x3f) | 0x80);
return 4;
}
return 0;
}
int XmlUtf16Encode(int charNum, unsigned short *buf)
int FASTCALL
XmlUtf16Encode(int charNum, unsigned short *buf)
{
if (charNum < 0)
return 0;
if (charNum < 0x10000) {
buf[0] = charNum;
buf[0] = (unsigned short)charNum;
return 1;
}
if (charNum < 0x110000) {
charNum -= 0x10000;
buf[0] = (charNum >> 10) + 0xD800;
buf[1] = (charNum & 0x3FF) + 0xDC00;
buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
return 2;
}
return 0;
......@@ -1186,65 +1239,68 @@ struct unknown_encoding {
char utf8[256][4];
};
int XmlSizeOfUnknownEncoding(void)
#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc))
int
XmlSizeOfUnknownEncoding(void)
{
return sizeof(struct unknown_encoding);
}
static
int unknown_isName(const ENCODING *enc, const char *p)
static int PTRFASTCALL
unknown_isName(const ENCODING *enc, const char *p)
{
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
int c = uenc->convert(uenc->userData, p);
if (c & ~0xFFFF)
return 0;
return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
}
static
int unknown_isNmstrt(const ENCODING *enc, const char *p)
static int PTRFASTCALL
unknown_isNmstrt(const ENCODING *enc, const char *p)
{
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
int c = uenc->convert(uenc->userData, p);
if (c & ~0xFFFF)
return 0;
return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
}
static
int unknown_isInvalid(const ENCODING *enc, const char *p)
static int PTRFASTCALL
unknown_isInvalid(const ENCODING *enc, const char *p)
{
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, p);
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
int c = uenc->convert(uenc->userData, p);
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
}
static
void unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
static void PTRCALL
unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
char buf[XML_UTF8_ENCODE_MAX];
for (;;) {
const char *utf8;
int n;
if (*fromP == fromLim)
break;
utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
utf8 = uenc->utf8[(unsigned char)**fromP];
n = *utf8++;
if (n == 0) {
int c = ((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
int c = uenc->convert(uenc->userData, *fromP);
n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP)
break;
break;
utf8 = buf;
*fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2);
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else {
if (n > toLim - *toP)
break;
break;
(*fromP)++;
}
do {
......@@ -1253,19 +1309,19 @@ void unknown_toUtf8(const ENCODING *enc,
}
}
static
void unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
static void PTRCALL
unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
while (*fromP != fromLim && *toP != toLim) {
unsigned short c
= ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
unsigned short c = uenc->utf16[(unsigned char)**fromP];
if (c == 0) {
c = (unsigned short)((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
*fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2);
c = (unsigned short)
uenc->convert(uenc->userData, *fromP);
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else
(*fromP)++;
......@@ -1275,18 +1331,18 @@ void unknown_toUtf16(const ENCODING *enc,
ENCODING *
XmlInitUnknownEncoding(void *mem,
int *table,
int (*convert)(void *userData, const char *p),
void *userData)
int *table,
CONVERTER convert,
void *userData)
{
int i;
struct unknown_encoding *e = mem;
struct unknown_encoding *e = (struct unknown_encoding *)mem;
for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
((char *)mem)[i] = ((char *)&latin1_encoding)[i];
for (i = 0; i < 128; i++)
if (latin1_encoding.type[i] != BT_OTHER
&& latin1_encoding.type[i] != BT_NONXML
&& table[i] != i)
&& table[i] != i)
return 0;
for (i = 0; i < 256; i++) {
int c = table[i];
......@@ -1299,20 +1355,20 @@ XmlInitUnknownEncoding(void *mem,
}
else if (c < 0) {
if (c < -4)
return 0;
e->normal.type[i] = BT_LEAD2 - (c + 2);
return 0;
e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
e->utf8[i][0] = 0;
e->utf16[i] = 0;
}
else if (c < 0x80) {
if (latin1_encoding.type[c] != BT_OTHER
&& latin1_encoding.type[c] != BT_NONXML
&& c != i)
return 0;
&& latin1_encoding.type[c] != BT_NONXML
&& c != i)
return 0;
e->normal.type[i] = latin1_encoding.type[c];
e->utf8[i][0] = 1;
e->utf8[i][1] = (char)c;
e->utf16[i] = c == 0 ? 0xFFFF : c;
e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
}
else if (checkCharRefNumber(c) < 0) {
e->normal.type[i] = BT_NONXML;
......@@ -1323,15 +1379,15 @@ XmlInitUnknownEncoding(void *mem,
}
else {
if (c > 0xFFFF)
return 0;
return 0;
if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
e->normal.type[i] = BT_NMSTRT;
e->normal.type[i] = BT_NMSTRT;
else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
e->normal.type[i] = BT_NAME;
e->normal.type[i] = BT_NAME;
else
e->normal.type[i] = BT_OTHER;
e->normal.type[i] = BT_OTHER;
e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
e->utf16[i] = c;
e->utf16[i] = (unsigned short)c;
}
}
e->userData = userData;
......@@ -1367,26 +1423,30 @@ enum {
};
static const char KW_ISO_8859_1[] = {
ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0'
ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
ASCII_MINUS, ASCII_1, '\0'
};
static const char KW_US_ASCII[] = {
ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0'
ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
'\0'
};
static const char KW_UTF_8[] = {
static const char KW_UTF_8[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
};
static const char KW_UTF_16[] = {
static const char KW_UTF_16[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
};
static const char KW_UTF_16BE[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0'
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
'\0'
};
static const char KW_UTF_16LE[] = {
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0'
ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
'\0'
};
static
int getEncodingIndex(const char *name)
static int FASTCALL
getEncodingIndex(const char *name)
{
static const char *encodingNames[] = {
KW_ISO_8859_1,
......@@ -1397,7 +1457,7 @@ int getEncodingIndex(const char *name)
KW_UTF_16LE,
};
int i;
if (name == 0)
if (name == NULL)
return NO_ENC;
for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
if (streqci(name, encodingNames[i]))
......@@ -1405,27 +1465,28 @@ int getEncodingIndex(const char *name)
return UNKNOWN_ENC;
}
/* For binary compatibility, we store the index of the encoding specified
at initialization in the isUtf16 member. */
/* For binary compatibility, we store the index of the encoding
specified at initialization in the isUtf16 member.
*/
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i)
/* This is what detects the encoding.
encodingTable maps from encoding indices to encodings;
INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding;
state is XML_CONTENT_STATE if we're parsing an external text entity,
and XML_PROLOG_STATE otherwise.
/* This is what detects the encoding. encodingTable maps from
encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
the external (protocol) specified encoding; state is
XML_CONTENT_STATE if we're parsing an external text entity, and
XML_PROLOG_STATE otherwise.
*/
static
int initScan(const ENCODING **encodingTable,
const INIT_ENCODING *enc,
int state,
const char *ptr,
const char *end,
const char **nextTokPtr)
static int
initScan(const ENCODING **encodingTable,
const INIT_ENCODING *enc,
int state,
const char *ptr,
const char *end,
const char **nextTokPtr)
{
const ENCODING **encPtr;
......@@ -1452,8 +1513,8 @@ int initScan(const ENCODING **encodingTable,
case 0xFF:
case 0xEF: /* possibly first byte of UTF-8 BOM */
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
&& state == XML_CONTENT_STATE)
break;
&& state == XML_CONTENT_STATE)
break;
/* fall through */
case 0x00:
case 0x3C:
......@@ -1464,23 +1525,23 @@ int initScan(const ENCODING **encodingTable,
switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
case 0xFEFF:
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
&& state == XML_CONTENT_STATE)
break;
&& state == XML_CONTENT_STATE)
break;
*nextTokPtr = ptr + 2;
*encPtr = encodingTable[UTF_16BE_ENC];
return XML_TOK_BOM;
/* 00 3C is handled in the default case */
case 0x3C00:
if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
|| INIT_ENC_INDEX(enc) == UTF_16_ENC)
&& state == XML_CONTENT_STATE)
break;
|| INIT_ENC_INDEX(enc) == UTF_16_ENC)
&& state == XML_CONTENT_STATE)
break;
*encPtr = encodingTable[UTF_16LE_ENC];
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
case 0xFFFE:
if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
&& state == XML_CONTENT_STATE)
break;
&& state == XML_CONTENT_STATE)
break;
*nextTokPtr = ptr + 2;
*encPtr = encodingTable[UTF_16LE_ENC];
return XML_TOK_BOM;
......@@ -1489,45 +1550,50 @@ int initScan(const ENCODING **encodingTable,
/* If there's an explicitly specified (external) encoding
of ISO-8859-1 or some flavour of UTF-16
and this is an external text entity,
don't look for the BOM,
because it might be a legal data. */
don't look for the BOM,
because it might be a legal data.
*/
if (state == XML_CONTENT_STATE) {
int e = INIT_ENC_INDEX(enc);
if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
break;
int e = INIT_ENC_INDEX(enc);
if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
|| e == UTF_16LE_ENC || e == UTF_16_ENC)
break;
}
if (ptr + 2 == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
if ((unsigned char)ptr[2] == 0xBF) {
*nextTokPtr = ptr + 3;
*encPtr = encodingTable[UTF_8_ENC];
return XML_TOK_BOM;
*nextTokPtr = ptr + 3;
*encPtr = encodingTable[UTF_8_ENC];
return XML_TOK_BOM;
}
break;
default:
if (ptr[0] == '\0') {
/* 0 isn't a legal data character. Furthermore a document entity can only
start with ASCII characters. So the only way this can fail to be big-endian
UTF-16 if it it's an external parsed general entity that's labelled as
UTF-16LE. */
if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
break;
*encPtr = encodingTable[UTF_16BE_ENC];
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
/* 0 isn't a legal data character. Furthermore a document
entity can only start with ASCII characters. So the only
way this can fail to be big-endian UTF-16 if it it's an
external parsed general entity that's labelled as
UTF-16LE.
*/
if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
break;
*encPtr = encodingTable[UTF_16BE_ENC];
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
else if (ptr[1] == '\0') {
/* We could recover here in the case:
- parsing an external entity
- second byte is 0
- no externally specified encoding
- no encoding declaration
by assuming UTF-16LE. But we don't, because this would mean when
presented just with a single byte, we couldn't reliably determine
whether we needed further bytes. */
if (state == XML_CONTENT_STATE)
break;
*encPtr = encodingTable[UTF_16LE_ENC];
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
/* We could recover here in the case:
- parsing an external entity
- second byte is 0
- no externally specified encoding
- no encoding declaration
by assuming UTF-16LE. But we don't, because this would mean when
presented just with a single byte, we couldn't reliably determine
whether we needed further bytes.
*/
if (state == XML_CONTENT_STATE)
break;
*encPtr = encodingTable[UTF_16LE_ENC];
return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
break;
}
......@@ -1555,9 +1621,9 @@ int initScan(const ENCODING **encodingTable,
ENCODING *
XmlInitUnknownEncodingNS(void *mem,
int *table,
int (*convert)(void *userData, const char *p),
void *userData)
int *table,
CONVERTER convert,
void *userData)
{
ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
if (enc)
......
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlTok_INCLUDED
......@@ -11,19 +10,21 @@ extern "C" {
#endif
/* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of
illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_NONE -4 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1 /* only part of a token */
#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_NONE -4 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1 /* only part of a token */
#define XML_TOK_INVALID 0
/* The following tokens are returned by XmlContentTok; some are also
returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */
returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok.
*/
#define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
......@@ -33,22 +34,24 @@ extern "C" {
#define XML_TOK_DATA_NEWLINE 7
#define XML_TOK_CDATA_SECT_OPEN 8
#define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10 /* numeric character reference */
#define XML_TOK_CHAR_REF 10 /* numeric character reference */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14 /* Byte order mark */
#define XML_TOK_BOM 14 /* Byte order mark */
/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16 /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
#define XML_TOK_DECL_OPEN 16 /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
#define XML_TOK_NAME 18
#define XML_TOK_NMTOKEN 19
#define XML_TOK_POUND_NAME 20 /* #name */
#define XML_TOK_OR 21 /* | */
#define XML_TOK_POUND_NAME 20 /* #name */
#define XML_TOK_OR 21 /* | */
#define XML_TOK_PERCENT 22
#define XML_TOK_OPEN_PAREN 23
#define XML_TOK_CLOSE_PAREN 24
......@@ -59,14 +62,14 @@ extern "C" {
#define XML_TOK_INSTANCE_START 29
/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION 30 /* name? */
#define XML_TOK_NAME_ASTERISK 31 /* name* */
#define XML_TOK_NAME_PLUS 32 /* name+ */
#define XML_TOK_COND_SECT_OPEN 33 /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
#define XML_TOK_NAME_QUESTION 30 /* name? */
#define XML_TOK_NAME_ASTERISK 31 /* name* */
#define XML_TOK_NAME_PLUS 32 /* name+ */
#define XML_TOK_COND_SECT_OPEN 33 /* <![ */
#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
#define XML_TOK_COMMA 38
/* The following token is returned only by XmlAttributeValueTok */
......@@ -75,8 +78,9 @@ extern "C" {
/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40
/* With namespace processing this is returned by XmlPrologTok
for a name with a colon. */
/* With namespace processing this is returned by XmlPrologTok for a
name with a colon.
*/
#define XML_TOK_PREFIXED_NAME 41
#ifdef XML_DTD
......@@ -121,64 +125,73 @@ typedef struct {
struct encoding;
typedef struct encoding ENCODING;
typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *,
const char *,
const char **);
struct encoding {
int (*scanners[XML_N_STATES])(const ENCODING *,
const char *,
const char *,
const char **);
int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *,
const char *,
const char *,
const char **);
int (*sameName)(const ENCODING *,
const char *, const char *);
int (*nameMatchesAscii)(const ENCODING *,
const char *, const char *, const char *);
int (*nameLength)(const ENCODING *, const char *);
const char *(*skipS)(const ENCODING *, const char *);
int (*getAtts)(const ENCODING *enc, const char *ptr,
int attsMax, ATTRIBUTE *atts);
int (*charRefNumber)(const ENCODING *enc, const char *ptr);
int (*predefinedEntityName)(const ENCODING *, const char *, const char *);
void (*updatePosition)(const ENCODING *,
const char *ptr,
const char *end,
POSITION *);
int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
const char **badPtr);
void (*utf8Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
char **toP,
const char *toLim);
void (*utf16Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
unsigned short **toP,
const unsigned short *toLim);
SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES];
int (PTRCALL *sameName)(const ENCODING *,
const char *,
const char *);
int (PTRCALL *nameMatchesAscii)(const ENCODING *,
const char *,
const char *,
const char *);
int (PTRFASTCALL *nameLength)(const ENCODING *, const char *);
const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *);
int (PTRCALL *getAtts)(const ENCODING *enc,
const char *ptr,
int attsMax,
ATTRIBUTE *atts);
int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);
int (PTRCALL *predefinedEntityName)(const ENCODING *,
const char *,
const char *);
void (PTRCALL *updatePosition)(const ENCODING *,
const char *ptr,
const char *end,
POSITION *);
int (PTRCALL *isPublicId)(const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr);
void (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
char **toP,
const char *toLim);
void (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
unsigned short **toP,
const unsigned short *toLim);
int minBytesPerChar;
char isUtf8;
char isUtf16;
};
/*
Scan the string starting at ptr until the end of the next complete token,
but do not scan past eptr. Return an integer giving the type of token.
/* Scan the string starting at ptr until the end of the next complete
token, but do not scan past eptr. Return an integer giving the
type of token.
Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
Return XML_TOK_PARTIAL when the string does not contain a complete token;
nextTokPtr will not be set.
Return XML_TOK_PARTIAL when the string does not contain a complete
token; nextTokPtr will not be set.
Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr
will be set to point to the character which made the token invalid.
Return XML_TOK_INVALID when the string does not start a valid
token; nextTokPtr will be set to point to the character which made
the token invalid.
Otherwise the string starts with a valid token; nextTokPtr will be set to point
to the character following the end of that token.
Otherwise the string starts with a valid token; nextTokPtr will be
set to point to the character following the end of that token.
Each data character counts as a single token, but adjacent data characters
may be returned together. Similarly for characters in the prolog outside
literals, comments and processing instructions.
Each data character counts as a single token, but adjacent data
characters may be returned together. Similarly for characters in
the prolog outside literals, comments and processing instructions.
*/
......@@ -201,9 +214,9 @@ literals, comments and processing instructions.
#endif /* XML_DTD */
/* This is used for performing a 2nd-level tokenization on
the content of a literal that has already been returned by XmlTok. */
/* This is used for performing a 2nd-level tokenization on the content
of a literal that has already been returned by XmlTok.
*/
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
(((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
......@@ -250,48 +263,51 @@ typedef struct {
const ENCODING **encPtr;
} INIT_ENCODING;
int XmlParseXmlDecl(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void);
int XmlUtf8Encode(int charNumber, char *buf);
int XmlUtf16Encode(int charNumber, unsigned short *buf);
int XmlSizeOfUnknownEncoding(void);
ENCODING *
int XmlParseXmlDecl(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void);
int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf);
int XmlSizeOfUnknownEncoding(void);
typedef int (*CONVERTER)(void *userData, const char *p);
ENCODING *
XmlInitUnknownEncoding(void *mem,
int *table,
int (*conv)(void *userData, const char *p),
void *userData);
int XmlParseXmlDeclNS(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING *
int *table,
CONVERTER convert,
void *userData);
int XmlParseXmlDeclNS(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr,
int *standalonePtr);
int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING *
XmlInitUnknownEncodingNS(void *mem,
int *table,
int (*conv)(void *userData, const char *p),
void *userData);
int *table,
CONVERTER convert,
void *userData);
#ifdef __cplusplus
}
#endif
......
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef IS_INVALID_CHAR
......@@ -10,7 +9,7 @@ See the file COPYING for copying permission.
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
return XML_TOK_PARTIAL_CHAR; \
if (IS_INVALID_CHAR(enc, ptr, n)) { \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID; \
......@@ -87,9 +86,9 @@ See the file COPYING for copying permission.
/* ptr points to character following "<!-" */
static
int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
if (ptr != end) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
......@@ -101,22 +100,22 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS:
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_COMMENT;
}
break;
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_COMMENT;
}
break;
default:
ptr += MINBPC(enc);
break;
ptr += MINBPC(enc);
break;
}
}
}
......@@ -125,9 +124,9 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<!" */
static
int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
......@@ -149,12 +148,12 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
case BT_PERCNT:
if (ptr + MINBPC(enc) == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
/* don't allow <!ENTITY% foo "whatever"> */
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* fall through */
case BT_S: case BT_CR: case BT_LF:
......@@ -172,8 +171,9 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL;
}
static
int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
static int PTRCALL
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
const char *end, int *tokPtr)
{
int upper = 0;
*tokPtr = XML_TOK_PI;
......@@ -216,9 +216,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<?" */
static
int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
int tok;
const char *target = ptr;
......@@ -235,39 +235,39 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST:
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
}
break;
default:
ptr += MINBPC(enc);
break;
}
case BT_QUEST:
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
}
break;
default:
ptr += MINBPC(enc);
break;
}
}
return XML_TOK_PARTIAL;
case BT_QUEST:
if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
*nextTokPtr = ptr + MINBPC(enc);
return tok;
}
/* fall through */
default:
......@@ -278,12 +278,12 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL;
}
static
int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB };
int i;
/* CDATA[ */
if (end - ptr < 6 * MINBPC(enc))
......@@ -298,9 +298,9 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e
return XML_TOK_CDATA_SECT_OPEN;
}
static
int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_NONE;
......@@ -309,7 +309,7 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
if (n & (MINBPC(enc) - 1)) {
n &= ~(MINBPC(enc) - 1);
if (n == 0)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
end = ptr + n;
}
}
......@@ -350,8 +350,8 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
......@@ -376,9 +376,9 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
/* ptr points to character following "</" */
static
int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
......@@ -393,21 +393,22 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
break;
case BT_GT:
*nextTokPtr = ptr + MINBPC(enc);
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
break;
case BT_GT:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_END_TAG;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
#ifdef XML_NS
case BT_COLON:
/* no need to check qname syntax here, since end-tag must match exactly */
/* no need to check qname syntax here,
since end-tag must match exactly */
ptr += MINBPC(enc);
break;
#endif
......@@ -424,9 +425,9 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&#X" */
static
int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
if (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
......@@ -441,13 +442,13 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
break;
break;
case BT_SEMI:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_CHAR_REF;
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_CHAR_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
}
......@@ -456,9 +457,9 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end
/* ptr points to character following "&#" */
static
int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
if (ptr != end) {
if (CHAR_MATCHES(enc, ptr, ASCII_x))
......@@ -473,13 +474,13 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
break;
break;
case BT_SEMI:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_CHAR_REF;
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_CHAR_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
}
......@@ -488,9 +489,9 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&" */
static
int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
......@@ -518,9 +519,9 @@ int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following first character of attribute name */
static
int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
#ifdef XML_NS
int hadColon = 0;
......@@ -531,142 +532,141 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
case BT_COLON:
if (hadColon) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
hadColon = 1;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
#endif
case BT_S: case BT_CR: case BT_LF:
for (;;) {
int t;
int t;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS)
break;
switch (t) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS)
break;
switch (t) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
/* fall through */
case BT_EQUALS:
{
int open;
int open;
#ifdef XML_NS
hadColon = 0;
hadColon = 0;
#endif
for (;;) {
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
switch (open) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
ptr += MINBPC(enc);
/* in attribute value */
for (;;) {
int t;
if (ptr == end)
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
case BT_AMP:
{
int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
if (tok <= 0) {
if (tok == XML_TOK_INVALID)
*nextTokPtr = ptr;
return tok;
}
break;
}
case BT_LT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
default:
ptr += MINBPC(enc);
break;
}
}
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S:
case BT_CR:
case BT_LF:
break;
case BT_SOL:
goto sol;
case BT_GT:
goto gt;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* ptr points to closing quote */
for (;;) {
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
continue;
case BT_GT:
for (;;) {
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
switch (open) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
ptr += MINBPC(enc);
/* in attribute value */
for (;;) {
int t;
if (ptr == end)
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
case BT_AMP:
{
int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
if (tok <= 0) {
if (tok == XML_TOK_INVALID)
*nextTokPtr = ptr;
return tok;
}
break;
}
case BT_LT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
default:
ptr += MINBPC(enc);
break;
}
}
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S:
case BT_CR:
case BT_LF:
break;
case BT_SOL:
goto sol;
case BT_GT:
goto gt;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* ptr points to closing quote */
for (;;) {
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
continue;
case BT_GT:
gt:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_START_TAG_WITH_ATTS;
case BT_SOL:
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_START_TAG_WITH_ATTS;
case BT_SOL:
sol:
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
}
break;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
}
break;
}
default:
*nextTokPtr = ptr;
......@@ -678,9 +678,9 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<" */
static
int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
#ifdef XML_NS
int hadColon;
......@@ -696,7 +696,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_LSQB:
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
end, nextTokPtr);
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
......@@ -718,13 +719,13 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
case BT_COLON:
if (hadColon) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
hadColon = 1;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
......@@ -736,23 +737,23 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_CR: case BT_LF:
{
ptr += MINBPC(enc);
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
goto gt;
case BT_SOL:
goto sol;
case BT_S: case BT_CR: case BT_LF:
ptr += MINBPC(enc);
continue;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
}
return XML_TOK_PARTIAL;
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
goto gt;
case BT_SOL:
goto sol;
case BT_S: case BT_CR: case BT_LF:
ptr += MINBPC(enc);
continue;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
}
return XML_TOK_PARTIAL;
}
case BT_GT:
gt:
......@@ -762,10 +763,10 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
sol:
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
......@@ -777,9 +778,9 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL;
}
static
int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_NONE;
......@@ -788,7 +789,7 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
if (n & (MINBPC(enc) - 1)) {
n &= ~(MINBPC(enc) - 1);
if (n == 0)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
end = ptr + n;
}
}
......@@ -833,8 +834,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
......@@ -842,18 +843,18 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
#undef LEAD_CASE
case BT_RSQB:
if (ptr + MINBPC(enc) != end) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
if (ptr + 2*MINBPC(enc) != end) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
}
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_INVALID;
}
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
if (ptr + 2*MINBPC(enc) != end) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
}
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_INVALID;
}
}
/* fall through */
case BT_AMP:
......@@ -876,9 +877,9 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "%" */
static
int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
......@@ -905,9 +906,9 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL;
}
static
int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
if (ptr == end)
return XML_TOK_PARTIAL;
......@@ -932,10 +933,10 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
return -XML_TOK_POUND_NAME;
}
static
int PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end,
const char **nextTokPtr)
{
while (ptr != end) {
int t = BYTE_TYPE(enc, ptr);
......@@ -945,16 +946,16 @@ int PREFIX(scanLit)(int open, const ENCODING *enc,
case BT_APOS:
ptr += MINBPC(enc);
if (t != open)
break;
break;
if (ptr == end)
return -XML_TOK_LITERAL;
return -XML_TOK_LITERAL;
*nextTokPtr = ptr;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
case BT_GT: case BT_PERCNT: case BT_LSQB:
return XML_TOK_LITERAL;
return XML_TOK_LITERAL;
default:
return XML_TOK_INVALID;
return XML_TOK_INVALID;
}
default:
ptr += MINBPC(enc);
......@@ -964,9 +965,9 @@ int PREFIX(scanLit)(int open, const ENCODING *enc,
return XML_TOK_PARTIAL;
}
static
int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
int tok;
if (ptr == end)
......@@ -976,7 +977,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
if (n & (MINBPC(enc) - 1)) {
n &= ~(MINBPC(enc) - 1);
if (n == 0)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
end = ptr + n;
}
}
......@@ -989,44 +990,47 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
{
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_QUEST:
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_NMSTRT:
case BT_HEX:
case BT_NONASCII:
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
*nextTokPtr = ptr - MINBPC(enc);
return XML_TOK_INSTANCE_START;
*nextTokPtr = ptr - MINBPC(enc);
return XML_TOK_INSTANCE_START;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_CR:
if (ptr + MINBPC(enc) == end)
if (ptr + MINBPC(enc) == end) {
*nextTokPtr = end;
/* indicate that this might be part of a CR/LF pair */
return -XML_TOK_PROLOG_S;
}
/* fall through */
case BT_S: case BT_LF:
for (;;) {
ptr += MINBPC(enc);
if (ptr == end)
break;
break;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_LF:
break;
break;
case BT_CR:
/* don't split CR/LF pair */
if (ptr + MINBPC(enc) != end)
break;
/* fall through */
/* don't split CR/LF pair */
if (ptr + MINBPC(enc) != end)
break;
/* fall through */
default:
*nextTokPtr = ptr;
return XML_TOK_PROLOG_S;
*nextTokPtr = ptr;
return XML_TOK_PROLOG_S;
}
}
*nextTokPtr = ptr;
......@@ -1045,10 +1049,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return -XML_TOK_CLOSE_BRACKET;
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if (ptr + MINBPC(enc) == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE;
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE;
}
}
*nextTokPtr = ptr;
......@@ -1147,40 +1151,40 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
switch (tok) {
case XML_TOK_NAME:
if (ptr == end)
return XML_TOK_PARTIAL;
tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
default:
tok = XML_TOK_NMTOKEN;
break;
}
break;
if (ptr == end)
return XML_TOK_PARTIAL;
tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
default:
tok = XML_TOK_NMTOKEN;
break;
}
break;
case XML_TOK_PREFIXED_NAME:
tok = XML_TOK_NMTOKEN;
break;
tok = XML_TOK_NMTOKEN;
break;
}
break;
#endif
case BT_PLUS:
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_NAME_PLUS;
case BT_AST:
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_NAME_ASTERISK;
case BT_QUEST:
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_NAME_QUESTION;
......@@ -1192,9 +1196,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return -tok;
}
static
int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
const char *start;
if (ptr == end)
......@@ -1208,7 +1212,7 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
#undef LEAD_CASE
case BT_AMP:
if (ptr == start)
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_LT:
......@@ -1217,27 +1221,27 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
return XML_TOK_INVALID;
case BT_LF:
if (ptr == start) {
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_DATA_NEWLINE;
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_S:
if (ptr == start) {
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_ATTRIBUTE_VALUE_S;
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_ATTRIBUTE_VALUE_S;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
......@@ -1250,9 +1254,9 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
return XML_TOK_DATA_CHARS;
}
static
int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
const char *start;
if (ptr == end)
......@@ -1266,33 +1270,33 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end
#undef LEAD_CASE
case BT_AMP:
if (ptr == start)
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_PERCNT:
if (ptr == start) {
int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
end, nextTokPtr);
return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
end, nextTokPtr);
return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_LF:
if (ptr == start) {
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_DATA_NEWLINE;
*nextTokPtr = ptr + MINBPC(enc);
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
ptr += MINBPC(enc);
if (ptr == end)
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
......@@ -1307,9 +1311,9 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end
#ifdef XML_DTD
static
int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
int level = 0;
if (MINBPC(enc) > 1) {
......@@ -1324,30 +1328,30 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
INVALID_CASES(ptr, nextTokPtr)
case BT_LT:
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level;
ptr += MINBPC(enc);
}
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level;
ptr += MINBPC(enc);
}
}
break;
case BT_RSQB:
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc);
if (level == 0) {
*nextTokPtr = ptr;
return XML_TOK_IGNORE_SECT;
}
--level;
}
if ((ptr += MINBPC(enc)) == end)
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc);
if (level == 0) {
*nextTokPtr = ptr;
return XML_TOK_IGNORE_SECT;
}
--level;
}
}
break;
default:
......@@ -1360,9 +1364,9 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
#endif /* XML_DTD */
static
int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
const char **badPtr)
static int PTRCALL
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
const char **badPtr)
{
ptr += MINBPC(enc);
end -= MINBPC(enc);
......@@ -1392,22 +1396,22 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
break;
case BT_S:
if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
*badPtr = ptr;
return 0;
*badPtr = ptr;
return 0;
}
break;
case BT_NAME:
case BT_NMSTRT:
if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
break;
break;
default:
switch (BYTE_TO_ASCII(enc, ptr)) {
case 0x24: /* $ */
case 0x40: /* @ */
break;
break;
default:
*badPtr = ptr;
return 0;
*badPtr = ptr;
return 0;
}
break;
}
......@@ -1415,28 +1419,29 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
return 1;
}
/* This must only be called for a well-formed start-tag or empty element tag.
Returns the number of attributes. Pointers to the first attsMax attributes
are stored in atts. */
/* This must only be called for a well-formed start-tag or empty
element tag. Returns the number of attributes. Pointers to the
first attsMax attributes are stored in atts.
*/
static
int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
int attsMax, ATTRIBUTE *atts)
static int PTRCALL
PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
int attsMax, ATTRIBUTE *atts)
{
enum { other, inName, inValue } state = inName;
int nAtts = 0;
int open = 0; /* defined when state == inValue;
initialization just to shut up compilers */
initialization just to shut up compilers */
for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
#define START_NAME \
if (state == other) { \
if (nAtts < attsMax) { \
atts[nAtts].name = ptr; \
atts[nAtts].normalized = 1; \
} \
state = inName; \
if (nAtts < attsMax) { \
atts[nAtts].name = ptr; \
atts[nAtts].normalized = 1; \
} \
state = inName; \
}
#define LEAD_CASE(n) \
case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
......@@ -1450,47 +1455,47 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
#undef START_NAME
case BT_QUOT:
if (state != inValue) {
if (nAtts < attsMax)
atts[nAtts].valuePtr = ptr + MINBPC(enc);
if (nAtts < attsMax)
atts[nAtts].valuePtr = ptr + MINBPC(enc);
state = inValue;
open = BT_QUOT;
}
else if (open == BT_QUOT) {
state = other;
if (nAtts < attsMax)
atts[nAtts].valueEnd = ptr;
nAtts++;
if (nAtts < attsMax)
atts[nAtts].valueEnd = ptr;
nAtts++;
}
break;
case BT_APOS:
if (state != inValue) {
if (nAtts < attsMax)
atts[nAtts].valuePtr = ptr + MINBPC(enc);
if (nAtts < attsMax)
atts[nAtts].valuePtr = ptr + MINBPC(enc);
state = inValue;
open = BT_APOS;
}
else if (open == BT_APOS) {
state = other;
if (nAtts < attsMax)
atts[nAtts].valueEnd = ptr;
nAtts++;
if (nAtts < attsMax)
atts[nAtts].valueEnd = ptr;
nAtts++;
}
break;
case BT_AMP:
if (nAtts < attsMax)
atts[nAtts].normalized = 0;
atts[nAtts].normalized = 0;
break;
case BT_S:
if (state == inName)
state = other;
else if (state == inValue
&& nAtts < attsMax
&& atts[nAtts].normalized
&& (ptr == atts[nAtts].valuePtr
|| BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
|| BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
|| BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
atts[nAtts].normalized = 0;
&& nAtts < attsMax
&& atts[nAtts].normalized
&& (ptr == atts[nAtts].valuePtr
|| BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
|| BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
|| BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
atts[nAtts].normalized = 0;
break;
case BT_CR: case BT_LF:
/* This case ensures that the first attribute name is counted
......@@ -1498,12 +1503,12 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
if (state == inName)
state = other;
else if (state == inValue && nAtts < attsMax)
atts[nAtts].normalized = 0;
atts[nAtts].normalized = 0;
break;
case BT_GT:
case BT_SOL:
if (state != inValue)
return nAtts;
return nAtts;
break;
default:
break;
......@@ -1512,32 +1517,36 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
/* not reached */
}
static
int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
static int PTRFASTCALL
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
{
int result = 0;
/* skip &# */
ptr += 2*MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
for (ptr += MINBPC(enc);
!CHAR_MATCHES(enc, ptr, ASCII_SEMI);
ptr += MINBPC(enc)) {
int c = BYTE_TO_ASCII(enc, ptr);
switch (c) {
case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
result <<= 4;
result |= (c - ASCII_0);
break;
case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
result <<= 4;
result += 10 + (c - ASCII_A);
break;
case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
result <<= 4;
result += 10 + (c - ASCII_a);
break;
result <<= 4;
result |= (c - ASCII_0);
break;
case ASCII_A: case ASCII_B: case ASCII_C:
case ASCII_D: case ASCII_E: case ASCII_F:
result <<= 4;
result += 10 + (c - ASCII_A);
break;
case ASCII_a: case ASCII_b: case ASCII_c:
case ASCII_d: case ASCII_e: case ASCII_f:
result <<= 4;
result += 10 + (c - ASCII_a);
break;
}
if (result >= 0x110000)
return -1;
return -1;
}
}
else {
......@@ -1546,23 +1555,24 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
result *= 10;
result += (c - ASCII_0);
if (result >= 0x110000)
return -1;
return -1;
}
}
return checkCharRefNumber(result);
}
static
int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
static int PTRCALL
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
const char *end)
{
switch ((end - ptr)/MINBPC(enc)) {
case 2:
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
switch (BYTE_TO_ASCII(enc, ptr)) {
case ASCII_l:
return ASCII_LT;
return ASCII_LT;
case ASCII_g:
return ASCII_GT;
return ASCII_GT;
}
}
break;
......@@ -1570,9 +1580,9 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_p))
return ASCII_AMP;
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_p))
return ASCII_AMP;
}
}
break;
......@@ -1581,23 +1591,23 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
case ASCII_q:
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_t))
return ASCII_QUOT;
}
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_t))
return ASCII_QUOT;
}
}
break;
case ASCII_a:
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_s))
return ASCII_APOS;
}
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
ptr += MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_s))
return ASCII_APOS;
}
}
break;
}
......@@ -1605,20 +1615,20 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
return 0;
}
static
int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
static int PTRCALL
PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
{
for (;;) {
switch (BYTE_TYPE(enc, ptr1)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (*ptr1++ != *ptr2++) \
return 0;
return 0;
LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
#undef LEAD_CASE
/* fall through */
if (*ptr1++ != *ptr2++)
return 0;
return 0;
break;
case BT_NONASCII:
case BT_NMSTRT:
......@@ -1630,23 +1640,23 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
case BT_NAME:
case BT_MINUS:
if (*ptr2++ != *ptr1++)
return 0;
return 0;
if (MINBPC(enc) > 1) {
if (*ptr2++ != *ptr1++)
return 0;
if (MINBPC(enc) > 2) {
if (*ptr2++ != *ptr1++)
return 0;
if (*ptr2++ != *ptr1++)
return 0;
if (MINBPC(enc) > 2) {
if (*ptr2++ != *ptr1++)
return 0;
if (MINBPC(enc) > 3) {
if (*ptr2++ != *ptr1++)
return 0;
}
}
if (*ptr2++ != *ptr1++)
return 0;
}
}
}
break;
default:
if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
return 1;
return 1;
switch (BYTE_TYPE(enc, ptr2)) {
case BT_LEAD2:
case BT_LEAD3:
......@@ -1660,18 +1670,18 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
return 0;
return 0;
default:
return 1;
return 1;
}
}
}
/* not reached */
}
static
int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
const char *end1, const char *ptr2)
static int PTRCALL
PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
const char *end1, const char *ptr2)
{
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
if (ptr1 == end1)
......@@ -1682,8 +1692,8 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
return ptr1 == end1;
}
static
int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
static int PTRFASTCALL
PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
{
const char *start = ptr;
for (;;) {
......@@ -1709,8 +1719,8 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
}
}
static
const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
static const char * PTRFASTCALL
PREFIX(skipS)(const ENCODING *enc, const char *ptr)
{
for (;;) {
switch (BYTE_TYPE(enc, ptr)) {
......@@ -1725,11 +1735,11 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
}
}
static
void PREFIX(updatePosition)(const ENCODING *enc,
const char *ptr,
const char *end,
POSITION *pos)
static void PTRCALL
PREFIX(updatePosition)(const ENCODING *enc,
const char *ptr,
const char *end,
POSITION *pos)
{
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
......@@ -1748,7 +1758,7 @@ void PREFIX(updatePosition)(const ENCODING *enc,
pos->lineNumber++;
ptr += MINBPC(enc);
if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
ptr += MINBPC(enc);
pos->columnNumber = (unsigned)-1;
break;
default:
......@@ -1766,3 +1776,4 @@ void PREFIX(updatePosition)(const ENCODING *enc,
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES
const ENCODING *NS(XmlGetUtf8InternalEncoding)(void)
const ENCODING *
NS(XmlGetUtf8InternalEncoding)(void)
{
return &ns(internal_utf8_encoding).enc;
}
const ENCODING *NS(XmlGetUtf16InternalEncoding)(void)
const ENCODING *
NS(XmlGetUtf16InternalEncoding)(void)
{
#if XML_BYTE_ORDER == 12
#if BYTEORDER == 1234
return &ns(internal_little2_encoding).enc;
#elif XML_BYTE_ORDER == 21
#elif BYTEORDER == 4321
return &ns(internal_big2_encoding).enc;
#else
const short n = 1;
return *(const char *)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc;
return (*(const char *)&n
? &ns(internal_little2_encoding).enc
: &ns(internal_big2_encoding).enc);
#endif
}
static
const ENCODING *NS(encodings)[] = {
static const ENCODING *NS(encodings)[] = {
&ns(latin1_encoding).enc,
&ns(ascii_encoding).enc,
&ns(utf8_encoding).enc,
......@@ -26,21 +29,25 @@ const ENCODING *NS(encodings)[] = {
&ns(utf8_encoding).enc /* NO_ENC */
};
static
int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_PROLOG_STATE, ptr, end, nextTokPtr);
}
static
int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
static int PTRCALL
NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_CONTENT_STATE, ptr, end, nextTokPtr);
}
int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
int
NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
const char *name)
{
int i = getEncodingIndex(name);
if (i == UNKNOWN_ENC)
......@@ -54,8 +61,8 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n
return 1;
}
static
const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
static const ENCODING *
NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
{
#define ENCODING_MAX 128
char buf[ENCODING_MAX];
......@@ -73,26 +80,27 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha
return NS(encodings)[i];
}
int NS(XmlParseXmlDecl)(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
int
NS(XmlParseXmlDecl)(int isGeneralTextEntity,
const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr,
const char **versionPtr,
const char **versionEndPtr,
const char **encodingName,
const ENCODING **encoding,
int *standalone)
{
return doParseXmlDecl(NS(findEncoding),
isGeneralTextEntity,
enc,
ptr,
end,
badPtr,
versionPtr,
versionEndPtr,
encodingName,
encoding,
standalone);
isGeneralTextEntity,
enc,
ptr,
end,
badPtr,
versionPtr,
versionEndPtr,
encodingName,
encoding,
standalone);
}
......@@ -690,9 +690,9 @@ class PyBuildExt(build_ext):
# More information on Expat can be found at www.libexpat.org.
#
if sys.byteorder == "little":
xmlbo = "12"
xmlbo = "1234"
else:
xmlbo = "21"
xmlbo = "4321"
expatinc = os.path.join(os.getcwd(), srcdir, 'Modules', 'expat')
exts.append(Extension('pyexpat',
sources = [
......@@ -702,10 +702,9 @@ class PyBuildExt(build_ext):
'expat/xmltok.c',
],
define_macros = [
('HAVE_EXPAT_H',None),
('XML_NS', '1'),
('XML_DTD', '1'),
('XML_BYTE_ORDER', xmlbo),
('BYTEORDER', xmlbo),
('XML_CONTEXT_BYTES','1024'),
],
include_dirs = [expatinc]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment