Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
fc03a94a
Commit
fc03a94a
authored
Jan 25, 2003
by
Martin v. Löwis
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Incorporate Expat 1.95.6.
parent
5a772d32
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
5455 additions
and
3953 deletions
+5455
-3953
Modules/expat/ascii.h
Modules/expat/ascii.h
+3
-4
Modules/expat/expat.h
Modules/expat/expat.h
+527
-340
Modules/expat/internal.h
Modules/expat/internal.h
+73
-0
Modules/expat/xmlparse.c
Modules/expat/xmlparse.c
+3236
-2154
Modules/expat/xmlrole.c
Modules/expat/xmlrole.c
+523
-476
Modules/expat/xmlrole.h
Modules/expat/xmlrole.h
+22
-8
Modules/expat/xmltok.c
Modules/expat/xmltok.c
+403
-337
Modules/expat/xmltok.h
Modules/expat/xmltok.h
+138
-122
Modules/expat/xmltok_impl.c
Modules/expat/xmltok_impl.c
+481
-470
Modules/expat/xmltok_ns.c
Modules/expat/xmltok_ns.c
+46
-38
setup.py
setup.py
+3
-4
No files found.
Modules/expat/ascii.h
View file @
fc03a94a
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#define ASCII_A 0x41
...
...
@@ -69,7 +68,7 @@ See the file COPYING for copying permission.
#define ASCII_9 0x39
#define ASCII_TAB 0x09
#define ASCII_SPACE 0x20
#define ASCII_SPACE 0x20
#define ASCII_EXCL 0x21
#define ASCII_QUOT 0x22
#define ASCII_AMP 0x26
...
...
Modules/expat/expat.h
View file @
fc03a94a
/*
Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlParse_INCLUDED
#define XmlParse_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler
#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler
#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler
#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg
#endif
#include <stdlib.h>
#ifndef XMLPARSEAPI
# if defined(__declspec) && !defined(__BEOS__) && !defined(__CYGWIN__)
# define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl
# else
# define XMLPARSEAPI(type) type
# endif
#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
#ifdef XML_STATIC
#define XMLPARSEAPI(type) type __cdecl
#else
#define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl
#endif
#else
#define XMLPARSEAPI(type) type
#endif
#endif
/* not defined XMLPARSEAPI */
#ifdef __cplusplus
extern
"C"
{
#endif
typedef
void
*
XML_Parser
;
#ifdef XML_UNICODE_WCHAR_T
#define XML_UNICODE
#endif
struct
XML_ParserStruct
;
typedef
struct
XML_ParserStruct
*
XML_Parser
;
/* Information is UTF-8 encoded. */
#ifdef XML_UNICODE
/* Information is UTF-16 encoded. */
#ifdef XML_UNICODE_WCHAR_T
typedef
wchar_t
XML_Char
;
typedef
wchar_t
XML_LChar
;
#else
typedef
unsigned
short
XML_Char
;
typedef
char
XML_LChar
;
#endif
/* XML_UNICODE_WCHAR_T */
#else
/* Information is UTF-8 encoded. */
typedef
char
XML_Char
;
typedef
char
XML_LChar
;
#endif
/* XML_UNICODE */
/* Should this be defined using stdbool.h when C99 is available? */
typedef
unsigned
char
XML_Bool
;
#define XML_TRUE ((XML_Bool) 1)
#define XML_FALSE ((XML_Bool) 0)
enum
XML_Error
{
XML_ERROR_NONE
,
XML_ERROR_NO_MEMORY
,
XML_ERROR_SYNTAX
,
XML_ERROR_NO_ELEMENTS
,
XML_ERROR_INVALID_TOKEN
,
XML_ERROR_UNCLOSED_TOKEN
,
XML_ERROR_PARTIAL_CHAR
,
XML_ERROR_TAG_MISMATCH
,
XML_ERROR_DUPLICATE_ATTRIBUTE
,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT
,
XML_ERROR_PARAM_ENTITY_REF
,
XML_ERROR_UNDEFINED_ENTITY
,
XML_ERROR_RECURSIVE_ENTITY_REF
,
XML_ERROR_ASYNC_ENTITY
,
XML_ERROR_BAD_CHAR_REF
,
XML_ERROR_BINARY_ENTITY_REF
,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
,
XML_ERROR_MISPLACED_XML_PI
,
XML_ERROR_UNKNOWN_ENCODING
,
XML_ERROR_INCORRECT_ENCODING
,
XML_ERROR_UNCLOSED_CDATA_SECTION
,
XML_ERROR_EXTERNAL_ENTITY_HANDLING
,
XML_ERROR_NOT_STANDALONE
,
XML_ERROR_UNEXPECTED_STATE
,
XML_ERROR_ENTITY_DECLARED_IN_PE
,
XML_ERROR_FEATURE_REQUIRES_XML_DTD
,
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
};
enum
XML_Content_Type
{
XML_CTYPE_EMPTY
=
1
,
...
...
@@ -63,11 +124,11 @@ enum XML_Content_Quant {
typedef
struct
XML_cp
XML_Content
;
struct
XML_cp
{
enum
XML_Content_Type
type
;
enum
XML_Content_Quant
quant
;
XML_Char
*
name
;
unsigned
int
numchildren
;
XML_Content
*
children
;
enum
XML_Content_Type
type
;
enum
XML_Content_Quant
quant
;
XML_Char
*
name
;
unsigned
int
numchildren
;
XML_Content
*
children
;
};
...
...
@@ -75,53 +136,49 @@ struct XML_cp {
description of the model argument. It's the caller's responsibility
to free model when finished with it.
*/
typedef
void
(
*
XML_ElementDeclHandler
)
(
void
*
userData
,
const
XML_Char
*
name
,
XML_Content
*
model
);
XMLPARSEAPI
(
void
)
XML_SetElementDeclHandler
(
XML_Parser
parser
,
XML_ElementDeclHandler
eldecl
);
/*
The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will
generate multiple calls to this handler. The "default" parameter
may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword.
The "isrequired" parameter will be true and the default value will
be NULL in the case of "#REQUIRED". If "isrequired" is true and
default is non-NULL, then this is a "#FIXED" default.
*/
typedef
void
(
*
XML_AttlistDeclHandler
)
(
void
*
userData
,
XML_ElementDeclHandler
eldecl
);
/* The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will
generate multiple calls to this handler. The "default" parameter
may be NULL in the case of the "#IMPLIED" or "#REQUIRED"
keyword. The "isrequired" parameter will be true and the default
value will be NULL in the case of "#REQUIRED". If "isrequired" is
true and default is non-NULL, then this is a "#FIXED" default.
*/
typedef
void
(
*
XML_AttlistDeclHandler
)
(
void
*
userData
,
const
XML_Char
*
elname
,
const
XML_Char
*
attname
,
const
XML_Char
*
att_type
,
const
XML_Char
*
dflt
,
int
isrequired
);
int
isrequired
);
XMLPARSEAPI
(
void
)
XML_SetAttlistDeclHandler
(
XML_Parser
parser
,
XML_AttlistDeclHandler
attdecl
);
/* The XML declaration handler is called for *both* XML declarations and
text declarations. The way to distinguish is that the version parameter
will be null for text declarations. The encoding parameter may be null
for XML declarations. The standalone parameter will be -1, 0, or 1
indicating respectively that there was no standalone parameter in
the declaration, that it was given as no, or that it was given as yes.
*/
typedef
void
(
*
XML_XmlDeclHandler
)
(
void
*
userData
,
const
XML_Char
*
version
,
const
XML_Char
*
encoding
,
int
standalone
);
XML_AttlistDeclHandler
attdecl
);
/* The XML declaration handler is called for *both* XML declarations
and text declarations. The way to distinguish is that the version
parameter will be NULL for text declarations. The encoding
parameter may be NULL for XML declarations. The standalone
parameter will be -1, 0, or 1 indicating respectively that there
was no standalone parameter in the declaration, that it was given
as no, or that it was given as yes.
*/
typedef
void
(
*
XML_XmlDeclHandler
)
(
void
*
userData
,
const
XML_Char
*
version
,
const
XML_Char
*
encoding
,
int
standalone
);
XMLPARSEAPI
(
void
)
XML_SetXmlDeclHandler
(
XML_Parser
parser
,
XML_XmlDeclHandler
xmldecl
);
XML_XmlDeclHandler
xmldecl
);
typedef
struct
{
...
...
@@ -131,26 +188,27 @@ typedef struct {
}
XML_Memory_Handling_Suite
;
/* Constructs a new parser; encoding is the encoding specified by the
external protocol or null if there is none specified. */
external protocol or NULL if there is none specified.
*/
XMLPARSEAPI
(
XML_Parser
)
XML_ParserCreate
(
const
XML_Char
*
encoding
);
/* Constructs a new parser and namespace processor. Element type
names and attribute names that belong to a namespace will be expanded;
unprefixed attribute names are never expanded; unprefixed element type
names are expanded only if there is a default namespace. The expanded
name is the concatenation of the namespace URI, the namespace
separator character, and the local part of the name. If the namespace
separator is '\0' then the namespace URI and the local part will be
concatenated without any separator. When a namespace is not declared,
the name and prefix will be passed through without expansion. */
names and attribute names that belong to a namespace will be
expanded; unprefixed attribute names are never expanded; unprefixed
element type names are expanded only if there is a default
namespace. The expanded name is the concatenation of the namespace
URI, the namespace separator character, and the local part of the
name. If the namespace separator is '\0' then the namespace URI
and the local part will be concatenated without any separator.
When a namespace is not declared, the name and prefix will be
passed through without expansion.
*/
XMLPARSEAPI
(
XML_Parser
)
XML_ParserCreateNS
(
const
XML_Char
*
encoding
,
XML_Char
namespaceSeparator
);
/* Constructs a new parser using the memory management suit referred to
/* Constructs a new parser using the memory management suite referred to
by memsuite. If memsuite is NULL, then use the standard library memory
suite. If namespaceSeparator is non-NULL it creates a parser with
namespace processing as described above. The character pointed at
...
...
@@ -159,15 +217,26 @@ XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
All further memory operations used for the created parser will come from
the given suite.
*/
XMLPARSEAPI
(
XML_Parser
)
XML_ParserCreate_MM
(
const
XML_Char
*
encoding
,
const
XML_Memory_Handling_Suite
*
memsuite
,
const
XML_Char
*
namespaceSeparator
);
const
XML_Memory_Handling_Suite
*
memsuite
,
const
XML_Char
*
namespaceSeparator
);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated. */
/* Prepare a parser object to be re-used. This is particularly
valuable when memory allocation overhead is disproportionately high,
such as when a large number of small documents need to be parsed.
All handlers are cleared from the parser, except for the
unknownEncodingHandler. The parser's external state is re-initialized
except for the values of ns and ns_triplets.
Added in Expat 1.95.3.
*/
XMLPARSEAPI
(
XML_Bool
)
XML_ParserReset
(
XML_Parser
parser
,
const
XML_Char
*
encoding
);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated.
*/
typedef
void
(
*
XML_StartElementHandler
)(
void
*
userData
,
const
XML_Char
*
name
,
const
XML_Char
**
atts
);
...
...
@@ -192,26 +261,26 @@ typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data);
typedef
void
(
*
XML_StartCdataSectionHandler
)(
void
*
userData
);
typedef
void
(
*
XML_EndCdataSectionHandler
)(
void
*
userData
);
/* This is called for any characters in the XML document for
which there is no applicable handler. This includes both
characters that are part of markup which is of a kind that is
not reported (comments, markup declarations), or characters
that are part of a construct which could be reported but
for which no handler has been supplied. The characters are passed
exactly as they were in the XML document except that
they will be encoded in UTF-8. Line boundaries are not normalized.
Note that a byte order mark character is not passed to the default handler.
There are no guarantees about how characters are divided between calls
to the default handler: for example, a comment might be split between
multiple calls. */
/* This is called for any characters in the XML document for which
there is no applicable handler. This includes both characters that
are part of markup which is of a kind that is not reported
(comments, markup declarations), or characters that are part of a
construct which could be reported but for which no handler has been
supplied. The characters are passed exactly as they were in the XML
document except that they will be encoded in UTF-8 or UTF-16.
Line boundaries are not normalized. Note that a byte order mark
character is not passed to the default handler. There are no
guarantees about how characters are divided between calls to the
default handler: for example, a comment might be split between
multiple calls.
*/
typedef
void
(
*
XML_DefaultHandler
)(
void
*
userData
,
const
XML_Char
*
s
,
int
len
);
/* This is called for the start of the DOCTYPE declaration, before
any DTD or internal subset is parsed.
*/
any DTD or internal subset is parsed.
*/
typedef
void
(
*
XML_StartDoctypeDeclHandler
)(
void
*
userData
,
const
XML_Char
*
doctypeName
,
const
XML_Char
*
sysid
,
...
...
@@ -219,7 +288,9 @@ typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
int
has_internal_subset
);
/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external subset. */
closing > is encountered, but after processing any external
subset.
*/
typedef
void
(
*
XML_EndDoctypeDeclHandler
)(
void
*
userData
);
/* This is called for entity declarations. The is_parameter_entity
...
...
@@ -227,17 +298,19 @@ typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
otherwise.
For internal entities (<!ENTITY foo "bar">), value will
be non-null and systemId, publicID, and notationName will be null.
The value string is NOT null terminated; the length is provided in
be non-NULL and systemId, publicID, and notationName will be NULL.
The value string is NOT nul-terminated; the length is provided in
the value_length argument. Since it is legal to have zero-length
values, do not use this argument to test for internal entities.
For external entities, value will be null and systemId will be non-null.
The publicId argument will be null unless a public identifier was
provided. The notationName argument will have a non-null value only
for unparsed entity declarations.
*/
For external entities, value will be NULL and systemId will be
non-NULL. The publicId argument will be NULL unless a public
identifier was provided. The notationName argument will have a
non-NULL value only for unparsed entity declarations.
Note that is_parameter_entity can't be changed to XML_Bool, since
that would break binary compatibility.
*/
typedef
void
(
*
XML_EntityDeclHandler
)
(
void
*
userData
,
const
XML_Char
*
entityName
,
int
is_parameter_entity
,
...
...
@@ -247,19 +320,20 @@ typedef void (*XML_EntityDeclHandler) (void *userData,
const
XML_Char
*
systemId
,
const
XML_Char
*
publicId
,
const
XML_Char
*
notationName
);
XMLPARSEAPI
(
void
)
XML_SetEntityDeclHandler
(
XML_Parser
parser
,
XML_EntityDeclHandler
handler
);
XML_EntityDeclHandler
handler
);
/* OBSOLETE -- OBSOLETE -- OBSOLETE
This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA)
entity. The base argument is whatever was set by XML_SetBase.
The entityName, systemId and notationName arguments will never be null.
The other arguments may be. */
This is called for a declaration of an unparsed (NDATA) entity.
The base argument is whatever was set by XML_SetBase. The
entityName, systemId and notationName arguments will never be
NULL. The other arguments may be.
*/
typedef
void
(
*
XML_UnparsedEntityDeclHandler
)(
void
*
userData
,
const
XML_Char
*
entityName
,
const
XML_Char
*
base
,
...
...
@@ -267,10 +341,10 @@ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
const
XML_Char
*
publicId
,
const
XML_Char
*
notationName
);
/* This is called for a declaration of notation.
The base argument is whatever was set by XML_SetBase.
The notationName will never be null. The other arguments can be. */
/* This is called for a declaration of notation.
The base argument is
whatever was set by XML_SetBase. The notationName will never be
NULL. The other arguments can be.
*/
typedef
void
(
*
XML_NotationDeclHandler
)(
void
*
userData
,
const
XML_Char
*
notationName
,
const
XML_Char
*
base
,
...
...
@@ -278,11 +352,11 @@ typedef void (*XML_NotationDeclHandler)(void *userData,
const
XML_Char
*
publicId
);
/* When namespace processing is enabled, these are called once for
each namespace declaration. The call to the start and end element
handlers occur between the calls to the start and end namespace
declaration handlers. For an xmlns attribute, prefix will be null.
For an xmlns="" attribute, uri will be null. */
each namespace declaration. The calls to the start and end element
handlers occur between the calls to the start and end namespace
declaration handlers. For an xmlns attribute, prefix will be
NULL. For an xmlns="" attribute, uri will be NULL.
*/
typedef
void
(
*
XML_StartNamespaceDeclHandler
)(
void
*
userData
,
const
XML_Char
*
prefix
,
const
XML_Char
*
uri
);
...
...
@@ -290,87 +364,123 @@ typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
typedef
void
(
*
XML_EndNamespaceDeclHandler
)(
void
*
userData
,
const
XML_Char
*
prefix
);
/* This is called if the document is not standalone (it has an
external subset or a reference to a parameter entity, but does not
have standalone="yes"). If this handler returns 0, then processing
will not continue, and the parser will return a
XML_ERROR_NOT_STANDALONE error. */
/* This is called if the document is not standalone, that is, it has an
external subset or a reference to a parameter entity, but does not
have standalone="yes". If this handler returns XML_STATUS_ERROR,
then processing will not continue, and the parser will return an
XML_ERROR_NOT_STANDALONE error.
If parameter entity parsing is enabled, then in addition to the
conditions above this handler will only be called if the referenced
entity was actually read.
*/
typedef
int
(
*
XML_NotStandaloneHandler
)(
void
*
userData
);
/* This is called for a reference to an external parsed general entity.
The referenced entity is not automatically parsed.
The application can parse it immediately or later using
XML_ExternalEntityParserCreate.
The parser argument is the parser parsing the entity containing the reference;
it can be passed as the parser argument to XML_ExternalEntityParserCreate.
The systemId argument is the system identifier as specified in the entity
declaration; it will not be null.
The base argument is the system identifier that should be used as the base for
resolving systemId if systemId was relative; this is set by XML_SetBase;
it may be null.
The publicId argument is the public identifier as specified in the entity
declaration, or null if none was specified; the whitespace in the public
identifier will have been normalized as required by the XML spec.
The context argument specifies the parsing context in the format
expected by the context argument to
XML_ExternalEntityParserCreate; context is valid only until the handler
returns, so if the referenced entity is to be parsed later, it must be copied.
The handler should return 0 if processing should not continue because of
a fatal error in the handling of the external entity.
In this case the calling parser will return an
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
Note that unlike other handlers the first argument is the parser, not
userData. */
/* This is called for a reference to an external parsed general
entity. The referenced entity is not automatically parsed. The
application can parse it immediately or later using
XML_ExternalEntityParserCreate.
The parser argument is the parser parsing the entity containing the
reference; it can be passed as the parser argument to
XML_ExternalEntityParserCreate. The systemId argument is the
system identifier as specified in the entity declaration; it will
not be NULL.
The base argument is the system identifier that should be used as
the base for resolving systemId if systemId was relative; this is
set by XML_SetBase; it may be NULL.
The publicId argument is the public identifier as specified in the
entity declaration, or NULL if none was specified; the whitespace
in the public identifier will have been normalized as required by
the XML spec.
The context argument specifies the parsing context in the format
expected by the context argument to XML_ExternalEntityParserCreate;
context is valid only until the handler returns, so if the
referenced entity is to be parsed later, it must be copied.
context is NULL only when the entity is a parameter entity.
The handler should return XML_STATUS_ERROR if processing should not
continue because of a fatal error in the handling of the external
entity. In this case the calling parser will return an
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
Note that unlike other handlers the first argument is the parser,
not userData.
*/
typedef
int
(
*
XML_ExternalEntityRefHandler
)(
XML_Parser
parser
,
const
XML_Char
*
context
,
const
XML_Char
*
base
,
const
XML_Char
*
systemId
,
const
XML_Char
*
publicId
);
/* This structure is filled in by the XML_UnknownEncodingHandler
to provide information to the parser about encodings that are unknown
to the parser.
The map[b] member gives information about byte sequences
whose first byte is b.
If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar
value c.
If map[b] is -1, then the byte sequence is malformed.
If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
sequence that encodes a single Unicode scalar value.
The data member will be passed as the first argument to the convert function.
The convert function is used to convert multibyte sequences;
s will point to a n-byte sequence where map[(unsigned char)*s] == -n.
The convert function must return the Unicode scalar value
represented by this byte sequence or -1 if the byte sequence is malformed.
The convert function may be null if the encoding is a single-byte encoding,
that is if map[b] >= -1 for all bytes b.
When the parser is finished with the encoding, then if release is not null,
it will call release passing it the data member;
once release has been called, the convert function will not be called again.
Expat places certain restrictions on the encodings that are supported
using this mechanism.
1. Every ASCII character that can appear in a well-formed XML document,
other than the characters
$@\^`{}~
must be represented by a single byte, and that byte must be the
same byte that represents that character in ASCII.
2. No character may require more than 4 bytes to encode.
3. All characters encoded must have Unicode scalar values <= 0xFFFF, (i.e.,
characters that would be encoded by surrogates in UTF-16 are not
allowed). Note that this restriction doesn't apply to the built-in
support for UTF-8 and UTF-16.
4. No Unicode character may be encoded by more than one distinct sequence
of bytes. */
/* This is called in two situations:
1) An entity reference is encountered for which no declaration
has been read *and* this is not an error.
2) An internal entity reference is read, but not expanded, because
XML_SetDefaultHandler has been called.
Note: skipped parameter entities in declarations and skipped general
entities in attribute values cannot be reported, because
the event would be out of sync with the reporting of the
declarations or attribute values
*/
typedef
void
(
*
XML_SkippedEntityHandler
)(
void
*
userData
,
const
XML_Char
*
entityName
,
int
is_parameter_entity
);
/* This structure is filled in by the XML_UnknownEncodingHandler to
provide information to the parser about encodings that are unknown
to the parser.
The map[b] member gives information about byte sequences whose
first byte is b.
If map[b] is c where c is >= 0, then b by itself encodes the
Unicode scalar value c.
If map[b] is -1, then the byte sequence is malformed.
If map[b] is -n, where n >= 2, then b is the first byte of an
n-byte sequence that encodes a single Unicode scalar value.
The data member will be passed as the first argument to the convert
function.
The convert function is used to convert multibyte sequences; s will
point to an n-byte sequence where map[(unsigned char)*s] == -n. The
convert function must return the Unicode scalar value represented
by this byte sequence or -1 if the byte sequence is malformed.
The convert function may be NULL if the encoding is a single-byte
encoding, that is if map[b] >= -1 for all bytes b.
When the parser is finished with the encoding, then if release is
not NULL, it will call release passing it the data member; once
release has been called, the convert function will not be called
again.
Expat places certain restrictions on the encodings that are supported
using this mechanism.
1. Every ASCII character that can appear in a well-formed XML document,
other than the characters
$@\^`{}~
must be represented by a single byte, and that byte must be the
same byte that represents that character in ASCII.
2. No character may require more than 4 bytes to encode.
3. All characters encoded must have Unicode scalar values <=
0xFFFF, (i.e., characters that would be encoded by surrogates in
UTF-16 are not allowed). Note that this restriction doesn't
apply to the built-in support for UTF-8 and UTF-16.
4. No Unicode character may be encoded by more than one distinct
sequence of bytes.
*/
typedef
struct
{
int
map
[
256
];
void
*
data
;
...
...
@@ -379,24 +489,28 @@ typedef struct {
}
XML_Encoding
;
/* This is called for an encoding that is unknown to the parser.
The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in
the encoding declaration.
If the callback can provide information about the encoding,
it must fill in the XML_Encoding structure, and return 1.
Otherwise it must return 0.
If info does not describe a suitable encoding,
then the parser will return an XML_UNKNOWN_ENCODING error. */
The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in
the encoding declaration.
If the callback can provide information about the encoding, it must
fill in the XML_Encoding structure, and return XML_STATUS_OK.
Otherwise it must return XML_STATUS_ERROR.
If info does not describe a suitable encoding, then the parser will
return an XML_ERROR_UNKNOWN_ENCODING error.
*/
typedef
int
(
*
XML_UnknownEncodingHandler
)(
void
*
encodingHandlerData
,
const
XML_Char
*
name
,
XML_Encoding
*
info
);
XMLPARSEAPI
(
void
)
XML_SetElementHandler
(
XML_Parser
parser
,
XML_StartElementHandler
start
,
XML_EndElementHandler
end
);
XML_StartElementHandler
start
,
XML_EndElementHandler
end
);
XMLPARSEAPI
(
void
)
XML_SetStartElementHandler
(
XML_Parser
,
XML_StartElementHandler
);
...
...
@@ -406,19 +520,19 @@ XML_SetEndElementHandler(XML_Parser, XML_EndElementHandler);
XMLPARSEAPI
(
void
)
XML_SetCharacterDataHandler
(
XML_Parser
parser
,
XML_CharacterDataHandler
handler
);
XML_CharacterDataHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetProcessingInstructionHandler
(
XML_Parser
parser
,
XML_ProcessingInstructionHandler
handler
);
XML_ProcessingInstructionHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetCommentHandler
(
XML_Parser
parser
,
XML_CommentHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetCdataSectionHandler
(
XML_Parser
parser
,
XML_StartCdataSectionHandler
start
,
XML_EndCdataSectionHandler
end
);
XML_StartCdataSectionHandler
start
,
XML_EndCdataSectionHandler
end
);
XMLPARSEAPI
(
void
)
XML_SetStartCdataSectionHandler
(
XML_Parser
parser
,
...
...
@@ -429,89 +543,98 @@ XML_SetEndCdataSectionHandler(XML_Parser parser,
XML_EndCdataSectionHandler
end
);
/* This sets the default handler and also inhibits expansion of
internal entities. The entity reference will be passed to the default
handler. */
internal entities. These entity references will be passed to the
default handler, or to the skipped entity handler, if one is set.
*/
XMLPARSEAPI
(
void
)
XML_SetDefaultHandler
(
XML_Parser
parser
,
XML_DefaultHandler
handler
);
XML_DefaultHandler
handler
);
/* This sets the default handler but does not inhibit expansion of
internal entities. The entity reference will not be passed to the
default handler. */
internal entities. The entity reference will not be passed to the
default handler.
*/
XMLPARSEAPI
(
void
)
XML_SetDefaultHandlerExpand
(
XML_Parser
parser
,
XML_DefaultHandler
handler
);
XML_DefaultHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetDoctypeDeclHandler
(
XML_Parser
parser
,
XML_StartDoctypeDeclHandler
start
,
XML_EndDoctypeDeclHandler
end
);
XML_StartDoctypeDeclHandler
start
,
XML_EndDoctypeDeclHandler
end
);
XMLPARSEAPI
(
void
)
XML_SetStartDoctypeDeclHandler
(
XML_Parser
parser
,
XML_StartDoctypeDeclHandler
start
);
XML_StartDoctypeDeclHandler
start
);
XMLPARSEAPI
(
void
)
XML_SetEndDoctypeDeclHandler
(
XML_Parser
parser
,
XML_EndDoctypeDeclHandler
end
);
XML_EndDoctypeDeclHandler
end
);
XMLPARSEAPI
(
void
)
XML_SetUnparsedEntityDeclHandler
(
XML_Parser
parser
,
XML_UnparsedEntityDeclHandler
handler
);
XML_UnparsedEntityDeclHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetNotationDeclHandler
(
XML_Parser
parser
,
XML_NotationDeclHandler
handler
);
XML_NotationDeclHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetNamespaceDeclHandler
(
XML_Parser
parser
,
XML_StartNamespaceDeclHandler
start
,
XML_EndNamespaceDeclHandler
end
);
XML_StartNamespaceDeclHandler
start
,
XML_EndNamespaceDeclHandler
end
);
XMLPARSEAPI
(
void
)
XML_SetStartNamespaceDeclHandler
(
XML_Parser
parser
,
XML_StartNamespaceDeclHandler
start
);
XML_StartNamespaceDeclHandler
start
);
XMLPARSEAPI
(
void
)
XML_SetEndNamespaceDeclHandler
(
XML_Parser
parser
,
XML_EndNamespaceDeclHandler
end
);
XML_EndNamespaceDeclHandler
end
);
XMLPARSEAPI
(
void
)
XML_SetNotStandaloneHandler
(
XML_Parser
parser
,
XML_NotStandaloneHandler
handler
);
XML_NotStandaloneHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetExternalEntityRefHandler
(
XML_Parser
parser
,
XML_ExternalEntityRefHandler
handler
);
XML_ExternalEntityRefHandler
handler
);
/* If a non-null value for arg is specified here, then it will be passed
as the first argument to the external entity ref handler instead
of the parser object. */
/* If a non-NULL value for arg is specified here, then it will be
passed as the first argument to the external entity ref handler
instead of the parser object.
*/
XMLPARSEAPI
(
void
)
XML_SetExternalEntityRefHandlerArg
(
XML_Parser
,
void
*
arg
);
XMLPARSEAPI
(
void
)
XML_SetSkippedEntityHandler
(
XML_Parser
parser
,
XML_SkippedEntityHandler
handler
);
XMLPARSEAPI
(
void
)
XML_SetUnknownEncodingHandler
(
XML_Parser
parser
,
XML_UnknownEncodingHandler
handler
,
void
*
encodingHandlerData
);
XML_UnknownEncodingHandler
handler
,
void
*
encodingHandlerData
);
/* This can be called within a handler for a start element, end element,
processing instruction or character data. It causes the corresponding
markup to be passed to the default handler. */
/* This can be called within a handler for a start element, end
element, processing instruction or character data. It causes the
corresponding markup to be passed to the default handler.
*/
XMLPARSEAPI
(
void
)
XML_DefaultCurrent
(
XML_Parser
parser
);
/* If do_nst is non-zero, and namespace processing is in effect, and
a name has a prefix (i.e. an explicit namespace qualifier) then
that name is returned as a triplet in a single
string separated by the separator character specified when the parser
was created: URI
+ sep + local_name + sep + prefix.
that name is returned as a triplet in a single
string separated by
the separator character specified when the parser was created: URI
+ sep + local_name + sep + prefix.
If do_nst is zero, then namespace information is returned in the
default manner (URI + sep + local_name) whether or not the name
s
default manner (URI + sep + local_name) whether or not the name
has a prefix.
Note: Calling XML_SetReturnNSTriplet after XML_Parse or
XML_ParseBuffer has no effect.
*/
XMLPARSEAPI
(
void
)
...
...
@@ -521,84 +644,130 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst);
XMLPARSEAPI
(
void
)
XML_SetUserData
(
XML_Parser
parser
,
void
*
userData
);
/* Returns the last value set by XML_SetUserData or
null
. */
/* Returns the last value set by XML_SetUserData or
NULL
. */
#define XML_GetUserData(parser) (*(void **)(parser))
/* This is equivalent to supplying an encoding argument
to XML_ParserCreate. It must not be called after XML_Parse
or XML_ParseBuffer. */
XMLPARSEAPI
(
int
)
/* This is equivalent to supplying an encoding argument to
XML_ParserCreate. On success XML_SetEncoding returns non-zero,
zero otherwise.
Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer
has no effect and returns XML_STATUS_ERROR.
*/
XMLPARSEAPI
(
enum
XML_Status
)
XML_SetEncoding
(
XML_Parser
parser
,
const
XML_Char
*
encoding
);
/* If this function is called, then the parser will be passed
as the first argument to callbacks instead of userData.
The userData will still be accessible using XML_GetUserData. */
/* If this function is called, then the parser will be passed
as the
first argument to callbacks instead of userData. The userData will
still be accessible using XML_GetUserData.
*/
XMLPARSEAPI
(
void
)
XML_UseParserAsHandlerArg
(
XML_Parser
parser
);
/* Sets the base to be used for resolving relative URIs in system
identifiers in declarations. Resolving relative identifiers is left
to the application: this value will be passed through as the base
argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler
and XML_UnparsedEntityDeclHandler. The base argument will be copied.
Returns zero if out of memory, non-zero otherwise. */
/* If useDTD == XML_TRUE is passed to this function, then the parser
will assume that there is an external subset, even if none is
specified in the document. In such a case the parser will call the
externalEntityRefHandler with a value of NULL for the systemId
argument (the publicId and context arguments will be NULL as well).
Note: If this function is called, then this must be done before
the first call to XML_Parse or XML_ParseBuffer, since it will
have no effect after that; in that case it returns
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING.
Note: If the document does not have a DOCTYPE declaration at all,
then startDoctypeDeclHandler and endDoctypeDeclHandler will not
be called, despite an external subset being parsed.
Note: If XML_DTD is not defined when Expat is compiled, returns
XML_ERROR_FEATURE_REQUIRES_XML_DTD.
*/
XMLPARSEAPI
(
enum
XML_Error
)
XML_UseForeignDTD
(
XML_Parser
parser
,
XML_Bool
useDTD
);
XMLPARSEAPI
(
int
)
/* Sets the base to be used for resolving relative URIs in system
identifiers in declarations. Resolving relative identifiers is
left to the application: this value will be passed through as the
base argument to the XML_ExternalEntityRefHandler,
XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base
argument will be copied. Returns XML_STATUS_ERROR if out of memory,
XML_STATUS_OK otherwise.
*/
XMLPARSEAPI
(
enum
XML_Status
)
XML_SetBase
(
XML_Parser
parser
,
const
XML_Char
*
base
);
XMLPARSEAPI
(
const
XML_Char
*
)
XML_GetBase
(
XML_Parser
parser
);
/* Returns the number of the attribute/value pairs passed in last call
to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus
this corresponds to an index into the atts array passed to the
XML_StartElementHandler. */
to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus
this corresponds to an index into the atts array passed to the
XML_StartElementHandler.
*/
XMLPARSEAPI
(
int
)
XML_GetSpecifiedAttributeCount
(
XML_Parser
parser
);
/* Returns the index of the ID attribute passed in the last call to
XML_StartElementHandler, or -1 if there is no ID attribute. Each
attribute/value pair counts as 2; thus this corresponds to an index
into the atts array passed to the XML_StartElementHandler. */
XML_StartElementHandler, or -1 if there is no ID attribute. Each
attribute/value pair counts as 2; thus this corresponds to an
index into the atts array passed to the XML_StartElementHandler.
*/
XMLPARSEAPI
(
int
)
XML_GetIdAttributeIndex
(
XML_Parser
parser
);
/* Parses some input. Returns 0 if a fatal error is detected.
The last call to XML_Parse must have isFinal true;
len may be zero for this call (or any other). */
XMLPARSEAPI
(
int
)
/* Parses some input. Returns XML_STATUS_ERROR if a fatal error is
detected. The last call to XML_Parse must have isFinal true; len
may be zero for this call (or any other).
The XML_Status enum gives the possible return values for the
XML_Parse and XML_ParseBuffer functions. Though the return values
for these functions have always been described as a Boolean value,
the implementation, at least for the 1.95.x series, has always
returned exactly one of these values. The preprocessor #defines
are included so this stanza can be added to code that still needs
to support older versions of Expat 1.95.x:
#ifndef XML_STATUS_OK
#define XML_STATUS_OK 1
#define XML_STATUS_ERROR 0
#endif
Otherwise, the #define hackery is quite ugly and would have been dropped.
*/
/* Status values returned by XML_Parse and XML_ParseBuffer.  Each
   enumerator is also defined as a macro expanding to itself, so client
   code can detect its presence with #ifdef / #ifndef.
*/
enum XML_Status {
  XML_STATUS_ERROR = 0,
#define XML_STATUS_ERROR XML_STATUS_ERROR
  XML_STATUS_OK = 1
#define XML_STATUS_OK XML_STATUS_OK
};
XMLPARSEAPI
(
enum
XML_Status
)
XML_Parse
(
XML_Parser
parser
,
const
char
*
s
,
int
len
,
int
isFinal
);
XMLPARSEAPI
(
void
*
)
XML_GetBuffer
(
XML_Parser
parser
,
int
len
);
XMLPARSEAPI
(
int
)
XMLPARSEAPI
(
enum
XML_Status
)
XML_ParseBuffer
(
XML_Parser
parser
,
int
len
,
int
isFinal
);
/* Creates an XML_Parser object that can parse an external general
entity; context is a '\0'-terminated string specifying the parse
context; encoding is a '\0'-terminated string giving the name of the
externally specified encoding, or null if there is no externally
specified encoding. The context string consists of a sequence of
tokens separated by formfeeds (\f); a token consisting of a name
specifies that the general entity of the name is open; a token of the
form prefix=uri specifies the namespace for a particular prefix; a
token of the form =uri specifies the default namespace. This can be
called at any point after the first call to an
ExternalEntityRefHandler so long as the parser has not yet been
freed. The new parser is completely independent and may safely be
used in a separate thread. The handlers and userData are initialized
from the parser argument. Returns 0 if out of memory. Otherwise
returns a new XML_Parser object. */
entity; context is a '\0'-terminated string specifying the parse
context; encoding is a '\0'-terminated string giving the name of
the externally specified encoding, or NULL if there is no
externally specified encoding. The context string consists of a
sequence of tokens separated by formfeeds (\f); a token consisting
of a name specifies that the general entity of the name is open; a
token of the form prefix=uri specifies the namespace for a
particular prefix; a token of the form =uri specifies the default
namespace. This can be called at any point after the first call to
an ExternalEntityRefHandler so long as the parser has not yet
been freed. The new parser is completely independent and may
safely be used in a separate thread. The handlers and userData are
initialized from the parser argument. Returns NULL if out of memory.
Otherwise returns a new XML_Parser object.
*/
XMLPARSEAPI
(
XML_Parser
)
XML_ExternalEntityParserCreate
(
XML_Parser
parser
,
const
XML_Char
*
context
,
const
XML_Char
*
encoding
);
const
XML_Char
*
context
,
const
XML_Char
*
encoding
);
enum
XML_ParamEntityParsing
{
XML_PARAM_ENTITY_PARSING_NEVER
,
...
...
@@ -607,76 +776,56 @@ enum XML_ParamEntityParsing {
};
/* Controls parsing of parameter entities (including the external DTD
subset). If parsing of parameter entities is enabled, then references
to external parameter entities (including the external DTD subset)
will be passed to the handler set with
XML_SetExternalEntityRefHandler. The context passed will be 0.
Unlike external general entities, external parameter entities can only
be parsed synchronously. If the external parameter entity is to be
parsed, it must be parsed during the call to the external entity ref
handler: the complete sequence of XML_ExternalEntityParserCreate,
XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during
this call. After XML_ExternalEntityParserCreate has been called to
create the parser for the external parameter entity (context must be 0
for this call), it is illegal to make any calls on the old parser
until XML_ParserFree has been called on the newly created parser. If
the library has been compiled without support for parameter entity
parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero. */
subset). If parsing of parameter entities is enabled, then
references to external parameter entities (including the external
DTD subset) will be passed to the handler set with
XML_SetExternalEntityRefHandler. The context passed will be 0.
Unlike external general entities, external parameter entities can
only be parsed synchronously. If the external parameter entity is
to be parsed, it must be parsed during the call to the external
entity ref handler: the complete sequence of
XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and
XML_ParserFree calls must be made during this call. After
XML_ExternalEntityParserCreate has been called to create the parser
for the external parameter entity (context must be 0 for this
call), it is illegal to make any calls on the old parser until
XML_ParserFree has been called on the newly created parser.
If the library has been compiled without support for parameter
entity parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero.
Note: If XML_SetParamEntityParsing is called after XML_Parse or
XML_ParseBuffer, then it has no effect and will always return 0.
*/
XMLPARSEAPI
(
int
)
XML_SetParamEntityParsing
(
XML_Parser
parser
,
enum
XML_ParamEntityParsing
parsing
);
enum
XML_Error
{
XML_ERROR_NONE
,
XML_ERROR_NO_MEMORY
,
XML_ERROR_SYNTAX
,
XML_ERROR_NO_ELEMENTS
,
XML_ERROR_INVALID_TOKEN
,
XML_ERROR_UNCLOSED_TOKEN
,
XML_ERROR_PARTIAL_CHAR
,
XML_ERROR_TAG_MISMATCH
,
XML_ERROR_DUPLICATE_ATTRIBUTE
,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT
,
XML_ERROR_PARAM_ENTITY_REF
,
XML_ERROR_UNDEFINED_ENTITY
,
XML_ERROR_RECURSIVE_ENTITY_REF
,
XML_ERROR_ASYNC_ENTITY
,
XML_ERROR_BAD_CHAR_REF
,
XML_ERROR_BINARY_ENTITY_REF
,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
,
XML_ERROR_MISPLACED_XML_PI
,
XML_ERROR_UNKNOWN_ENCODING
,
XML_ERROR_INCORRECT_ENCODING
,
XML_ERROR_UNCLOSED_CDATA_SECTION
,
XML_ERROR_EXTERNAL_ENTITY_HANDLING
,
XML_ERROR_NOT_STANDALONE
,
XML_ERROR_UNEXPECTED_STATE
};
/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode
returns information about the error. */
enum
XML_ParamEntityParsing
parsing
);
/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
XML_GetErrorCode returns information about the error.
*/
XMLPARSEAPI
(
enum
XML_Error
)
XML_GetErrorCode
(
XML_Parser
parser
);
/* These functions return information about the current parse location.
They may be called when XML_Parse or XML_ParseBuffer return 0;
in this case the location is the location of the character at which
the error was detected.
They may also be called from any other callback called to report
some parse event; in this case the location is the location of the first
of the sequence of characters that generated the event. */
/* These functions return information about the current parse
location. They may be called from any callback called to report
some parse event; in this case the location is the location of
the first of the sequence of characters that generated the event.
They may also be called after returning from a call to XML_Parse
or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then
the location is the location of the character at which the error
was detected; otherwise the location is the location of the last
parse event, as described above.
*/
XMLPARSEAPI
(
int
)
XML_GetCurrentLineNumber
(
XML_Parser
parser
);
XMLPARSEAPI
(
int
)
XML_GetCurrentColumnNumber
(
XML_Parser
parser
);
XMLPARSEAPI
(
long
)
XML_GetCurrentByteIndex
(
XML_Parser
parser
);
/* Return the number of bytes in the current event.
Returns 0 if the event is in an internal entity. */
Returns 0 if the event is in an internal entity.
*/
XMLPARSEAPI
(
int
)
XML_GetCurrentByteCount
(
XML_Parser
parser
);
...
...
@@ -684,21 +833,35 @@ XML_GetCurrentByteCount(XML_Parser parser);
the integer pointed to by offset to the offset within this buffer
of the current parse position, and sets the integer pointed to by size
to the size of this buffer (the number of input bytes). Otherwise
returns a
null pointer. Also returns a null
pointer if a parse isn't
returns a
NULL pointer. Also returns a NULL
pointer if a parse isn't
active.
NOTE: The character pointer returned should not be used outside
the handler that makes the call.
*/
the handler that makes the call.
*/
XMLPARSEAPI
(
const
char
*
)
XML_GetInputContext
(
XML_Parser
parser
,
int
*
offset
,
int
*
size
);
int
*
offset
,
int
*
size
);
/* For backwards compatibility with previous versions. */
/* Old XML_GetError* names, kept as aliases for the XML_GetCurrent*
   functions.  Each alias is defined exactly once, with its replacement
   on the same line as the #define.
*/
#define XML_GetErrorLineNumber   XML_GetCurrentLineNumber
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex    XML_GetCurrentByteIndex
/* Frees the content model passed to the element declaration handler */
XMLPARSEAPI
(
void
)
XML_FreeContentModel
(
XML_Parser
parser
,
XML_Content
*
model
);
/* Exposing the memory handling functions used in Expat */
XMLPARSEAPI
(
void
*
)
XML_MemMalloc
(
XML_Parser
parser
,
size_t
size
);
XMLPARSEAPI
(
void
*
)
XML_MemRealloc
(
XML_Parser
parser
,
void
*
ptr
,
size_t
size
);
XMLPARSEAPI
(
void
)
XML_MemFree
(
XML_Parser
parser
,
void
*
ptr
);
/* Frees memory used by the parser. */
XMLPARSEAPI
(
void
)
...
...
@@ -706,7 +869,7 @@ XML_ParserFree(XML_Parser parser);
/* Returns a string describing the error. */
XMLPARSEAPI
(
const
XML_LChar
*
)
XML_ErrorString
(
int
code
);
XML_ErrorString
(
enum
XML_Error
code
);
/* Return a string containing the version number of this expat */
XMLPARSEAPI
(
const
XML_LChar
*
)
...
...
@@ -719,18 +882,42 @@ typedef struct {
}
XML_Expat_Version
;
/* Return an XML_Expat_Version structure containing numeric version
number information for this version of expat
*/
number information for this version of expat
.
*/
XMLPARSEAPI
(
XML_Expat_Version
)
XML_ExpatVersionInfo
(
void
);
/* VERSION is not defined in expat.h.in, but it really belongs here,
and defining it on the command line gives difficulties with MSVC. */
#define VERSION "1.95.2"
/* Added in Expat 1.95.5. */
/* Identifies the feature described by an XML_Feature record.
   Numbering starts at XML_FEATURE_END == 0 and continues implicitly.
*/
enum XML_FeatureEnum {
  XML_FEATURE_END = 0,
  XML_FEATURE_UNICODE,
  XML_FEATURE_UNICODE_WCHAR_T,
  XML_FEATURE_DTD,
  XML_FEATURE_CONTEXT_BYTES,
  XML_FEATURE_MIN_SIZE,
  XML_FEATURE_SIZEOF_XML_CHAR,
  XML_FEATURE_SIZEOF_XML_LCHAR
  /* Additional features must be added to the end of this enum. */
};
/* One feature record; XML_GetFeatureList returns a pointer to const
   records of this type.
*/
typedef struct {
  enum XML_FeatureEnum feature;  /* which feature this record describes */
  const XML_LChar *name;         /* feature name string */
  long value;                    /* numeric value attached to the feature */
} XML_Feature;
XMLPARSEAPI
(
const
XML_Feature
*
)
XML_GetFeatureList
(
void
);
/* Expat follows the GNU/Linux convention of odd number minor version for
beta/development releases and even number minor version for stable
releases. Micro is bumped with each release, and set to 0 with each
change to major or minor version.
*/
/* Numeric version of this Expat release: 1.95.6.  Each macro is
   defined once, with its value on the same line.
*/
#define XML_MAJOR_VERSION 1
#define XML_MINOR_VERSION 95
#define XML_MICRO_VERSION 6
#ifdef __cplusplus
}
...
...
Modules/expat/internal.h
0 → 100644
View file @
fc03a94a
/* internal.h
Internal definitions used by Expat. This is not needed to compile
client code.
The following calling convention macros are defined for frequently
called functions:
FASTCALL - Used for those internal functions that have a simple
body and a low number of arguments and local variables.
PTRCALL - Used for functions called through function pointers.
PTRFASTCALL - Like PTRCALL, but for low number of arguments.
inline - Used for selected internal functions for which inlining
may improve performance on some platforms.
Note: Use of these macros is based on judgement, not hard rules,
and therefore subject to change.
*/
/* Calling-convention macros.  With GCC, FASTCALL and PTRFASTCALL expand
   to the regparm(3) attribute and PTRCALL is empty.  With WIN32 (and no
   GCC) all three are empty.
*/
#if defined(__GNUC__)
/* Instability reported with egcs on a RedHat Linux 7.3.
Let's comment it out:
#define FASTCALL __attribute__((stdcall, regparm(3)))
and let's try this:
*/
#define FASTCALL __attribute__((regparm(3)))
#define PTRCALL
#define PTRFASTCALL __attribute__((regparm(3)))
#elif defined(WIN32)
/* Using __fastcall seems to have an unexpected negative effect under
MS VC++, especially for function pointers, so we won't use it for
now on that platform. It may be reconsidered for a future release
if it can be made more effective.
Likely reason: __fastcall on Windows is like stdcall, therefore
the compiler cannot perform stack optimizations for call clusters.
*/
#define FASTCALL
#define PTRCALL
#define PTRFASTCALL
#endif
/* Fallbacks: on any other platform the three macros expand to nothing,
   so declarations that use them still compile.
*/
#ifndef FASTCALL
#define FASTCALL
#endif
#ifndef PTRCALL
#define PTRCALL
#endif
#ifndef PTRFASTCALL
#define PTRFASTCALL
#endif
/* Unless XML_MIN_SIZE is defined: when compiling as C with GCC and
   `inline` is not already a macro, map `inline` to GCC's __inline.
*/
#ifndef XML_MIN_SIZE
#if !defined(__cplusplus) && !defined(inline)
#ifdef __GNUC__
#define inline __inline
#endif
/* __GNUC__ */
#endif
#endif
/* XML_MIN_SIZE */
/* In C++ `inline` is defined as itself; in C, if `inline` is still not
   a macro at this point, it is defined as empty.
*/
#ifdef __cplusplus
#define inline inline
#else
#ifndef inline
#define inline
#endif
#endif
Modules/expat/xmlparse.c
View file @
fc03a94a
This source diff could not be displayed because it is too large. You can
view the blob
instead.
Modules/expat/xmlrole.c
View file @
fc03a94a
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifdef COMPILED_FROM_DSP
# include "winconfig.h"
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
#else
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <expat_config.h>
#endif
/* ndef COMPILED_FROM_DSP */
#include "internal.h"
#include "xmlrole.h"
#include "ascii.h"
...
...
@@ -21,29 +21,56 @@ See the file COPYING for copying permission.
*/
/* Keyword names, spelled with the ASCII_* character constants and
   terminated by '\0'.  They are passed to XmlNameMatchesAscii for
   comparison against names in the input.  Each table is defined
   exactly once.
*/
static const char KW_ANY[] = {
    ASCII_A, ASCII_N, ASCII_Y, '\0' };
static const char KW_ATTLIST[] = {
    ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
static const char KW_CDATA[] = {
    ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_DOCTYPE[] = {
    ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
static const char KW_ELEMENT[] = {
    ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
static const char KW_EMPTY[] = {
    ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
static const char KW_ENTITIES[] = {
    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
    '\0' };
static const char KW_ENTITY[] = {
    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
static const char KW_FIXED[] = {
    ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
static const char KW_ID[] = {
    ASCII_I, ASCII_D, '\0' };
static const char KW_IDREF[] = {
    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
static const char KW_IDREFS[] = {
    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
static const char KW_IGNORE[] = {
    ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
static const char KW_IMPLIED[] = {
    ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
static const char KW_INCLUDE[] = {
    ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
static const char KW_NDATA[] = {
    ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_NMTOKEN[] = {
    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
static const char KW_NMTOKENS[] = {
    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
    '\0' };
static const char KW_NOTATION[] = {
    ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
    '\0' };
static const char KW_PCDATA[] = {
    ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_PUBLIC[] = {
    ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
static const char KW_REQUIRED[] = {
    ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
    '\0' };
static const char KW_SYSTEM[] = {
    ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
...
...
@@ -58,18 +85,18 @@ static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, A
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif
/* not XML_DTD */
typedef
int
PROLOG_HANDLER
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
);
typedef
int
P
TRCALL
P
ROLOG_HANDLER
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
);
static
PROLOG_HANDLER
prolog0
,
prolog1
,
prolog2
,
doctype0
,
doctype1
,
doctype2
,
doctype3
,
doctype4
,
doctype5
,
internalSubset
,
entity0
,
entity1
,
entity2
,
entity3
,
entity4
,
entity5
,
entity6
,
entity7
,
entity8
,
entity9
,
entity7
,
entity8
,
entity9
,
entity10
,
notation0
,
notation1
,
notation2
,
notation3
,
notation4
,
attlist0
,
attlist1
,
attlist2
,
attlist3
,
attlist4
,
attlist5
,
attlist6
,
attlist7
,
attlist8
,
attlist9
,
...
...
@@ -82,15 +109,14 @@ static PROLOG_HANDLER
declClose
,
error
;
/* Forward declaration of the shared fallback: each state handler ends
   with `return common(state, tok);` for tokens it does not handle
   itself.  Declared once, with the FASTCALL calling convention.
*/
static int FASTCALL
common(PROLOG_STATE *state, int tok);
static
int
prolog0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
prolog0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
...
...
@@ -101,19 +127,20 @@ int prolog0(PROLOG_STATE *state,
return
XML_ROLE_XML_DECL
;
case
XML_TOK_PI
:
state
->
handler
=
prolog1
;
return
XML_ROLE_
NONE
;
return
XML_ROLE_
PI
;
case
XML_TOK_COMMENT
:
state
->
handler
=
prolog1
;
return
XML_ROLE_COMMENT
;
case
XML_TOK_BOM
:
return
XML_ROLE_NONE
;
case
XML_TOK_DECL_OPEN
:
if
(
!
XmlNameMatchesAscii
(
enc
,
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_DOCTYPE
))
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_DOCTYPE
))
break
;
state
->
handler
=
doctype0
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_INSTANCE_START
:
state
->
handler
=
error
;
return
XML_ROLE_INSTANCE_START
;
...
...
@@ -121,28 +148,30 @@ int prolog0(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
prolog1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
prolog1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
case
XML_TOK_PI
:
return
XML_ROLE_PI
;
case
XML_TOK_COMMENT
:
return
XML_ROLE_COMMENT
;
case
XML_TOK_BOM
:
return
XML_ROLE_NONE
;
case
XML_TOK_DECL_OPEN
:
if
(
!
XmlNameMatchesAscii
(
enc
,
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_DOCTYPE
))
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_DOCTYPE
))
break
;
state
->
handler
=
doctype0
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_INSTANCE_START
:
state
->
handler
=
error
;
return
XML_ROLE_INSTANCE_START
;
...
...
@@ -150,19 +179,20 @@ int prolog1(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
prolog2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
prolog2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
case
XML_TOK_PI
:
return
XML_ROLE_PI
;
case
XML_TOK_COMMENT
:
return
XML_ROLE_
NONE
;
return
XML_ROLE_
COMMENT
;
case
XML_TOK_INSTANCE_START
:
state
->
handler
=
error
;
return
XML_ROLE_INSTANCE_START
;
...
...
@@ -170,16 +200,16 @@ int prolog2(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
doctype0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
doctype0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_NAME
:
case
XML_TOK_PREFIXED_NAME
:
state
->
handler
=
doctype1
;
...
...
@@ -188,16 +218,16 @@ int doctype0(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
doctype1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
doctype1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_OPEN_BRACKET
:
state
->
handler
=
internalSubset
;
return
XML_ROLE_DOCTYPE_INTERNAL_SUBSET
;
...
...
@@ -207,27 +237,27 @@ int doctype1(PROLOG_STATE *state,
case
XML_TOK_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_SYSTEM
))
{
state
->
handler
=
doctype3
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_PUBLIC
))
{
state
->
handler
=
doctype2
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
}
break
;
}
return
common
(
state
,
tok
);
}
static
int
doctype2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
doctype2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
doctype3
;
return
XML_ROLE_DOCTYPE_PUBLIC_ID
;
...
...
@@ -235,16 +265,16 @@ int doctype2(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
doctype3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
doctype3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
doctype4
;
return
XML_ROLE_DOCTYPE_SYSTEM_ID
;
...
...
@@ -252,16 +282,16 @@ int doctype3(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
doctype4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
doctype4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_OPEN_BRACKET
:
state
->
handler
=
internalSubset
;
return
XML_ROLE_DOCTYPE_INTERNAL_SUBSET
;
...
...
@@ -272,16 +302,16 @@ int doctype4(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
doctype5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
doctype5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
case
XML_TOK_DECL_CLOSE
:
state
->
handler
=
prolog2
;
return
XML_ROLE_DOCTYPE_CLOSE
;
...
...
@@ -289,66 +319,67 @@ int doctype5(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
int
ernalSubset
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
internalSubset
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
case
XML_TOK_DECL_OPEN
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_ENTITY
))
{
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_ENTITY
))
{
state
->
handler
=
entity0
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_ATTLIST
))
{
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_ATTLIST
))
{
state
->
handler
=
attlist0
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_ELEMENT
))
{
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_ELEMENT
))
{
state
->
handler
=
element0
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_NOTATION
))
{
ptr
+
2
*
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_NOTATION
))
{
state
->
handler
=
notation0
;
return
XML_ROLE_NONE
;
return
XML_ROLE_NO
TATION_NO
NE
;
}
break
;
case
XML_TOK_PI
:
return
XML_ROLE_PI
;
case
XML_TOK_COMMENT
:
return
XML_ROLE_
NONE
;
return
XML_ROLE_
COMMENT
;
case
XML_TOK_PARAM_ENTITY_REF
:
return
XML_ROLE_PARAM_ENTITY_REF
;
case
XML_TOK_CLOSE_BRACKET
:
state
->
handler
=
doctype5
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
DOCTYPE_
NONE
;
}
return
common
(
state
,
tok
);
}
#ifdef XML_DTD
static
int
externalSubset0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
externalSubset0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
state
->
handler
=
externalSubset1
;
if
(
tok
==
XML_TOK_XML_DECL
)
...
...
@@ -356,12 +387,12 @@ int externalSubset0(PROLOG_STATE *state,
return
externalSubset1
(
state
,
tok
,
ptr
,
end
,
enc
);
}
static
int
externalSubset1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
externalSubset1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_COND_SECT_OPEN
:
...
...
@@ -388,19 +419,19 @@ int externalSubset1(PROLOG_STATE *state,
#endif
/* XML_DTD */
static
int
entity0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_PERCENT
:
state
->
handler
=
entity1
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_NAME
:
state
->
handler
=
entity2
;
return
XML_ROLE_GENERAL_ENTITY_NAME
;
...
...
@@ -408,16 +439,16 @@ int entity0(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
entity1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_NAME
:
state
->
handler
=
entity7
;
return
XML_ROLE_PARAM_ENTITY_NAME
;
...
...
@@ -425,43 +456,44 @@ int entity1(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
entity2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_SYSTEM
))
{
state
->
handler
=
entity4
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_PUBLIC
))
{
state
->
handler
=
entity3
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
}
break
;
case
XML_TOK_LITERAL
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ENTITY_NONE
;
return
XML_ROLE_ENTITY_VALUE
;
}
return
common
(
state
,
tok
);
}
static
int
entity3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
entity4
;
return
XML_ROLE_ENTITY_PUBLIC_ID
;
...
...
@@ -469,17 +501,16 @@ int entity3(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
entity4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
entity5
;
return
XML_ROLE_ENTITY_SYSTEM_ID
;
...
...
@@ -487,83 +518,85 @@ int entity4(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
entity5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_DECL_CLOSE
:
setTopLevel
(
state
);
return
XML_ROLE_ENTITY_COMPLETE
;
case
XML_TOK_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_NDATA
))
{
state
->
handler
=
entity6
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
}
break
;
}
return
common
(
state
,
tok
);
}
static
int
entity6
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity6
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_NAME
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ENTITY_NONE
;
return
XML_ROLE_ENTITY_NOTATION_NAME
;
}
return
common
(
state
,
tok
);
}
static
int
entity7
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity7
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_SYSTEM
))
{
state
->
handler
=
entity9
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_PUBLIC
))
{
state
->
handler
=
entity8
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
}
break
;
case
XML_TOK_LITERAL
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ENTITY_NONE
;
return
XML_ROLE_ENTITY_VALUE
;
}
return
common
(
state
,
tok
);
}
static
int
entity8
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity8
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
entity9
;
return
XML_ROLE_ENTITY_PUBLIC_ID
;
...
...
@@ -571,33 +604,50 @@ int entity8(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
entity9
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity9
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ENTITY_
NONE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
declClose
;
state
->
handler
=
entity10
;
return
XML_ROLE_ENTITY_SYSTEM_ID
;
}
return
common
(
state
,
tok
);
}
static
int
notation
0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
entity1
0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_ENTITY_NONE
;
case
XML_TOK_DECL_CLOSE
:
setTopLevel
(
state
);
return
XML_ROLE_ENTITY_COMPLETE
;
}
return
common
(
state
,
tok
);
}
static
int
PTRCALL
notation0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NOTATION_NONE
;
case
XML_TOK_NAME
:
state
->
handler
=
notation1
;
return
XML_ROLE_NOTATION_NAME
;
...
...
@@ -605,40 +655,40 @@ int notation0(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
notation1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
notation1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_NO
TATION_NO
NE
;
case
XML_TOK_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_SYSTEM
))
{
state
->
handler
=
notation3
;
return
XML_ROLE_NONE
;
return
XML_ROLE_NO
TATION_NO
NE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_PUBLIC
))
{
state
->
handler
=
notation2
;
return
XML_ROLE_NONE
;
return
XML_ROLE_NO
TATION_NO
NE
;
}
break
;
}
return
common
(
state
,
tok
);
}
static
int
notation2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
notation2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_NO
TATION_NO
NE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
notation4
;
return
XML_ROLE_NOTATION_PUBLIC_ID
;
...
...
@@ -646,35 +696,37 @@ int notation2(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
notation3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
notation3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_NO
TATION_NO
NE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_NOTATION_NONE
;
return
XML_ROLE_NOTATION_SYSTEM_ID
;
}
return
common
(
state
,
tok
);
}
static
int
notation4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
notation4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_NO
TATION_NO
NE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_NOTATION_NONE
;
return
XML_ROLE_NOTATION_SYSTEM_ID
;
case
XML_TOK_DECL_CLOSE
:
setTopLevel
(
state
);
...
...
@@ -683,16 +735,16 @@ int notation4(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
attlist0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_NAME
:
case
XML_TOK_PREFIXED_NAME
:
state
->
handler
=
attlist1
;
...
...
@@ -701,19 +753,19 @@ int attlist0(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
attlist1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_DECL_CLOSE
:
setTopLevel
(
state
);
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_NAME
:
case
XML_TOK_PREFIXED_NAME
:
state
->
handler
=
attlist2
;
...
...
@@ -722,20 +774,20 @@ int attlist1(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
attlist2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_NAME
:
{
static
const
char
*
types
[]
=
{
KW_CDATA
,
KW_CDATA
,
KW_ID
,
KW_IDREF
,
KW_IDREFS
,
...
...
@@ -746,33 +798,33 @@ int attlist2(PROLOG_STATE *state,
};
int
i
;
for
(
i
=
0
;
i
<
(
int
)(
sizeof
(
types
)
/
sizeof
(
types
[
0
]));
i
++
)
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
types
[
i
]))
{
state
->
handler
=
attlist8
;
return
XML_ROLE_ATTRIBUTE_TYPE_CDATA
+
i
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
types
[
i
]))
{
state
->
handler
=
attlist8
;
return
XML_ROLE_ATTRIBUTE_TYPE_CDATA
+
i
;
}
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_NOTATION
))
{
state
->
handler
=
attlist5
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
}
break
;
case
XML_TOK_OPEN_PAREN
:
state
->
handler
=
attlist3
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
}
return
common
(
state
,
tok
);
}
static
int
attlist3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_NMTOKEN
:
case
XML_TOK_NAME
:
case
XML_TOK_PREFIXED_NAME
:
...
...
@@ -782,54 +834,53 @@ int attlist3(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
attlist4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_CLOSE_PAREN
:
state
->
handler
=
attlist8
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_OR
:
state
->
handler
=
attlist3
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
}
return
common
(
state
,
tok
);
}
static
int
attlist5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_OPEN_PAREN
:
state
->
handler
=
attlist6
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
}
return
common
(
state
,
tok
);
}
static
int
attlist6
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist6
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_NAME
:
state
->
handler
=
attlist7
;
return
XML_ROLE_ATTRIBUTE_NOTATION_VALUE
;
...
...
@@ -837,58 +888,58 @@ int attlist6(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
attlist7
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist7
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_CLOSE_PAREN
:
state
->
handler
=
attlist8
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_OR
:
state
->
handler
=
attlist6
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
}
return
common
(
state
,
tok
);
}
/* default value */
static
int
attlist8
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist8
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_POUND_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_IMPLIED
))
{
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_IMPLIED
))
{
state
->
handler
=
attlist1
;
return
XML_ROLE_IMPLIED_ATTRIBUTE_VALUE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_REQUIRED
))
{
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_REQUIRED
))
{
state
->
handler
=
attlist1
;
return
XML_ROLE_REQUIRED_ATTRIBUTE_VALUE
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_FIXED
))
{
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_FIXED
))
{
state
->
handler
=
attlist9
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
}
break
;
case
XML_TOK_LITERAL
:
...
...
@@ -898,16 +949,16 @@ int attlist8(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
attlist9
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
attlist9
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ATTLIST_
NONE
;
case
XML_TOK_LITERAL
:
state
->
handler
=
attlist1
;
return
XML_ROLE_FIXED_ATTRIBUTE_VALUE
;
...
...
@@ -915,16 +966,16 @@ int attlist9(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
element0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_NAME
:
case
XML_TOK_PREFIXED_NAME
:
state
->
handler
=
element1
;
...
...
@@ -933,23 +984,25 @@ int element0(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
element1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_EMPTY
))
{
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
return
XML_ROLE_CONTENT_EMPTY
;
}
if
(
XmlNameMatchesAscii
(
enc
,
ptr
,
end
,
KW_ANY
))
{
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
return
XML_ROLE_CONTENT_ANY
;
}
break
;
...
...
@@ -961,21 +1014,21 @@ int element1(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
element2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_POUND_NAME
:
if
(
XmlNameMatchesAscii
(
enc
,
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_PCDATA
))
{
ptr
+
MIN_BYTES_PER_CHAR
(
enc
),
end
,
KW_PCDATA
))
{
state
->
handler
=
element3
;
return
XML_ROLE_CONTENT_PCDATA
;
}
...
...
@@ -1001,39 +1054,41 @@ int element2(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
element3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element3
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_CLOSE_PAREN
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
return
XML_ROLE_GROUP_CLOSE
;
case
XML_TOK_CLOSE_PAREN_ASTERISK
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
return
XML_ROLE_GROUP_CLOSE_REP
;
case
XML_TOK_OR
:
state
->
handler
=
element4
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
}
return
common
(
state
,
tok
);
}
static
int
element4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element4
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_NAME
:
case
XML_TOK_PREFIXED_NAME
:
state
->
handler
=
element5
;
...
...
@@ -1042,36 +1097,37 @@ int element4(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
element5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element5
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_CLOSE_PAREN_ASTERISK
:
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
return
XML_ROLE_GROUP_CLOSE_REP
;
case
XML_TOK_OR
:
state
->
handler
=
element4
;
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
}
return
common
(
state
,
tok
);
}
static
int
element6
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element6
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_OPEN_PAREN
:
state
->
level
+=
1
;
return
XML_ROLE_GROUP_OPEN
;
...
...
@@ -1092,35 +1148,43 @@ int element6(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
element7
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
element7
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
XML_ROLE_
ELEMENT_
NONE
;
case
XML_TOK_CLOSE_PAREN
:
state
->
level
-=
1
;
if
(
state
->
level
==
0
)
if
(
state
->
level
==
0
)
{
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
}
return
XML_ROLE_GROUP_CLOSE
;
case
XML_TOK_CLOSE_PAREN_ASTERISK
:
state
->
level
-=
1
;
if
(
state
->
level
==
0
)
if
(
state
->
level
==
0
)
{
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
}
return
XML_ROLE_GROUP_CLOSE_REP
;
case
XML_TOK_CLOSE_PAREN_QUESTION
:
state
->
level
-=
1
;
if
(
state
->
level
==
0
)
if
(
state
->
level
==
0
)
{
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
}
return
XML_ROLE_GROUP_CLOSE_OPT
;
case
XML_TOK_CLOSE_PAREN_PLUS
:
state
->
level
-=
1
;
if
(
state
->
level
==
0
)
if
(
state
->
level
==
0
)
{
state
->
handler
=
declClose
;
state
->
role_none
=
XML_ROLE_ELEMENT_NONE
;
}
return
XML_ROLE_GROUP_CLOSE_PLUS
;
case
XML_TOK_COMMA
:
state
->
handler
=
element6
;
...
...
@@ -1134,12 +1198,12 @@ int element7(PROLOG_STATE *state,
#ifdef XML_DTD
static
int
condSect0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
condSect0
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
...
...
@@ -1158,12 +1222,12 @@ int condSect0(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
condSect1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
condSect1
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
...
...
@@ -1176,12 +1240,12 @@ int condSect1(PROLOG_STATE *state,
return
common
(
state
,
tok
);
}
static
int
condSect2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
condSect2
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
...
...
@@ -1195,55 +1259,35 @@ int condSect2(PROLOG_STATE *state,
#endif
/* XML_DTD */
static
int
declClose
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
declClose
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
switch
(
tok
)
{
case
XML_TOK_PROLOG_S
:
return
XML_ROLE_NONE
;
return
state
->
role_none
;
case
XML_TOK_DECL_CLOSE
:
setTopLevel
(
state
);
return
XML_ROLE_NONE
;
return
state
->
role_none
;
}
return
common
(
state
,
tok
);
}
#if 0
static
int ignore(PROLOG_STATE *state,
int tok,
const char *ptr,
const char *end,
const ENCODING *enc)
{
switch (tok) {
case XML_TOK_DECL_CLOSE:
state->handler = internalSubset;
return 0;
default:
return XML_ROLE_NONE;
}
return common(state, tok);
}
#endif
static
int
error
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
static
int
PTRCALL
error
(
PROLOG_STATE
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
)
{
return
XML_ROLE_NONE
;
}
static
int
common
(
PROLOG_STATE
*
state
,
int
tok
)
static
int
FASTCALL
common
(
PROLOG_STATE
*
state
,
int
tok
)
{
#ifdef XML_DTD
if
(
!
state
->
documentEntity
&&
tok
==
XML_TOK_PARAM_ENTITY_REF
)
...
...
@@ -1253,18 +1297,21 @@ int common(PROLOG_STATE *state, int tok)
return
XML_ROLE_ERROR
;
}
void
XmlPrologStateInit
(
PROLOG_STATE
*
state
)
void
XmlPrologStateInit
(
PROLOG_STATE
*
state
)
{
state
->
handler
=
prolog0
;
#ifdef XML_DTD
state
->
documentEntity
=
1
;
state
->
includeLevel
=
0
;
state
->
inEntityValue
=
0
;
#endif
/* XML_DTD */
}
#ifdef XML_DTD
void
XmlPrologStateInitExternalEntity
(
PROLOG_STATE
*
state
)
void
XmlPrologStateInitExternalEntity
(
PROLOG_STATE
*
state
)
{
state
->
handler
=
externalSubset0
;
state
->
documentEntity
=
0
;
...
...
Modules/expat/xmlrole.h
View file @
fc03a94a
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlRole_INCLUDED
#define XmlRole_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt
#endif
#include "xmltok.h"
#ifdef __cplusplus
...
...
@@ -17,6 +22,7 @@ enum {
XML_ROLE_NONE
=
0
,
XML_ROLE_XML_DECL
,
XML_ROLE_INSTANCE_START
,
XML_ROLE_DOCTYPE_NONE
,
XML_ROLE_DOCTYPE_NAME
,
XML_ROLE_DOCTYPE_SYSTEM_ID
,
XML_ROLE_DOCTYPE_PUBLIC_ID
,
...
...
@@ -24,11 +30,13 @@ enum {
XML_ROLE_DOCTYPE_CLOSE
,
XML_ROLE_GENERAL_ENTITY_NAME
,
XML_ROLE_PARAM_ENTITY_NAME
,
XML_ROLE_ENTITY_NONE
,
XML_ROLE_ENTITY_VALUE
,
XML_ROLE_ENTITY_SYSTEM_ID
,
XML_ROLE_ENTITY_PUBLIC_ID
,
XML_ROLE_ENTITY_COMPLETE
,
XML_ROLE_ENTITY_NOTATION_NAME
,
XML_ROLE_NOTATION_NONE
,
XML_ROLE_NOTATION_NAME
,
XML_ROLE_NOTATION_SYSTEM_ID
,
XML_ROLE_NOTATION_NO_SYSTEM_ID
,
...
...
@@ -44,11 +52,13 @@ enum {
XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS
,
XML_ROLE_ATTRIBUTE_ENUM_VALUE
,
XML_ROLE_ATTRIBUTE_NOTATION_VALUE
,
XML_ROLE_ATTLIST_NONE
,
XML_ROLE_ATTLIST_ELEMENT_NAME
,
XML_ROLE_IMPLIED_ATTRIBUTE_VALUE
,
XML_ROLE_REQUIRED_ATTRIBUTE_VALUE
,
XML_ROLE_DEFAULT_ATTRIBUTE_VALUE
,
XML_ROLE_FIXED_ATTRIBUTE_VALUE
,
XML_ROLE_ELEMENT_NONE
,
XML_ROLE_ELEMENT_NAME
,
XML_ROLE_CONTENT_ANY
,
XML_ROLE_CONTENT_EMPTY
,
...
...
@@ -64,6 +74,8 @@ enum {
XML_ROLE_CONTENT_ELEMENT_REP
,
XML_ROLE_CONTENT_ELEMENT_OPT
,
XML_ROLE_CONTENT_ELEMENT_PLUS
,
XML_ROLE_PI
,
XML_ROLE_COMMENT
,
#ifdef XML_DTD
XML_ROLE_TEXT_DECL
,
XML_ROLE_IGNORE_SECT
,
...
...
@@ -73,15 +85,17 @@ enum {
};
typedef
struct
prolog_state
{
int
(
*
handler
)
(
struct
prolog_state
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
);
int
(
PTRCALL
*
handler
)
(
struct
prolog_state
*
state
,
int
tok
,
const
char
*
ptr
,
const
char
*
end
,
const
ENCODING
*
enc
);
unsigned
level
;
int
role_none
;
#ifdef XML_DTD
unsigned
includeLevel
;
int
documentEntity
;
int
inEntityValue
;
#endif
/* XML_DTD */
}
PROLOG_STATE
;
...
...
Modules/expat/xmltok.c
View file @
fc03a94a
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifdef COMPILED_FROM_DSP
# include "winconfig.h"
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
#else
#ifdef HAVE_CONFIG_H
#
include <
config.h>
#endif
/* Unused - MvL
#
include <expat_
config.h>
*/
#endif
/* ndef COMPILED_FROM_DSP */
#include "internal.h"
#include "xmltok.h"
#include "nametab.h"
...
...
@@ -39,24 +41,25 @@ See the file COPYING for copying permission.
#define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits
between the bottom 5 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask.
*/
/* A 2 byte UTF-8 representation splits the characters 11 bits
between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
pages, 3 bits to add to that index and 5 bits to generate the mask.
*/
#define UTF8_GET_NAMING2(pages, byte) \
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \
& (1 << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits
between the bottom 4, 6 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask. */
/* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
into pages, 3 bits to add to that index and 5 bits to generate the
mask.
*/
#define UTF8_GET_NAMING3(pages, byte) \
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
+ ((((byte)[1]) >> 2) & 0xF)] \
<< 3) \
<< 3) \
+ ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \
& (1 << (((byte)[2]) & 0x1F)))
...
...
@@ -68,59 +71,97 @@ We need 8 bits to index into pages, 3 bits to add to that index and
? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
: 0))
#define UTF8_INVALID3(p) \
((*p) == 0xED \
? (((p)[1] & 0x20) != 0) \
: ((*p) == 0xEF \
? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
: 0))
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
with the additional restriction of not allowing the Unicode
code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
Implementation details:
(A & 0x80) == 0 means A < 0x80
and
(A & 0xC0) == 0xC0 means A > 0xBF
*/
#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
#define UTF8_INVALID2(p) \
((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
static
int
isNever
(
const
ENCODING
*
enc
,
const
char
*
p
)
#define UTF8_INVALID3(p) \
(((p)[2] & 0x80) == 0 \
|| \
((*p) == 0xEF && (p)[1] == 0xBF \
? \
(p)[2] > 0xBD \
: \
((p)[2] & 0xC0) == 0xC0) \
|| \
((*p) == 0xE0 \
? \
(p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
#define UTF8_INVALID4(p) \
(((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
|| \
((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
|| \
((*p) == 0xF0 \
? \
(p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
: \
((p)[1] & 0x80) == 0 \
|| \
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static
int
PTRFASTCALL
isNever
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
0
;
}
static
int
utf8_isName2
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
utf8_isName2
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
UTF8_GET_NAMING2
(
namePages
,
(
const
unsigned
char
*
)
p
);
}
static
int
utf8_isName3
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
utf8_isName3
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
UTF8_GET_NAMING3
(
namePages
,
(
const
unsigned
char
*
)
p
);
}
#define utf8_isName4 isNever
static
int
utf8_isNmstrt2
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
utf8_isNmstrt2
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
UTF8_GET_NAMING2
(
nmstrtPages
,
(
const
unsigned
char
*
)
p
);
}
static
int
utf8_isNmstrt3
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
utf8_isNmstrt3
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
UTF8_GET_NAMING3
(
nmstrtPages
,
(
const
unsigned
char
*
)
p
);
}
#define utf8_isNmstrt4 isNever
#define utf8_isInvalid2 isNever
static
int
PTRFASTCALL
utf8_isInvalid2
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
UTF8_INVALID2
((
const
unsigned
char
*
)
p
);
}
static
int
utf8_isInvalid3
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
utf8_isInvalid3
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
UTF8_INVALID3
((
const
unsigned
char
*
)
p
);
}
static
int
utf8_isInvalid4
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
utf8_isInvalid4
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
UTF8_INVALID4
((
const
unsigned
char
*
)
p
);
}
...
...
@@ -129,23 +170,25 @@ struct normal_encoding {
ENCODING
enc
;
unsigned
char
type
[
256
];
#ifdef XML_MIN_SIZE
int
(
*
byteType
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isNameMin
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isNmstrtMin
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
byteToAscii
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
charMatches
)(
const
ENCODING
*
,
const
char
*
,
int
);
int
(
PTRFASTCALL
*
byteType
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isNameMin
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isNmstrtMin
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
byteToAscii
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRCALL
*
charMatches
)(
const
ENCODING
*
,
const
char
*
,
int
);
#endif
/* XML_MIN_SIZE */
int
(
*
isName2
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isName3
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isName4
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isNmstrt2
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isNmstrt3
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isNmstrt4
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isInvalid2
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isInvalid3
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
isInvalid4
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isName2
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isName3
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isName4
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isNmstrt2
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isNmstrt3
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isNmstrt4
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isInvalid2
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isInvalid3
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRFASTCALL
*
isInvalid4
)(
const
ENCODING
*
,
const
char
*
);
};
#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc))
#ifdef XML_MIN_SIZE
#define STANDARD_VTABLE(E) \
...
...
@@ -172,7 +215,7 @@ struct normal_encoding {
E ## isInvalid3, \
E ## isInvalid4
static
int
checkCharRefNumber
(
int
);
static
int
FASTCALL
checkCharRefNumber
(
int
);
#include "xmltok_impl.h"
#include "ascii.h"
...
...
@@ -193,22 +236,22 @@ static int checkCharRefNumber(int);
(((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#ifdef XML_MIN_SIZE
static
int
sb_byteType
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
sb_byteType
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
SB_BYTE_TYPE
(
enc
,
p
);
}
#define BYTE_TYPE(enc, p) \
(
((const struct normal_encoding *)(enc)
)->byteType(enc, p))
(
AS_NORMAL_ENCODING(enc
)->byteType(enc, p))
#else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
(
((const struct normal_encoding *)(enc)
)->byteToAscii(enc, p))
static
int
sb_byteToAscii
(
const
ENCODING
*
enc
,
const
char
*
p
)
(
AS_NORMAL_ENCODING(enc
)->byteToAscii(enc, p))
static
int
PTRFASTCALL
sb_byteToAscii
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
*
p
;
}
...
...
@@ -217,17 +260,17 @@ int sb_byteToAscii(const ENCODING *enc, const char *p)
#endif
#define IS_NAME_CHAR(enc, p, n) \
(
((const struct normal_encoding *)(enc)
)->isName ## n(enc, p))
(
AS_NORMAL_ENCODING(enc
)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
(
((const struct normal_encoding *)(enc)
)->isNmstrt ## n(enc, p))
(
AS_NORMAL_ENCODING(enc
)->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
(
((const struct normal_encoding *)(enc)
)->isInvalid ## n(enc, p))
(
AS_NORMAL_ENCODING(enc
)->isInvalid ## n(enc, p))
#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
(
((const struct normal_encoding *)(enc)
)->isNameMin(enc, p))
(
AS_NORMAL_ENCODING(enc
)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
(
((const struct normal_encoding *)(enc)
)->isNmstrtMin(enc, p))
(
AS_NORMAL_ENCODING(enc
)->isNmstrtMin(enc, p))
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
...
...
@@ -235,9 +278,9 @@ int sb_byteToAscii(const ENCODING *enc, const char *p)
#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
(
((const struct normal_encoding *)(enc)
)->charMatches(enc, p, c))
static
int
sb_charMatches
(
const
ENCODING
*
enc
,
const
char
*
p
,
int
c
)
(
AS_NORMAL_ENCODING(enc
)->charMatches(enc, p, c))
static
int
PTRCALL
sb_charMatches
(
const
ENCODING
*
enc
,
const
char
*
p
,
int
c
)
{
return
*
p
==
c
;
}
...
...
@@ -266,10 +309,10 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4
=
0xf0
};
static
void
utf8_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
static
void
PTRCALL
utf8_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
{
char
*
to
;
const
char
*
from
;
...
...
@@ -277,7 +320,7 @@ void utf8_toUtf8(const ENCODING *enc,
/* Avoid copying partial characters. */
for
(
fromLim
=
*
fromP
+
(
toLim
-
*
toP
);
fromLim
>
*
fromP
;
fromLim
--
)
if
(((
unsigned
char
)
fromLim
[
-
1
]
&
0xc0
)
!=
0x80
)
break
;
break
;
}
for
(
to
=
*
toP
,
from
=
*
fromP
;
from
!=
fromLim
;
from
++
,
to
++
)
*
to
=
*
from
;
...
...
@@ -285,34 +328,36 @@ void utf8_toUtf8(const ENCODING *enc,
*
toP
=
to
;
}
static
void
utf8_toUtf16
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
)
static
void
PTRCALL
utf8_toUtf16
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
)
{
unsigned
short
*
to
=
*
toP
;
const
char
*
from
=
*
fromP
;
while
(
from
!=
fromLim
&&
to
!=
toLim
)
{
switch
(((
struct
normal_encoding
*
)
enc
)
->
type
[(
unsigned
char
)
*
from
])
{
case
BT_LEAD2
:
*
to
++
=
(
(
from
[
0
]
&
0x1f
)
<<
6
)
|
(
from
[
1
]
&
0x3f
);
*
to
++
=
(
unsigned
short
)(((
from
[
0
]
&
0x1f
)
<<
6
)
|
(
from
[
1
]
&
0x3f
)
);
from
+=
2
;
break
;
case
BT_LEAD3
:
*
to
++
=
((
from
[
0
]
&
0xf
)
<<
12
)
|
((
from
[
1
]
&
0x3f
)
<<
6
)
|
(
from
[
2
]
&
0x3f
);
*
to
++
=
(
unsigned
short
)(((
from
[
0
]
&
0xf
)
<<
12
)
|
((
from
[
1
]
&
0x3f
)
<<
6
)
|
(
from
[
2
]
&
0x3f
));
from
+=
3
;
break
;
case
BT_LEAD4
:
{
unsigned
long
n
;
if
(
to
+
1
==
toLim
)
break
;
n
=
((
from
[
0
]
&
0x7
)
<<
18
)
|
((
from
[
1
]
&
0x3f
)
<<
12
)
|
((
from
[
2
]
&
0x3f
)
<<
6
)
|
(
from
[
3
]
&
0x3f
);
n
-=
0x10000
;
to
[
0
]
=
(
unsigned
short
)((
n
>>
10
)
|
0xD800
);
to
[
1
]
=
(
unsigned
short
)((
n
&
0x3FF
)
|
0xDC00
);
to
+=
2
;
from
+=
4
;
unsigned
long
n
;
if
(
to
+
1
==
toLim
)
goto
after
;
n
=
((
from
[
0
]
&
0x7
)
<<
18
)
|
((
from
[
1
]
&
0x3f
)
<<
12
)
|
((
from
[
2
]
&
0x3f
)
<<
6
)
|
(
from
[
3
]
&
0x3f
);
n
-=
0x10000
;
to
[
0
]
=
(
unsigned
short
)((
n
>>
10
)
|
0xD800
);
to
[
1
]
=
(
unsigned
short
)((
n
&
0x3FF
)
|
0xDC00
);
to
+=
2
;
from
+=
4
;
}
break
;
default:
...
...
@@ -320,6 +365,7 @@ void utf8_toUtf16(const ENCODING *enc,
break
;
}
}
after:
*
fromP
=
from
;
*
toP
=
to
;
}
...
...
@@ -370,10 +416,10 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE
(
sb_
)
NORMAL_VTABLE
(
utf8_
)
};
static
void
latin1_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
static
void
PTRCALL
latin1_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
{
for
(;;)
{
unsigned
char
c
;
...
...
@@ -382,23 +428,23 @@ void latin1_toUtf8(const ENCODING *enc,
c
=
(
unsigned
char
)
**
fromP
;
if
(
c
&
0x80
)
{
if
(
toLim
-
*
toP
<
2
)
break
;
*
(
*
toP
)
++
=
((
c
>>
6
)
|
UTF8_cval2
);
*
(
*
toP
)
++
=
((
c
&
0x3f
)
|
0x80
);
break
;
*
(
*
toP
)
++
=
(
char
)(
(
c
>>
6
)
|
UTF8_cval2
);
*
(
*
toP
)
++
=
(
char
)(
(
c
&
0x3f
)
|
0x80
);
(
*
fromP
)
++
;
}
else
{
if
(
*
toP
==
toLim
)
break
;
break
;
*
(
*
toP
)
++
=
*
(
*
fromP
)
++
;
}
}
}
static
void
latin1_toUtf16
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
)
static
void
PTRCALL
latin1_toUtf16
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
)
{
while
(
*
fromP
!=
fromLim
&&
*
toP
!=
toLim
)
*
(
*
toP
)
++
=
(
unsigned
char
)
*
(
*
fromP
)
++
;
...
...
@@ -428,10 +474,10 @@ static const struct normal_encoding latin1_encoding = {
STANDARD_VTABLE
(
sb_
)
};
static
void
ascii_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
static
void
PTRCALL
ascii_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
{
while
(
*
fromP
!=
fromLim
&&
*
toP
!=
toLim
)
*
(
*
toP
)
++
=
*
(
*
fromP
)
++
;
...
...
@@ -461,7 +507,8 @@ static const struct normal_encoding ascii_encoding = {
STANDARD_VTABLE
(
sb_
)
};
static
int
unicode_byte_type
(
char
hi
,
char
lo
)
static
int
PTRFASTCALL
unicode_byte_type
(
char
hi
,
char
lo
)
{
switch
((
unsigned
char
)
hi
)
{
case
0xD8
:
case
0xD9
:
case
0xDA
:
case
0xDB
:
...
...
@@ -480,10 +527,10 @@ static int unicode_byte_type(char hi, char lo)
}
#define DEFINE_UTF16_TO_UTF8(E) \
static \
void
E ## toUtf8(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
static
void PTRCALL
\
E ## toUtf8(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
{ \
const char *from; \
for (from = *fromP; from != fromLim; from += 2) { \
...
...
@@ -496,7 +543,7 @@ void E ## toUtf8(const ENCODING *enc, \
if (lo < 0x80) { \
if (*toP == toLim) { \
*fromP = from; \
return; \
return; \
} \
*(*toP)++ = lo; \
break; \
...
...
@@ -506,7 +553,7 @@ void E ## toUtf8(const ENCODING *enc, \
case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \
*fromP = from; \
return; \
return; \
} \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
...
...
@@ -514,7 +561,7 @@ void E ## toUtf8(const ENCODING *enc, \
default: \
if (toLim - *toP < 3) { \
*fromP = from; \
return; \
return; \
} \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */
\
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
...
...
@@ -523,8 +570,8 @@ void E ## toUtf8(const ENCODING *enc, \
break; \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \
*fromP = from; \
return; \
*fromP = from; \
return; \
} \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
...
...
@@ -532,9 +579,9 @@ void E ## toUtf8(const ENCODING *enc, \
from += 2; \
lo2 = GET_LO(from); \
*(*toP)++ = (((lo & 0x3) << 4) \
| ((GET_HI(from) & 0x3) << 2) \
| (lo2 >> 6) \
| 0x80); \
| ((GET_HI(from) & 0x3) << 2) \
| (lo2 >> 6) \
| 0x80); \
*(*toP)++ = ((lo2 & 0x3f) | 0x80); \
break; \
} \
...
...
@@ -543,10 +590,10 @@ void E ## toUtf8(const ENCODING *enc, \
}
#define DEFINE_UTF16_TO_UTF16(E) \
static \
void
E ## toUtf16(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
static
void PTRCALL
\
E ## toUtf16(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
{ \
/* Avoid copying first half only of surrogate */
\
if (fromLim - *fromP > ((toLim - *toP) << 1) \
...
...
@@ -593,32 +640,32 @@ DEFINE_UTF16_TO_UTF16(big2_)
#ifdef XML_MIN_SIZE
static
int
little2_byteType
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
little2_byteType
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
LITTLE2_BYTE_TYPE
(
enc
,
p
);
}
static
int
little2_byteToAscii
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
little2_byteToAscii
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
LITTLE2_BYTE_TO_ASCII
(
enc
,
p
);
}
static
int
little2_charMatches
(
const
ENCODING
*
enc
,
const
char
*
p
,
int
c
)
static
int
PTRCALL
little2_charMatches
(
const
ENCODING
*
enc
,
const
char
*
p
,
int
c
)
{
return
LITTLE2_CHAR_MATCHES
(
enc
,
p
,
c
);
}
static
int
little2_isNameMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
little2_isNameMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
LITTLE2_IS_NAME_CHAR_MINBPC
(
enc
,
p
);
}
static
int
little2_isNmstrtMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
little2_isNmstrtMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
LITTLE2_IS_NMSTRT_CHAR_MINBPC
(
enc
,
p
);
}
...
...
@@ -633,7 +680,7 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p)
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
...
...
@@ -656,9 +703,9 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p)
#ifdef XML_NS
static
const
struct
normal_encoding
little2_encoding_ns
=
{
static
const
struct
normal_encoding
little2_encoding_ns
=
{
{
VTABLE
,
2
,
0
,
#if
XML_BYTE_ORDER == 12
#if
BYTEORDER == 1234
1
#else
0
...
...
@@ -673,9 +720,9 @@ static const struct normal_encoding little2_encoding_ns = {
#endif
static
const
struct
normal_encoding
little2_encoding
=
{
static
const
struct
normal_encoding
little2_encoding
=
{
{
VTABLE
,
2
,
0
,
#if
XML_BYTE_ORDER == 12
#if
BYTEORDER == 1234
1
#else
0
...
...
@@ -690,11 +737,11 @@ static const struct normal_encoding little2_encoding = {
STANDARD_VTABLE
(
little2_
)
};
#if
XML_BYTE_ORDER !=
21
#if
BYTEORDER != 43
21
#ifdef XML_NS
static
const
struct
normal_encoding
internal_little2_encoding_ns
=
{
static
const
struct
normal_encoding
internal_little2_encoding_ns
=
{
{
VTABLE
,
2
,
0
,
1
},
{
#include "iasciitab.h"
...
...
@@ -705,7 +752,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
#endif
static
const
struct
normal_encoding
internal_little2_encoding
=
{
static
const
struct
normal_encoding
internal_little2_encoding
=
{
{
VTABLE
,
2
,
0
,
1
},
{
#define BT_COLON BT_NMSTRT
...
...
@@ -732,32 +779,32 @@ static const struct normal_encoding internal_little2_encoding = {
#ifdef XML_MIN_SIZE
static
int
big2_byteType
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
big2_byteType
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
BIG2_BYTE_TYPE
(
enc
,
p
);
}
static
int
big2_byteToAscii
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
big2_byteToAscii
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
BIG2_BYTE_TO_ASCII
(
enc
,
p
);
}
static
int
big2_charMatches
(
const
ENCODING
*
enc
,
const
char
*
p
,
int
c
)
static
int
PTRCALL
big2_charMatches
(
const
ENCODING
*
enc
,
const
char
*
p
,
int
c
)
{
return
BIG2_CHAR_MATCHES
(
enc
,
p
,
c
);
}
static
int
big2_isNameMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
big2_isNameMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
BIG2_IS_NAME_CHAR_MINBPC
(
enc
,
p
);
}
static
int
big2_isNmstrtMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
big2_isNmstrtMin
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
return
BIG2_IS_NMSTRT_CHAR_MINBPC
(
enc
,
p
);
}
...
...
@@ -772,7 +819,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p)
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
...
...
@@ -797,7 +844,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p)
static
const
struct
normal_encoding
big2_encoding_ns
=
{
{
VTABLE
,
2
,
0
,
#if
XML_BYTE_ORDER ==
21
#if
BYTEORDER == 43
21
1
#else
0
...
...
@@ -814,7 +861,7 @@ static const struct normal_encoding big2_encoding_ns = {
static
const
struct
normal_encoding
big2_encoding
=
{
{
VTABLE
,
2
,
0
,
#if
XML_BYTE_ORDER ==
21
#if
BYTEORDER == 43
21
1
#else
0
...
...
@@ -829,7 +876,7 @@ static const struct normal_encoding big2_encoding = {
STANDARD_VTABLE
(
big2_
)
};
#if
XML_BYTE_ORDER != 12
#if
BYTEORDER != 1234
#ifdef XML_NS
...
...
@@ -859,8 +906,8 @@ static const struct normal_encoding internal_big2_encoding = {
#undef PREFIX
static
int
streqci
(
const
char
*
s1
,
const
char
*
s2
)
static
int
FASTCALL
streqci
(
const
char
*
s1
,
const
char
*
s2
)
{
for
(;;)
{
char
c1
=
*
s1
++
;
...
...
@@ -877,15 +924,15 @@ int streqci(const char *s1, const char *s2)
return
1
;
}
static
void
initUpdatePosition
(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
POSITION
*
pos
)
static
void
PTRCALL
initUpdatePosition
(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
POSITION
*
pos
)
{
normal_updatePosition
(
&
utf8_encoding
.
enc
,
ptr
,
end
,
pos
);
}
static
int
toAscii
(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
)
static
int
toAscii
(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
)
{
char
buf
[
1
];
char
*
p
=
buf
;
...
...
@@ -896,34 +943,35 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end)
return
buf
[
0
];
}
static
i
nt
i
sSpace
(
int
c
)
static
int
FASTCALL
isSpace
(
int
c
)
{
switch
(
c
)
{
case
0x20
:
case
0xD
:
case
0xA
:
case
0x9
:
case
0x9
:
return
1
;
}
return
0
;
}
/* Return 1 if there's just optional white space
or there's an S followed by name=val. */
static
int
parsePseudoAttribute
(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
namePtr
,
const
char
**
nameEndPtr
,
const
char
**
valPtr
,
const
char
**
nextTokPtr
)
/* Return 1 if there's just optional white space or there's an S
followed by name=val.
*/
static
int
parsePseudoAttribute
(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
namePtr
,
const
char
**
nameEndPtr
,
const
char
**
valPtr
,
const
char
**
nextTokPtr
)
{
int
c
;
char
open
;
if
(
ptr
==
end
)
{
*
namePtr
=
0
;
*
namePtr
=
NULL
;
return
1
;
}
if
(
!
isSpace
(
toAscii
(
enc
,
ptr
,
end
)))
{
...
...
@@ -934,7 +982,7 @@ int parsePseudoAttribute(const ENCODING *enc,
ptr
+=
enc
->
minBytesPerChar
;
}
while
(
isSpace
(
toAscii
(
enc
,
ptr
,
end
)));
if
(
ptr
==
end
)
{
*
namePtr
=
0
;
*
namePtr
=
NULL
;
return
1
;
}
*
namePtr
=
ptr
;
...
...
@@ -951,11 +999,11 @@ int parsePseudoAttribute(const ENCODING *enc,
if
(
isSpace
(
c
))
{
*
nameEndPtr
=
ptr
;
do
{
ptr
+=
enc
->
minBytesPerChar
;
ptr
+=
enc
->
minBytesPerChar
;
}
while
(
isSpace
(
c
=
toAscii
(
enc
,
ptr
,
end
)));
if
(
c
!=
ASCII_EQUALS
)
{
*
nextTokPtr
=
ptr
;
return
0
;
*
nextTokPtr
=
ptr
;
return
0
;
}
break
;
}
...
...
@@ -975,7 +1023,7 @@ int parsePseudoAttribute(const ENCODING *enc,
*
nextTokPtr
=
ptr
;
return
0
;
}
open
=
c
;
open
=
(
char
)
c
;
ptr
+=
enc
->
minBytesPerChar
;
*
valPtr
=
ptr
;
for
(;;
ptr
+=
enc
->
minBytesPerChar
)
{
...
...
@@ -983,11 +1031,11 @@ int parsePseudoAttribute(const ENCODING *enc,
if
(
c
==
open
)
break
;
if
(
!
(
ASCII_a
<=
c
&&
c
<=
ASCII_z
)
&&
!
(
ASCII_A
<=
c
&&
c
<=
ASCII_Z
)
&&
!
(
ASCII_0
<=
c
&&
c
<=
ASCII_9
)
&&
c
!=
ASCII_PERIOD
&&
c
!=
ASCII_MINUS
&&
c
!=
ASCII_UNDERSCORE
)
{
&&
!
(
ASCII_A
<=
c
&&
c
<=
ASCII_Z
)
&&
!
(
ASCII_0
<=
c
&&
c
<=
ASCII_9
)
&&
c
!=
ASCII_PERIOD
&&
c
!=
ASCII_MINUS
&&
c
!=
ASCII_UNDERSCORE
)
{
*
nextTokPtr
=
ptr
;
return
0
;
}
...
...
@@ -1005,7 +1053,8 @@ static const char KW_encoding[] = {
};
static
const
char
KW_standalone
[]
=
{
ASCII_s
,
ASCII_t
,
ASCII_a
,
ASCII_n
,
ASCII_d
,
ASCII_a
,
ASCII_l
,
ASCII_o
,
ASCII_n
,
ASCII_e
,
'\0'
ASCII_s
,
ASCII_t
,
ASCII_a
,
ASCII_n
,
ASCII_d
,
ASCII_a
,
ASCII_l
,
ASCII_o
,
ASCII_n
,
ASCII_e
,
'\0'
};
static
const
char
KW_yes
[]
=
{
...
...
@@ -1016,27 +1065,28 @@ static const char KW_no[] = {
ASCII_n
,
ASCII_o
,
'\0'
};
static
int
doParseXmlDecl
(
const
ENCODING
*
(
*
encodingFinder
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
),
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingName
,
const
ENCODING
**
encoding
,
int
*
standalone
)
static
int
doParseXmlDecl
(
const
ENCODING
*
(
*
encodingFinder
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
),
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingName
,
const
ENCODING
**
encoding
,
int
*
standalone
)
{
const
char
*
val
=
0
;
const
char
*
name
=
0
;
const
char
*
nameEnd
=
0
;
const
char
*
val
=
NULL
;
const
char
*
name
=
NULL
;
const
char
*
nameEnd
=
NULL
;
ptr
+=
5
*
enc
->
minBytesPerChar
;
end
-=
2
*
enc
->
minBytesPerChar
;
if
(
!
parsePseudoAttribute
(
enc
,
ptr
,
end
,
&
name
,
&
nameEnd
,
&
val
,
&
ptr
)
||
!
name
)
{
if
(
!
parsePseudoAttribute
(
enc
,
ptr
,
end
,
&
name
,
&
nameEnd
,
&
val
,
&
ptr
)
||
!
name
)
{
*
badPtr
=
ptr
;
return
0
;
}
...
...
@@ -1057,9 +1107,9 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
}
if
(
!
name
)
{
if
(
isGeneralTextEntity
)
{
/* a TextDecl must have an EncodingDecl */
*
badPtr
=
ptr
;
return
0
;
/* a TextDecl must have an EncodingDecl */
*
badPtr
=
ptr
;
return
0
;
}
return
1
;
}
...
...
@@ -1081,7 +1131,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
if
(
!
name
)
return
1
;
}
if
(
!
XmlNameMatchesAscii
(
enc
,
name
,
nameEnd
,
KW_standalone
)
||
isGeneralTextEntity
)
{
if
(
!
XmlNameMatchesAscii
(
enc
,
name
,
nameEnd
,
KW_standalone
)
||
isGeneralTextEntity
)
{
*
badPtr
=
name
;
return
0
;
}
...
...
@@ -1106,8 +1157,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
return
1
;
}
static
int
checkCharRefNumber
(
int
result
)
static
int
FASTCALL
checkCharRefNumber
(
int
result
)
{
switch
(
result
>>
8
)
{
case
0xD8
:
case
0xD9
:
case
0xDA
:
case
0xDB
:
...
...
@@ -1125,7 +1176,8 @@ int checkCharRefNumber(int result)
return
result
;
}
int
XmlUtf8Encode
(
int
c
,
char
*
buf
)
int
FASTCALL
XmlUtf8Encode
(
int
c
,
char
*
buf
)
{
enum
{
/* minN is minimum legal resulting value for N byte sequence */
...
...
@@ -1137,42 +1189,43 @@ int XmlUtf8Encode(int c, char *buf)
if
(
c
<
0
)
return
0
;
if
(
c
<
min2
)
{
buf
[
0
]
=
(
c
|
UTF8_cval1
);
buf
[
0
]
=
(
c
har
)(
c
|
UTF8_cval1
);
return
1
;
}
if
(
c
<
min3
)
{
buf
[
0
]
=
((
c
>>
6
)
|
UTF8_cval2
);
buf
[
1
]
=
((
c
&
0x3f
)
|
0x80
);
buf
[
0
]
=
(
char
)(
(
c
>>
6
)
|
UTF8_cval2
);
buf
[
1
]
=
(
char
)(
(
c
&
0x3f
)
|
0x80
);
return
2
;
}
if
(
c
<
min4
)
{
buf
[
0
]
=
((
c
>>
12
)
|
UTF8_cval3
);
buf
[
1
]
=
(((
c
>>
6
)
&
0x3f
)
|
0x80
);
buf
[
2
]
=
((
c
&
0x3f
)
|
0x80
);
buf
[
0
]
=
(
char
)(
(
c
>>
12
)
|
UTF8_cval3
);
buf
[
1
]
=
(
char
)(
((
c
>>
6
)
&
0x3f
)
|
0x80
);
buf
[
2
]
=
(
char
)(
(
c
&
0x3f
)
|
0x80
);
return
3
;
}
if
(
c
<
0x110000
)
{
buf
[
0
]
=
((
c
>>
18
)
|
UTF8_cval4
);
buf
[
1
]
=
(((
c
>>
12
)
&
0x3f
)
|
0x80
);
buf
[
2
]
=
(((
c
>>
6
)
&
0x3f
)
|
0x80
);
buf
[
3
]
=
((
c
&
0x3f
)
|
0x80
);
buf
[
0
]
=
(
char
)(
(
c
>>
18
)
|
UTF8_cval4
);
buf
[
1
]
=
(
char
)(
((
c
>>
12
)
&
0x3f
)
|
0x80
);
buf
[
2
]
=
(
char
)(
((
c
>>
6
)
&
0x3f
)
|
0x80
);
buf
[
3
]
=
(
char
)(
(
c
&
0x3f
)
|
0x80
);
return
4
;
}
return
0
;
}
int
XmlUtf16Encode
(
int
charNum
,
unsigned
short
*
buf
)
int
FASTCALL
XmlUtf16Encode
(
int
charNum
,
unsigned
short
*
buf
)
{
if
(
charNum
<
0
)
return
0
;
if
(
charNum
<
0x10000
)
{
buf
[
0
]
=
charNum
;
buf
[
0
]
=
(
unsigned
short
)
charNum
;
return
1
;
}
if
(
charNum
<
0x110000
)
{
charNum
-=
0x10000
;
buf
[
0
]
=
(
charNum
>>
10
)
+
0xD800
;
buf
[
1
]
=
(
charNum
&
0x3FF
)
+
0xDC00
;
buf
[
0
]
=
(
unsigned
short
)((
charNum
>>
10
)
+
0xD800
)
;
buf
[
1
]
=
(
unsigned
short
)((
charNum
&
0x3FF
)
+
0xDC00
)
;
return
2
;
}
return
0
;
...
...
@@ -1186,65 +1239,68 @@ struct unknown_encoding {
char
utf8
[
256
][
4
];
};
int
XmlSizeOfUnknownEncoding
(
void
)
#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc))
int
XmlSizeOfUnknownEncoding
(
void
)
{
return
sizeof
(
struct
unknown_encoding
);
}
static
int
unknown_isName
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
unknown_isName
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
int
c
=
((
const
struct
unknown_encoding
*
)
enc
)
->
convert
(((
const
struct
unknown_encoding
*
)
enc
)
->
userData
,
p
);
const
struct
unknown_encoding
*
uenc
=
AS_UNKNOWN_ENCODING
(
enc
);
int
c
=
uenc
->
convert
(
uenc
->
userData
,
p
);
if
(
c
&
~
0xFFFF
)
return
0
;
return
UCS2_GET_NAMING
(
namePages
,
c
>>
8
,
c
&
0xFF
);
}
static
int
unknown_isNmstrt
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
unknown_isNmstrt
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
int
c
=
((
const
struct
unknown_encoding
*
)
enc
)
->
convert
(((
const
struct
unknown_encoding
*
)
enc
)
->
userData
,
p
);
const
struct
unknown_encoding
*
uenc
=
AS_UNKNOWN_ENCODING
(
enc
);
int
c
=
uenc
->
convert
(
uenc
->
userData
,
p
);
if
(
c
&
~
0xFFFF
)
return
0
;
return
UCS2_GET_NAMING
(
nmstrtPages
,
c
>>
8
,
c
&
0xFF
);
}
static
int
unknown_isInvalid
(
const
ENCODING
*
enc
,
const
char
*
p
)
static
int
PTRFASTCALL
unknown_isInvalid
(
const
ENCODING
*
enc
,
const
char
*
p
)
{
int
c
=
((
const
struct
unknown_encoding
*
)
enc
)
->
convert
(((
const
struct
unknown_encoding
*
)
enc
)
->
userData
,
p
);
const
struct
unknown_encoding
*
uenc
=
AS_UNKNOWN_ENCODING
(
enc
);
int
c
=
uenc
->
convert
(
uenc
->
userData
,
p
);
return
(
c
&
~
0xFFFF
)
||
checkCharRefNumber
(
c
)
<
0
;
}
static
void
unknown_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
static
void
PTRCALL
unknown_toUtf8
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
)
{
const
struct
unknown_encoding
*
uenc
=
AS_UNKNOWN_ENCODING
(
enc
);
char
buf
[
XML_UTF8_ENCODE_MAX
];
for
(;;)
{
const
char
*
utf8
;
int
n
;
if
(
*
fromP
==
fromLim
)
break
;
utf8
=
((
const
struct
unknown_encoding
*
)
enc
)
->
utf8
[(
unsigned
char
)
**
fromP
];
utf8
=
uenc
->
utf8
[(
unsigned
char
)
**
fromP
];
n
=
*
utf8
++
;
if
(
n
==
0
)
{
int
c
=
((
const
struct
unknown_encoding
*
)
enc
)
->
convert
(((
const
struct
unknown_encoding
*
)
enc
)
->
userData
,
*
fromP
);
int
c
=
uenc
->
convert
(
uenc
->
userData
,
*
fromP
);
n
=
XmlUtf8Encode
(
c
,
buf
);
if
(
n
>
toLim
-
*
toP
)
break
;
break
;
utf8
=
buf
;
*
fromP
+=
(
(
const
struct
normal_encoding
*
)
enc
)
->
type
[(
unsigned
char
)
**
fromP
]
-
(
BT_LEAD2
-
2
);
*
fromP
+=
(
AS_NORMAL_ENCODING
(
enc
)
->
type
[(
unsigned
char
)
**
fromP
]
-
(
BT_LEAD2
-
2
)
);
}
else
{
if
(
n
>
toLim
-
*
toP
)
break
;
break
;
(
*
fromP
)
++
;
}
do
{
...
...
@@ -1253,19 +1309,19 @@ void unknown_toUtf8(const ENCODING *enc,
}
}
static
void
unknown_toUtf16
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
)
static
void
PTRCALL
unknown_toUtf16
(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
)
{
const
struct
unknown_encoding
*
uenc
=
AS_UNKNOWN_ENCODING
(
enc
);
while
(
*
fromP
!=
fromLim
&&
*
toP
!=
toLim
)
{
unsigned
short
c
=
((
const
struct
unknown_encoding
*
)
enc
)
->
utf16
[(
unsigned
char
)
**
fromP
];
unsigned
short
c
=
uenc
->
utf16
[(
unsigned
char
)
**
fromP
];
if
(
c
==
0
)
{
c
=
(
unsigned
short
)
((
const
struct
unknown_encoding
*
)
enc
)
->
convert
(((
const
struct
unknown_encoding
*
)
enc
)
->
userData
,
*
fromP
);
*
fromP
+=
(
(
const
struct
normal_encoding
*
)
enc
)
->
type
[(
unsigned
char
)
**
fromP
]
-
(
BT_LEAD2
-
2
);
c
=
(
unsigned
short
)
uenc
->
convert
(
uenc
->
userData
,
*
fromP
);
*
fromP
+=
(
AS_NORMAL_ENCODING
(
enc
)
->
type
[(
unsigned
char
)
**
fromP
]
-
(
BT_LEAD2
-
2
)
);
}
else
(
*
fromP
)
++
;
...
...
@@ -1275,18 +1331,18 @@ void unknown_toUtf16(const ENCODING *enc,
ENCODING
*
XmlInitUnknownEncoding
(
void
*
mem
,
int
*
table
,
int
(
*
convert
)(
void
*
userData
,
const
char
*
p
),
void
*
userData
)
int
*
table
,
CONVERTER
convert
,
void
*
userData
)
{
int
i
;
struct
unknown_encoding
*
e
=
mem
;
struct
unknown_encoding
*
e
=
(
struct
unknown_encoding
*
)
mem
;
for
(
i
=
0
;
i
<
(
int
)
sizeof
(
struct
normal_encoding
);
i
++
)
((
char
*
)
mem
)[
i
]
=
((
char
*
)
&
latin1_encoding
)[
i
];
for
(
i
=
0
;
i
<
128
;
i
++
)
if
(
latin1_encoding
.
type
[
i
]
!=
BT_OTHER
&&
latin1_encoding
.
type
[
i
]
!=
BT_NONXML
&&
table
[
i
]
!=
i
)
&&
table
[
i
]
!=
i
)
return
0
;
for
(
i
=
0
;
i
<
256
;
i
++
)
{
int
c
=
table
[
i
];
...
...
@@ -1299,20 +1355,20 @@ XmlInitUnknownEncoding(void *mem,
}
else
if
(
c
<
0
)
{
if
(
c
<
-
4
)
return
0
;
e
->
normal
.
type
[
i
]
=
BT_LEAD2
-
(
c
+
2
);
return
0
;
e
->
normal
.
type
[
i
]
=
(
unsigned
char
)(
BT_LEAD2
-
(
c
+
2
)
);
e
->
utf8
[
i
][
0
]
=
0
;
e
->
utf16
[
i
]
=
0
;
}
else
if
(
c
<
0x80
)
{
if
(
latin1_encoding
.
type
[
c
]
!=
BT_OTHER
&&
latin1_encoding
.
type
[
c
]
!=
BT_NONXML
&&
c
!=
i
)
return
0
;
&&
latin1_encoding
.
type
[
c
]
!=
BT_NONXML
&&
c
!=
i
)
return
0
;
e
->
normal
.
type
[
i
]
=
latin1_encoding
.
type
[
c
];
e
->
utf8
[
i
][
0
]
=
1
;
e
->
utf8
[
i
][
1
]
=
(
char
)
c
;
e
->
utf16
[
i
]
=
c
==
0
?
0xFFFF
:
c
;
e
->
utf16
[
i
]
=
(
unsigned
short
)(
c
==
0
?
0xFFFF
:
c
)
;
}
else
if
(
checkCharRefNumber
(
c
)
<
0
)
{
e
->
normal
.
type
[
i
]
=
BT_NONXML
;
...
...
@@ -1323,15 +1379,15 @@ XmlInitUnknownEncoding(void *mem,
}
else
{
if
(
c
>
0xFFFF
)
return
0
;
return
0
;
if
(
UCS2_GET_NAMING
(
nmstrtPages
,
c
>>
8
,
c
&
0xff
))
e
->
normal
.
type
[
i
]
=
BT_NMSTRT
;
e
->
normal
.
type
[
i
]
=
BT_NMSTRT
;
else
if
(
UCS2_GET_NAMING
(
namePages
,
c
>>
8
,
c
&
0xff
))
e
->
normal
.
type
[
i
]
=
BT_NAME
;
e
->
normal
.
type
[
i
]
=
BT_NAME
;
else
e
->
normal
.
type
[
i
]
=
BT_OTHER
;
e
->
normal
.
type
[
i
]
=
BT_OTHER
;
e
->
utf8
[
i
][
0
]
=
(
char
)
XmlUtf8Encode
(
c
,
e
->
utf8
[
i
]
+
1
);
e
->
utf16
[
i
]
=
c
;
e
->
utf16
[
i
]
=
(
unsigned
short
)
c
;
}
}
e
->
userData
=
userData
;
...
...
@@ -1367,26 +1423,30 @@ enum {
};
static
const
char
KW_ISO_8859_1
[]
=
{
ASCII_I
,
ASCII_S
,
ASCII_O
,
ASCII_MINUS
,
ASCII_8
,
ASCII_8
,
ASCII_5
,
ASCII_9
,
ASCII_MINUS
,
ASCII_1
,
'\0'
ASCII_I
,
ASCII_S
,
ASCII_O
,
ASCII_MINUS
,
ASCII_8
,
ASCII_8
,
ASCII_5
,
ASCII_9
,
ASCII_MINUS
,
ASCII_1
,
'\0'
};
static
const
char
KW_US_ASCII
[]
=
{
ASCII_U
,
ASCII_S
,
ASCII_MINUS
,
ASCII_A
,
ASCII_S
,
ASCII_C
,
ASCII_I
,
ASCII_I
,
'\0'
ASCII_U
,
ASCII_S
,
ASCII_MINUS
,
ASCII_A
,
ASCII_S
,
ASCII_C
,
ASCII_I
,
ASCII_I
,
'\0'
};
static
const
char
KW_UTF_8
[]
=
{
static
const
char
KW_UTF_8
[]
=
{
ASCII_U
,
ASCII_T
,
ASCII_F
,
ASCII_MINUS
,
ASCII_8
,
'\0'
};
static
const
char
KW_UTF_16
[]
=
{
static
const
char
KW_UTF_16
[]
=
{
ASCII_U
,
ASCII_T
,
ASCII_F
,
ASCII_MINUS
,
ASCII_1
,
ASCII_6
,
'\0'
};
static
const
char
KW_UTF_16BE
[]
=
{
ASCII_U
,
ASCII_T
,
ASCII_F
,
ASCII_MINUS
,
ASCII_1
,
ASCII_6
,
ASCII_B
,
ASCII_E
,
'\0'
ASCII_U
,
ASCII_T
,
ASCII_F
,
ASCII_MINUS
,
ASCII_1
,
ASCII_6
,
ASCII_B
,
ASCII_E
,
'\0'
};
static
const
char
KW_UTF_16LE
[]
=
{
ASCII_U
,
ASCII_T
,
ASCII_F
,
ASCII_MINUS
,
ASCII_1
,
ASCII_6
,
ASCII_L
,
ASCII_E
,
'\0'
ASCII_U
,
ASCII_T
,
ASCII_F
,
ASCII_MINUS
,
ASCII_1
,
ASCII_6
,
ASCII_L
,
ASCII_E
,
'\0'
};
static
int
getEncodingIndex
(
const
char
*
name
)
static
int
FASTCALL
getEncodingIndex
(
const
char
*
name
)
{
static
const
char
*
encodingNames
[]
=
{
KW_ISO_8859_1
,
...
...
@@ -1397,7 +1457,7 @@ int getEncodingIndex(const char *name)
KW_UTF_16LE
,
};
int
i
;
if
(
name
==
0
)
if
(
name
==
NULL
)
return
NO_ENC
;
for
(
i
=
0
;
i
<
(
int
)(
sizeof
(
encodingNames
)
/
sizeof
(
encodingNames
[
0
]));
i
++
)
if
(
streqci
(
name
,
encodingNames
[
i
]))
...
...
@@ -1405,27 +1465,28 @@ int getEncodingIndex(const char *name)
return
UNKNOWN_ENC
;
}
/* For binary compatibility, we store the index of the encoding specified
at initialization in the isUtf16 member. */
/* For binary compatibility, we store the index of the encoding
specified at initialization in the isUtf16 member.
*/
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i)
/* This is what detects the encoding.
encodingTable maps from encoding indices to encodings;
INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding;
state is XML_CONTENT_STATE if we're parsing an external text entity,
and
XML_PROLOG_STATE otherwise.
/* This is what detects the encoding.
encodingTable maps from
encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
the external (protocol) specified encoding; state is
XML_CONTENT_STATE if we're parsing an external text entity, and
XML_PROLOG_STATE otherwise.
*/
static
in
t
in
itScan
(
const
ENCODING
**
encodingTable
,
const
INIT_ENCODING
*
enc
,
int
state
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
initScan
(
const
ENCODING
**
encodingTable
,
const
INIT_ENCODING
*
enc
,
int
state
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
const
ENCODING
**
encPtr
;
...
...
@@ -1452,8 +1513,8 @@ int initScan(const ENCODING **encodingTable,
case
0xFF
:
case
0xEF
:
/* possibly first byte of UTF-8 BOM */
if
(
INIT_ENC_INDEX
(
enc
)
==
ISO_8859_1_ENC
&&
state
==
XML_CONTENT_STATE
)
break
;
&&
state
==
XML_CONTENT_STATE
)
break
;
/* fall through */
case
0x00
:
case
0x3C
:
...
...
@@ -1464,23 +1525,23 @@ int initScan(const ENCODING **encodingTable,
switch
(((
unsigned
char
)
ptr
[
0
]
<<
8
)
|
(
unsigned
char
)
ptr
[
1
])
{
case
0xFEFF
:
if
(
INIT_ENC_INDEX
(
enc
)
==
ISO_8859_1_ENC
&&
state
==
XML_CONTENT_STATE
)
break
;
&&
state
==
XML_CONTENT_STATE
)
break
;
*
nextTokPtr
=
ptr
+
2
;
*
encPtr
=
encodingTable
[
UTF_16BE_ENC
];
return
XML_TOK_BOM
;
/* 00 3C is handled in the default case */
case
0x3C00
:
if
((
INIT_ENC_INDEX
(
enc
)
==
UTF_16BE_ENC
||
INIT_ENC_INDEX
(
enc
)
==
UTF_16_ENC
)
&&
state
==
XML_CONTENT_STATE
)
break
;
||
INIT_ENC_INDEX
(
enc
)
==
UTF_16_ENC
)
&&
state
==
XML_CONTENT_STATE
)
break
;
*
encPtr
=
encodingTable
[
UTF_16LE_ENC
];
return
XmlTok
(
*
encPtr
,
state
,
ptr
,
end
,
nextTokPtr
);
case
0xFFFE
:
if
(
INIT_ENC_INDEX
(
enc
)
==
ISO_8859_1_ENC
&&
state
==
XML_CONTENT_STATE
)
break
;
&&
state
==
XML_CONTENT_STATE
)
break
;
*
nextTokPtr
=
ptr
+
2
;
*
encPtr
=
encodingTable
[
UTF_16LE_ENC
];
return
XML_TOK_BOM
;
...
...
@@ -1489,45 +1550,50 @@ int initScan(const ENCODING **encodingTable,
/* If there's an explicitly specified (external) encoding
of ISO-8859-1 or some flavour of UTF-16
and this is an external text entity,
don't look for the BOM,
because it might be a legal data. */
don't look for the BOM,
because it might be a legal data.
*/
if
(
state
==
XML_CONTENT_STATE
)
{
int
e
=
INIT_ENC_INDEX
(
enc
);
if
(
e
==
ISO_8859_1_ENC
||
e
==
UTF_16BE_ENC
||
e
==
UTF_16LE_ENC
||
e
==
UTF_16_ENC
)
break
;
int
e
=
INIT_ENC_INDEX
(
enc
);
if
(
e
==
ISO_8859_1_ENC
||
e
==
UTF_16BE_ENC
||
e
==
UTF_16LE_ENC
||
e
==
UTF_16_ENC
)
break
;
}
if
(
ptr
+
2
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
if
((
unsigned
char
)
ptr
[
2
]
==
0xBF
)
{
*
nextTokPtr
=
ptr
+
3
;
*
encPtr
=
encodingTable
[
UTF_8_ENC
];
return
XML_TOK_BOM
;
*
nextTokPtr
=
ptr
+
3
;
*
encPtr
=
encodingTable
[
UTF_8_ENC
];
return
XML_TOK_BOM
;
}
break
;
default:
if
(
ptr
[
0
]
==
'\0'
)
{
/* 0 isn't a legal data character. Furthermore a document entity can only
start with ASCII characters. So the only way this can fail to be big-endian
UTF-16 if it it's an external parsed general entity that's labelled as
UTF-16LE. */
if
(
state
==
XML_CONTENT_STATE
&&
INIT_ENC_INDEX
(
enc
)
==
UTF_16LE_ENC
)
break
;
*
encPtr
=
encodingTable
[
UTF_16BE_ENC
];
return
XmlTok
(
*
encPtr
,
state
,
ptr
,
end
,
nextTokPtr
);
/* 0 isn't a legal data character. Furthermore a document
entity can only start with ASCII characters. So the only
way this can fail to be big-endian UTF-16 if it it's an
external parsed general entity that's labelled as
UTF-16LE.
*/
if
(
state
==
XML_CONTENT_STATE
&&
INIT_ENC_INDEX
(
enc
)
==
UTF_16LE_ENC
)
break
;
*
encPtr
=
encodingTable
[
UTF_16BE_ENC
];
return
XmlTok
(
*
encPtr
,
state
,
ptr
,
end
,
nextTokPtr
);
}
else
if
(
ptr
[
1
]
==
'\0'
)
{
/* We could recover here in the case:
- parsing an external entity
- second byte is 0
- no externally specified encoding
- no encoding declaration
by assuming UTF-16LE. But we don't, because this would mean when
presented just with a single byte, we couldn't reliably determine
whether we needed further bytes. */
if
(
state
==
XML_CONTENT_STATE
)
break
;
*
encPtr
=
encodingTable
[
UTF_16LE_ENC
];
return
XmlTok
(
*
encPtr
,
state
,
ptr
,
end
,
nextTokPtr
);
/* We could recover here in the case:
- parsing an external entity
- second byte is 0
- no externally specified encoding
- no encoding declaration
by assuming UTF-16LE. But we don't, because this would mean when
presented just with a single byte, we couldn't reliably determine
whether we needed further bytes.
*/
if
(
state
==
XML_CONTENT_STATE
)
break
;
*
encPtr
=
encodingTable
[
UTF_16LE_ENC
];
return
XmlTok
(
*
encPtr
,
state
,
ptr
,
end
,
nextTokPtr
);
}
break
;
}
...
...
@@ -1555,9 +1621,9 @@ int initScan(const ENCODING **encodingTable,
ENCODING
*
XmlInitUnknownEncodingNS
(
void
*
mem
,
int
*
table
,
int
(
*
convert
)(
void
*
userData
,
const
char
*
p
),
void
*
userData
)
int
*
table
,
CONVERTER
convert
,
void
*
userData
)
{
ENCODING
*
enc
=
XmlInitUnknownEncoding
(
mem
,
table
,
convert
,
userData
);
if
(
enc
)
...
...
Modules/expat/xmltok.h
View file @
fc03a94a
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef XmlTok_INCLUDED
...
...
@@ -11,19 +10,21 @@ extern "C" {
#endif
/* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB -5
/* ] or ]] at the end of the scan; might be start of
illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_NONE -4
/* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3
/* A CR at the end of the scan;
might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2
/* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1
/* only part of a token */
#define XML_TOK_TRAILING_RSQB -5
/* ] or ]] at the end of the scan; might be
start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_NONE -4
/* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3
/* A CR at the end of the scan;
might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2
/* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1
/* only part of a token */
#define XML_TOK_INVALID 0
/* The following tokens are returned by XmlContentTok; some are also
returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */
returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok.
*/
#define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3
/* empty element tag <e/> */
...
...
@@ -33,22 +34,24 @@ extern "C" {
#define XML_TOK_DATA_NEWLINE 7
#define XML_TOK_CDATA_SECT_OPEN 8
#define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10
/* numeric character reference */
#define XML_TOK_CHAR_REF 10
/* numeric character reference */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_PI 11
/* processing instruction */
#define XML_TOK_XML_DECL 12
/* XML decl or text decl */
/* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_PI 11
/* processing instruction */
#define XML_TOK_XML_DECL 12
/* XML decl or text decl */
#define XML_TOK_COMMENT 13
#define XML_TOK_BOM 14
/* Byte order mark */
#define XML_TOK_BOM 14
/* Byte order mark */
/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16
/* <!foo */
#define XML_TOK_DECL_CLOSE 17
/* > */
#define XML_TOK_DECL_OPEN 16
/* <!foo */
#define XML_TOK_DECL_CLOSE 17
/* > */
#define XML_TOK_NAME 18
#define XML_TOK_NMTOKEN 19
#define XML_TOK_POUND_NAME 20
/* #name */
#define XML_TOK_OR 21
/* | */
#define XML_TOK_POUND_NAME 20
/* #name */
#define XML_TOK_OR 21
/* | */
#define XML_TOK_PERCENT 22
#define XML_TOK_OPEN_PAREN 23
#define XML_TOK_CLOSE_PAREN 24
...
...
@@ -59,14 +62,14 @@ extern "C" {
#define XML_TOK_INSTANCE_START 29
/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION 30
/* name? */
#define XML_TOK_NAME_ASTERISK 31
/* name* */
#define XML_TOK_NAME_PLUS 32
/* name+ */
#define XML_TOK_COND_SECT_OPEN 33
/* <![ */
#define XML_TOK_COND_SECT_CLOSE 34
/* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35
/* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36
/* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37
/* )+ */
#define XML_TOK_NAME_QUESTION 30
/* name? */
#define XML_TOK_NAME_ASTERISK 31
/* name* */
#define XML_TOK_NAME_PLUS 32
/* name+ */
#define XML_TOK_COND_SECT_OPEN 33
/* <![ */
#define XML_TOK_COND_SECT_CLOSE 34
/* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35
/* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36
/* )* */
#define XML_TOK_CLOSE_PAREN_PLUS 37
/* )+ */
#define XML_TOK_COMMA 38
/* The following token is returned only by XmlAttributeValueTok */
...
...
@@ -75,8 +78,9 @@ extern "C" {
/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40
/* With namespace processing this is returned by XmlPrologTok
for a name with a colon. */
/* With namespace processing this is returned by XmlPrologTok for a
name with a colon.
*/
#define XML_TOK_PREFIXED_NAME 41
#ifdef XML_DTD
...
...
@@ -121,64 +125,73 @@ typedef struct {
struct
encoding
;
typedef
struct
encoding
ENCODING
;
typedef
int
(
PTRCALL
*
SCANNER
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
,
const
char
**
);
struct
encoding
{
int
(
*
scanners
[
XML_N_STATES
])(
const
ENCODING
*
,
const
char
*
,
const
char
*
,
const
char
**
);
int
(
*
literalScanners
[
XML_N_LITERAL_TYPES
])(
const
ENCODING
*
,
const
char
*
,
const
char
*
,
const
char
**
);
int
(
*
sameName
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
);
int
(
*
nameMatchesAscii
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
,
const
char
*
);
int
(
*
nameLength
)(
const
ENCODING
*
,
const
char
*
);
const
char
*
(
*
skipS
)(
const
ENCODING
*
,
const
char
*
);
int
(
*
getAtts
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
int
attsMax
,
ATTRIBUTE
*
atts
);
int
(
*
charRefNumber
)(
const
ENCODING
*
enc
,
const
char
*
ptr
);
int
(
*
predefinedEntityName
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
);
void
(
*
updatePosition
)(
const
ENCODING
*
,
const
char
*
ptr
,
const
char
*
end
,
POSITION
*
);
int
(
*
isPublicId
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
);
void
(
*
utf8Convert
)(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
);
void
(
*
utf16Convert
)(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
);
SCANNER
scanners
[
XML_N_STATES
];
SCANNER
literalScanners
[
XML_N_LITERAL_TYPES
];
int
(
PTRCALL
*
sameName
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
);
int
(
PTRCALL
*
nameMatchesAscii
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
,
const
char
*
);
int
(
PTRFASTCALL
*
nameLength
)(
const
ENCODING
*
,
const
char
*
);
const
char
*
(
PTRFASTCALL
*
skipS
)(
const
ENCODING
*
,
const
char
*
);
int
(
PTRCALL
*
getAtts
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
int
attsMax
,
ATTRIBUTE
*
atts
);
int
(
PTRFASTCALL
*
charRefNumber
)(
const
ENCODING
*
enc
,
const
char
*
ptr
);
int
(
PTRCALL
*
predefinedEntityName
)(
const
ENCODING
*
,
const
char
*
,
const
char
*
);
void
(
PTRCALL
*
updatePosition
)(
const
ENCODING
*
,
const
char
*
ptr
,
const
char
*
end
,
POSITION
*
);
int
(
PTRCALL
*
isPublicId
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
);
void
(
PTRCALL
*
utf8Convert
)(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
char
**
toP
,
const
char
*
toLim
);
void
(
PTRCALL
*
utf16Convert
)(
const
ENCODING
*
enc
,
const
char
**
fromP
,
const
char
*
fromLim
,
unsigned
short
**
toP
,
const
unsigned
short
*
toLim
);
int
minBytesPerChar
;
char
isUtf8
;
char
isUtf16
;
};
/*
Scan the string starting at ptr until the end of the next complete token,
but do not scan past eptr. Return an integer giving the
type of token.
/*
Scan the string starting at ptr until the end of the next complete
token, but do not scan past eptr. Return an integer giving the
type of token.
Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
Return XML_TOK_PARTIAL when the string does not contain a complete token;
nextTokPtr will not be set.
Return XML_TOK_PARTIAL when the string does not contain a complete
token;
nextTokPtr will not be set.
Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr
will be set to point to the character which made the token invalid.
Return XML_TOK_INVALID when the string does not start a valid
token; nextTokPtr will be set to point to the character which made
the token invalid.
Otherwise the string starts with a valid token; nextTokPtr will be set to point
to the character following the end of that token.
Otherwise the string starts with a valid token; nextTokPtr will be
set to point
to the character following the end of that token.
Each data character counts as a single token, but adjacent data characters
may be returned together. Similarly for characters in the prolog outside
literals, comments and processing instructions.
Each data character counts as a single token, but adjacent data
characters may be returned together. Similarly for characters in
the prolog outside
literals, comments and processing instructions.
*/
...
...
@@ -201,9 +214,9 @@ literals, comments and processing instructions.
#endif
/* XML_DTD */
/* This is used for performing a 2nd-level tokenization on
the content of a literal that has already been returned by XmlTok. */
/* This is used for performing a 2nd-level tokenization on
the content
of a literal that has already been returned by XmlTok.
*/
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
(((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
...
...
@@ -250,48 +263,51 @@ typedef struct {
const
ENCODING
**
encPtr
;
}
INIT_ENCODING
;
int
XmlParseXmlDecl
(
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingNamePtr
,
const
ENCODING
**
namedEncodingPtr
,
int
*
standalonePtr
);
int
XmlInitEncoding
(
INIT_ENCODING
*
,
const
ENCODING
**
,
const
char
*
name
);
const
ENCODING
*
XmlGetUtf8InternalEncoding
(
void
);
const
ENCODING
*
XmlGetUtf16InternalEncoding
(
void
);
int
XmlUtf8Encode
(
int
charNumber
,
char
*
buf
);
int
XmlUtf16Encode
(
int
charNumber
,
unsigned
short
*
buf
);
int
XmlSizeOfUnknownEncoding
(
void
);
ENCODING
*
int
XmlParseXmlDecl
(
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingNamePtr
,
const
ENCODING
**
namedEncodingPtr
,
int
*
standalonePtr
);
int
XmlInitEncoding
(
INIT_ENCODING
*
,
const
ENCODING
**
,
const
char
*
name
);
const
ENCODING
*
XmlGetUtf8InternalEncoding
(
void
);
const
ENCODING
*
XmlGetUtf16InternalEncoding
(
void
);
int
FASTCALL
XmlUtf8Encode
(
int
charNumber
,
char
*
buf
);
int
FASTCALL
XmlUtf16Encode
(
int
charNumber
,
unsigned
short
*
buf
);
int
XmlSizeOfUnknownEncoding
(
void
);
typedef
int
(
*
CONVERTER
)(
void
*
userData
,
const
char
*
p
);
ENCODING
*
XmlInitUnknownEncoding
(
void
*
mem
,
int
*
table
,
int
(
*
conv
)(
void
*
userData
,
const
char
*
p
),
void
*
userData
);
int
XmlParseXmlDeclNS
(
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingNamePtr
,
const
ENCODING
**
namedEncodingPtr
,
int
*
standalonePtr
);
int
XmlInitEncodingNS
(
INIT_ENCODING
*
,
const
ENCODING
**
,
const
char
*
name
);
const
ENCODING
*
XmlGetUtf8InternalEncodingNS
(
void
);
const
ENCODING
*
XmlGetUtf16InternalEncodingNS
(
void
);
ENCODING
*
int
*
table
,
CONVERTER
convert
,
void
*
userData
);
int
XmlParseXmlDeclNS
(
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingNamePtr
,
const
ENCODING
**
namedEncodingPtr
,
int
*
standalonePtr
);
int
XmlInitEncodingNS
(
INIT_ENCODING
*
,
const
ENCODING
**
,
const
char
*
name
);
const
ENCODING
*
XmlGetUtf8InternalEncodingNS
(
void
);
const
ENCODING
*
XmlGetUtf16InternalEncodingNS
(
void
);
ENCODING
*
XmlInitUnknownEncodingNS
(
void
*
mem
,
int
*
table
,
int
(
*
conv
)(
void
*
userData
,
const
char
*
p
)
,
void
*
userData
);
int
*
table
,
CONVERTER
convert
,
void
*
userData
);
#ifdef __cplusplus
}
#endif
...
...
Modules/expat/xmltok_impl.c
View file @
fc03a94a
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
#ifndef IS_INVALID_CHAR
...
...
@@ -10,7 +9,7 @@ See the file COPYING for copying permission.
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
case BT_LEAD ## n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
return XML_TOK_PARTIAL_CHAR; \
if (IS_INVALID_CHAR(enc, ptr, n)) { \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID; \
...
...
@@ -87,9 +86,9 @@ See the file COPYING for copying permission.
/* ptr points to character following "<!-" */
static
int
PREFIX
(
scanComment
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanComment
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
!=
end
)
{
if
(
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_MINUS
))
{
...
...
@@ -101,22 +100,22 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
INVALID_CASES
(
ptr
,
nextTokPtr
)
case
BT_MINUS
:
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_MINUS
))
{
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_COMMENT
;
}
break
;
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_MINUS
))
{
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_COMMENT
;
}
break
;
default:
ptr
+=
MINBPC
(
enc
);
break
;
ptr
+=
MINBPC
(
enc
);
break
;
}
}
}
...
...
@@ -125,9 +124,9 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<!" */
static
int
PREFIX
(
scanDecl
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanDecl
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
...
...
@@ -149,12 +148,12 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_PERCNT
:
if
(
ptr
+
MINBPC
(
enc
)
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
/* don't allow <!ENTITY% foo "whatever"> */
switch
(
BYTE_TYPE
(
enc
,
ptr
+
MINBPC
(
enc
)))
{
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
case
BT_PERCNT
:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
/* fall through */
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
...
...
@@ -172,8 +171,9 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
return
XML_TOK_PARTIAL
;
}
static
int
PREFIX
(
checkPiTarget
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
int
*
tokPtr
)
static
int
PTRCALL
PREFIX
(
checkPiTarget
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
int
*
tokPtr
)
{
int
upper
=
0
;
*
tokPtr
=
XML_TOK_PI
;
...
...
@@ -216,9 +216,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<?" */
static
int
PREFIX
(
scanPi
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanPi
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
int
tok
;
const
char
*
target
=
ptr
;
...
...
@@ -235,39 +235,39 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
CHECK_NAME_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
if
(
!
PREFIX
(
checkPiTarget
)(
enc
,
target
,
ptr
,
&
tok
))
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
ptr
+=
MINBPC
(
enc
);
while
(
ptr
!=
end
)
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
INVALID_CASES
(
ptr
,
nextTokPtr
)
case
BT_QUEST
:
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
tok
;
}
break
;
default:
ptr
+=
MINBPC
(
enc
);
break
;
}
case
BT_QUEST
:
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
tok
;
}
break
;
default:
ptr
+=
MINBPC
(
enc
);
break
;
}
}
return
XML_TOK_PARTIAL
;
case
BT_QUEST
:
if
(
!
PREFIX
(
checkPiTarget
)(
enc
,
target
,
ptr
,
&
tok
))
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
tok
;
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
tok
;
}
/* fall through */
default:
...
...
@@ -278,12 +278,12 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
return
XML_TOK_PARTIAL
;
}
static
int
PREFIX
(
scanCdataSection
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanCdataSection
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
static
const
char
CDATA_LSQB
[]
=
{
ASCII_C
,
ASCII_D
,
ASCII_A
,
ASCII_T
,
ASCII_A
,
ASCII_LSQB
};
static
const
char
CDATA_LSQB
[]
=
{
ASCII_C
,
ASCII_D
,
ASCII_A
,
ASCII_T
,
ASCII_A
,
ASCII_LSQB
};
int
i
;
/* CDATA[ */
if
(
end
-
ptr
<
6
*
MINBPC
(
enc
))
...
...
@@ -298,9 +298,9 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e
return
XML_TOK_CDATA_SECT_OPEN
;
}
static
int
PREFIX
(
cdataSectionTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
cdataSectionTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
==
end
)
return
XML_TOK_NONE
;
...
...
@@ -309,7 +309,7 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
if
(
n
&
(
MINBPC
(
enc
)
-
1
))
{
n
&=
~
(
MINBPC
(
enc
)
-
1
);
if
(
n
==
0
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
end
=
ptr
+
n
;
}
}
...
...
@@ -350,8 +350,8 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
...
...
@@ -376,9 +376,9 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
/* ptr points to character following "</" */
static
int
PREFIX
(
scanEndTag
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanEndTag
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
...
...
@@ -393,21 +393,22 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
CHECK_NAME_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
for
(
ptr
+=
MINBPC
(
enc
);
ptr
!=
end
;
ptr
+=
MINBPC
(
enc
))
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
break
;
case
BT_GT
:
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
break
;
case
BT_GT
:
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_END_TAG
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
}
return
XML_TOK_PARTIAL
;
#ifdef XML_NS
case
BT_COLON
:
/* no need to check qname syntax here, since end-tag must match exactly */
/* no need to check qname syntax here,
since end-tag must match exactly */
ptr
+=
MINBPC
(
enc
);
break
;
#endif
...
...
@@ -424,9 +425,9 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&#X" */
static
int
PREFIX
(
scanHexCharRef
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanHexCharRef
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
!=
end
)
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
...
...
@@ -441,13 +442,13 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_DIGIT
:
case
BT_HEX
:
break
;
break
;
case
BT_SEMI
:
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_CHAR_REF
;
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_CHAR_REF
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
}
}
...
...
@@ -456,9 +457,9 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end
/* ptr points to character following "&#" */
static
int
PREFIX
(
scanCharRef
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanCharRef
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
!=
end
)
{
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_x
))
...
...
@@ -473,13 +474,13 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
for
(
ptr
+=
MINBPC
(
enc
);
ptr
!=
end
;
ptr
+=
MINBPC
(
enc
))
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_DIGIT
:
break
;
break
;
case
BT_SEMI
:
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_CHAR_REF
;
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_CHAR_REF
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
}
}
...
...
@@ -488,9 +489,9 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&" */
static
int
PREFIX
(
scanRef
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanRef
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
...
...
@@ -518,9 +519,9 @@ int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following first character of attribute name */
static
int
PREFIX
(
scanAtts
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanAtts
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
#ifdef XML_NS
int
hadColon
=
0
;
...
...
@@ -531,142 +532,141 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
case
BT_COLON
:
if
(
hadColon
)
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
hadColon
=
1
;
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NMSTRT_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
break
;
#endif
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
for
(;;)
{
int
t
;
int
t
;
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
t
=
BYTE_TYPE
(
enc
,
ptr
);
if
(
t
==
BT_EQUALS
)
break
;
switch
(
t
)
{
case
BT_S
:
case
BT_LF
:
case
BT_CR
:
break
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
t
=
BYTE_TYPE
(
enc
,
ptr
);
if
(
t
==
BT_EQUALS
)
break
;
switch
(
t
)
{
case
BT_S
:
case
BT_LF
:
case
BT_CR
:
break
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
}
/* fall through */
case
BT_EQUALS
:
{
int
open
;
int
open
;
#ifdef XML_NS
hadColon
=
0
;
hadColon
=
0
;
#endif
for
(;;)
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
open
=
BYTE_TYPE
(
enc
,
ptr
);
if
(
open
==
BT_QUOT
||
open
==
BT_APOS
)
break
;
switch
(
open
)
{
case
BT_S
:
case
BT_LF
:
case
BT_CR
:
break
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
}
ptr
+=
MINBPC
(
enc
);
/* in attribute value */
for
(;;)
{
int
t
;
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
t
=
BYTE_TYPE
(
enc
,
ptr
);
if
(
t
==
open
)
break
;
switch
(
t
)
{
INVALID_CASES
(
ptr
,
nextTokPtr
)
case
BT_AMP
:
{
int
tok
=
PREFIX
(
scanRef
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
&
ptr
);
if
(
tok
<=
0
)
{
if
(
tok
==
XML_TOK_INVALID
)
*
nextTokPtr
=
ptr
;
return
tok
;
}
break
;
}
case
BT_LT
:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
default:
ptr
+=
MINBPC
(
enc
);
break
;
}
}
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
break
;
case
BT_SOL
:
goto
sol
;
case
BT_GT
:
goto
gt
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
/* ptr points to closing quote */
for
(;;)
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NMSTRT_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
continue
;
case
BT_GT
:
for
(;;)
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
open
=
BYTE_TYPE
(
enc
,
ptr
);
if
(
open
==
BT_QUOT
||
open
==
BT_APOS
)
break
;
switch
(
open
)
{
case
BT_S
:
case
BT_LF
:
case
BT_CR
:
break
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
}
ptr
+=
MINBPC
(
enc
);
/* in attribute value */
for
(;;)
{
int
t
;
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
t
=
BYTE_TYPE
(
enc
,
ptr
);
if
(
t
==
open
)
break
;
switch
(
t
)
{
INVALID_CASES
(
ptr
,
nextTokPtr
)
case
BT_AMP
:
{
int
tok
=
PREFIX
(
scanRef
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
&
ptr
);
if
(
tok
<=
0
)
{
if
(
tok
==
XML_TOK_INVALID
)
*
nextTokPtr
=
ptr
;
return
tok
;
}
break
;
}
case
BT_LT
:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
default:
ptr
+=
MINBPC
(
enc
);
break
;
}
}
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
break
;
case
BT_SOL
:
goto
sol
;
case
BT_GT
:
goto
gt
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
/* ptr points to closing quote */
for
(;;)
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NMSTRT_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
continue
;
case
BT_GT
:
gt:
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_START_TAG_WITH_ATTS
;
case
BT_SOL
:
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_START_TAG_WITH_ATTS
;
case
BT_SOL
:
sol:
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
if
(
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_EMPTY_ELEMENT_WITH_ATTS
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
break
;
}
break
;
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
if
(
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_EMPTY_ELEMENT_WITH_ATTS
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
break
;
}
break
;
}
default:
*
nextTokPtr
=
ptr
;
...
...
@@ -678,9 +678,9 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<" */
static
int
PREFIX
(
scanLt
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanLt
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
#ifdef XML_NS
int
hadColon
;
...
...
@@ -696,7 +696,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case
BT_MINUS
:
return
PREFIX
(
scanComment
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
case
BT_LSQB
:
return
PREFIX
(
scanCdataSection
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
return
PREFIX
(
scanCdataSection
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
...
...
@@ -718,13 +719,13 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
case
BT_COLON
:
if
(
hadColon
)
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
hadColon
=
1
;
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NMSTRT_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
default:
...
...
@@ -736,23 +737,23 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
{
ptr
+=
MINBPC
(
enc
);
while
(
ptr
!=
end
)
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NMSTRT_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
case
BT_GT
:
goto
gt
;
case
BT_SOL
:
goto
sol
;
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
ptr
+=
MINBPC
(
enc
);
continue
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
return
PREFIX
(
scanAtts
)(
enc
,
ptr
,
end
,
nextTokPtr
);
}
return
XML_TOK_PARTIAL
;
while
(
ptr
!=
end
)
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NMSTRT_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
case
BT_GT
:
goto
gt
;
case
BT_SOL
:
goto
sol
;
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
ptr
+=
MINBPC
(
enc
);
continue
;
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
return
PREFIX
(
scanAtts
)(
enc
,
ptr
,
end
,
nextTokPtr
);
}
return
XML_TOK_PARTIAL
;
}
case
BT_GT
:
gt:
...
...
@@ -762,10 +763,10 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
sol:
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
if
(
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_EMPTY_ELEMENT_NO_ATTS
;
...
...
@@ -777,9 +778,9 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
return
XML_TOK_PARTIAL
;
}
static
int
PREFIX
(
contentTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
contentTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
==
end
)
return
XML_TOK_NONE
;
...
...
@@ -788,7 +789,7 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
if
(
n
&
(
MINBPC
(
enc
)
-
1
))
{
n
&=
~
(
MINBPC
(
enc
)
-
1
);
if
(
n
==
0
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
end
=
ptr
+
n
;
}
}
...
...
@@ -833,8 +834,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
...
...
@@ -842,18 +843,18 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
#undef LEAD_CASE
case
BT_RSQB
:
if
(
ptr
+
MINBPC
(
enc
)
!=
end
)
{
if
(
!
CHAR_MATCHES
(
enc
,
ptr
+
MINBPC
(
enc
),
ASCII_RSQB
))
{
ptr
+=
MINBPC
(
enc
);
break
;
}
if
(
ptr
+
2
*
MINBPC
(
enc
)
!=
end
)
{
if
(
!
CHAR_MATCHES
(
enc
,
ptr
+
2
*
MINBPC
(
enc
),
ASCII_GT
))
{
ptr
+=
MINBPC
(
enc
);
break
;
}
*
nextTokPtr
=
ptr
+
2
*
MINBPC
(
enc
);
return
XML_TOK_INVALID
;
}
if
(
!
CHAR_MATCHES
(
enc
,
ptr
+
MINBPC
(
enc
),
ASCII_RSQB
))
{
ptr
+=
MINBPC
(
enc
);
break
;
}
if
(
ptr
+
2
*
MINBPC
(
enc
)
!=
end
)
{
if
(
!
CHAR_MATCHES
(
enc
,
ptr
+
2
*
MINBPC
(
enc
),
ASCII_GT
))
{
ptr
+=
MINBPC
(
enc
);
break
;
}
*
nextTokPtr
=
ptr
+
2
*
MINBPC
(
enc
);
return
XML_TOK_INVALID
;
}
}
/* fall through */
case
BT_AMP
:
...
...
@@ -876,9 +877,9 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "%" */
static
int
PREFIX
(
scanPercent
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanPercent
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
...
...
@@ -905,9 +906,9 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
return
XML_TOK_PARTIAL
;
}
static
int
PREFIX
(
scanPoundName
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanPoundName
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
...
...
@@ -932,10 +933,10 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
return
-
XML_TOK_POUND_NAME
;
}
static
int
PREFIX
(
scanLit
)(
int
open
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
scanLit
)(
int
open
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
while
(
ptr
!=
end
)
{
int
t
=
BYTE_TYPE
(
enc
,
ptr
);
...
...
@@ -945,16 +946,16 @@ int PREFIX(scanLit)(int open, const ENCODING *enc,
case
BT_APOS
:
ptr
+=
MINBPC
(
enc
);
if
(
t
!=
open
)
break
;
break
;
if
(
ptr
==
end
)
return
-
XML_TOK_LITERAL
;
return
-
XML_TOK_LITERAL
;
*
nextTokPtr
=
ptr
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_S
:
case
BT_CR
:
case
BT_LF
:
case
BT_GT
:
case
BT_PERCNT
:
case
BT_LSQB
:
return
XML_TOK_LITERAL
;
return
XML_TOK_LITERAL
;
default:
return
XML_TOK_INVALID
;
return
XML_TOK_INVALID
;
}
default:
ptr
+=
MINBPC
(
enc
);
...
...
@@ -964,9 +965,9 @@ int PREFIX(scanLit)(int open, const ENCODING *enc,
return
XML_TOK_PARTIAL
;
}
static
int
PREFIX
(
prologTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
prologTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
int
tok
;
if
(
ptr
==
end
)
...
...
@@ -976,7 +977,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
if
(
n
&
(
MINBPC
(
enc
)
-
1
))
{
n
&=
~
(
MINBPC
(
enc
)
-
1
);
if
(
n
==
0
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
end
=
ptr
+
n
;
}
}
...
...
@@ -989,44 +990,47 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_EXCL
:
return
PREFIX
(
scanDecl
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
return
PREFIX
(
scanDecl
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
case
BT_QUEST
:
return
PREFIX
(
scanPi
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
return
PREFIX
(
scanPi
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
case
BT_NMSTRT
:
case
BT_HEX
:
case
BT_NONASCII
:
case
BT_LEAD2
:
case
BT_LEAD3
:
case
BT_LEAD4
:
*
nextTokPtr
=
ptr
-
MINBPC
(
enc
);
return
XML_TOK_INSTANCE_START
;
*
nextTokPtr
=
ptr
-
MINBPC
(
enc
);
return
XML_TOK_INSTANCE_START
;
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
case
BT_CR
:
if
(
ptr
+
MINBPC
(
enc
)
==
end
)
if
(
ptr
+
MINBPC
(
enc
)
==
end
)
{
*
nextTokPtr
=
end
;
/* indicate that this might be part of a CR/LF pair */
return
-
XML_TOK_PROLOG_S
;
}
/* fall through */
case
BT_S
:
case
BT_LF
:
for
(;;)
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
break
;
break
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
case
BT_S
:
case
BT_LF
:
break
;
break
;
case
BT_CR
:
/* don't split CR/LF pair */
if
(
ptr
+
MINBPC
(
enc
)
!=
end
)
break
;
/* fall through */
/* don't split CR/LF pair */
if
(
ptr
+
MINBPC
(
enc
)
!=
end
)
break
;
/* fall through */
default:
*
nextTokPtr
=
ptr
;
return
XML_TOK_PROLOG_S
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_PROLOG_S
;
}
}
*
nextTokPtr
=
ptr
;
...
...
@@ -1045,10 +1049,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return
-
XML_TOK_CLOSE_BRACKET
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_RSQB
))
{
if
(
ptr
+
MINBPC
(
enc
)
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
+
MINBPC
(
enc
),
ASCII_GT
))
{
*
nextTokPtr
=
ptr
+
2
*
MINBPC
(
enc
);
return
XML_TOK_COND_SECT_CLOSE
;
*
nextTokPtr
=
ptr
+
2
*
MINBPC
(
enc
);
return
XML_TOK_COND_SECT_CLOSE
;
}
}
*
nextTokPtr
=
ptr
;
...
...
@@ -1147,40 +1151,40 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr
+=
MINBPC
(
enc
);
switch
(
tok
)
{
case
XML_TOK_NAME
:
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
tok
=
XML_TOK_PREFIXED_NAME
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NAME_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
default:
tok
=
XML_TOK_NMTOKEN
;
break
;
}
break
;
if
(
ptr
==
end
)
return
XML_TOK_PARTIAL
;
tok
=
XML_TOK_PREFIXED_NAME
;
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
CHECK_NAME_CASES
(
enc
,
ptr
,
end
,
nextTokPtr
)
default:
tok
=
XML_TOK_NMTOKEN
;
break
;
}
break
;
case
XML_TOK_PREFIXED_NAME
:
tok
=
XML_TOK_NMTOKEN
;
break
;
tok
=
XML_TOK_NMTOKEN
;
break
;
}
break
;
#endif
case
BT_PLUS
:
if
(
tok
==
XML_TOK_NMTOKEN
)
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_NAME_PLUS
;
case
BT_AST
:
if
(
tok
==
XML_TOK_NMTOKEN
)
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_NAME_ASTERISK
;
case
BT_QUEST
:
if
(
tok
==
XML_TOK_NMTOKEN
)
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
*
nextTokPtr
=
ptr
;
return
XML_TOK_INVALID
;
}
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_NAME_QUESTION
;
...
...
@@ -1192,9 +1196,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return
-
tok
;
}
static
int
PREFIX
(
attributeValueTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
attributeValueTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
const
char
*
start
;
if
(
ptr
==
end
)
...
...
@@ -1208,7 +1212,7 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
#undef LEAD_CASE
case
BT_AMP
:
if
(
ptr
==
start
)
return
PREFIX
(
scanRef
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
return
PREFIX
(
scanRef
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
case
BT_LT
:
...
...
@@ -1217,27 +1221,27 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
return
XML_TOK_INVALID
;
case
BT_LF
:
if
(
ptr
==
start
)
{
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_DATA_NEWLINE
;
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_DATA_NEWLINE
;
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
case
BT_CR
:
if
(
ptr
==
start
)
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_TRAILING_CR
;
if
(
BYTE_TYPE
(
enc
,
ptr
)
==
BT_LF
)
ptr
+=
MINBPC
(
enc
);
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_NEWLINE
;
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_TRAILING_CR
;
if
(
BYTE_TYPE
(
enc
,
ptr
)
==
BT_LF
)
ptr
+=
MINBPC
(
enc
);
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_NEWLINE
;
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
case
BT_S
:
if
(
ptr
==
start
)
{
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_ATTRIBUTE_VALUE_S
;
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_ATTRIBUTE_VALUE_S
;
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
...
...
@@ -1250,9 +1254,9 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
return
XML_TOK_DATA_CHARS
;
}
static
int
PREFIX
(
entityValueTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
entityValueTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
const
char
*
start
;
if
(
ptr
==
end
)
...
...
@@ -1266,33 +1270,33 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end
#undef LEAD_CASE
case
BT_AMP
:
if
(
ptr
==
start
)
return
PREFIX
(
scanRef
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
return
PREFIX
(
scanRef
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
case
BT_PERCNT
:
if
(
ptr
==
start
)
{
int
tok
=
PREFIX
(
scanPercent
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
return
(
tok
==
XML_TOK_PERCENT
)
?
XML_TOK_INVALID
:
tok
;
int
tok
=
PREFIX
(
scanPercent
)(
enc
,
ptr
+
MINBPC
(
enc
),
end
,
nextTokPtr
);
return
(
tok
==
XML_TOK_PERCENT
)
?
XML_TOK_INVALID
:
tok
;
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
case
BT_LF
:
if
(
ptr
==
start
)
{
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_DATA_NEWLINE
;
*
nextTokPtr
=
ptr
+
MINBPC
(
enc
);
return
XML_TOK_DATA_NEWLINE
;
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
case
BT_CR
:
if
(
ptr
==
start
)
{
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_TRAILING_CR
;
if
(
BYTE_TYPE
(
enc
,
ptr
)
==
BT_LF
)
ptr
+=
MINBPC
(
enc
);
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_NEWLINE
;
ptr
+=
MINBPC
(
enc
);
if
(
ptr
==
end
)
return
XML_TOK_TRAILING_CR
;
if
(
BYTE_TYPE
(
enc
,
ptr
)
==
BT_LF
)
ptr
+=
MINBPC
(
enc
);
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_NEWLINE
;
}
*
nextTokPtr
=
ptr
;
return
XML_TOK_DATA_CHARS
;
...
...
@@ -1307,9 +1311,9 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end
#ifdef XML_DTD
static
int
PREFIX
(
ignoreSectionTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
PREFIX
(
ignoreSectionTok
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
int
level
=
0
;
if
(
MINBPC
(
enc
)
>
1
)
{
...
...
@@ -1324,30 +1328,30 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
INVALID_CASES
(
ptr
,
nextTokPtr
)
case
BT_LT
:
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_EXCL
))
{
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_LSQB
))
{
++
level
;
ptr
+=
MINBPC
(
enc
);
}
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_LSQB
))
{
++
level
;
ptr
+=
MINBPC
(
enc
);
}
}
break
;
case
BT_RSQB
:
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_RSQB
))
{
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
ptr
+=
MINBPC
(
enc
);
if
(
level
==
0
)
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_IGNORE_SECT
;
}
--
level
;
}
if
((
ptr
+=
MINBPC
(
enc
))
==
end
)
return
XML_TOK_PARTIAL
;
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_GT
))
{
ptr
+=
MINBPC
(
enc
);
if
(
level
==
0
)
{
*
nextTokPtr
=
ptr
;
return
XML_TOK_IGNORE_SECT
;
}
--
level
;
}
}
break
;
default:
...
...
@@ -1360,9 +1364,9 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
#endif
/* XML_DTD */
static
int
PREFIX
(
isPublicId
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
)
static
int
PTRCALL
PREFIX
(
isPublicId
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
)
{
ptr
+=
MINBPC
(
enc
);
end
-=
MINBPC
(
enc
);
...
...
@@ -1392,22 +1396,22 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
break
;
case
BT_S
:
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_TAB
))
{
*
badPtr
=
ptr
;
return
0
;
*
badPtr
=
ptr
;
return
0
;
}
break
;
case
BT_NAME
:
case
BT_NMSTRT
:
if
(
!
(
BYTE_TO_ASCII
(
enc
,
ptr
)
&
~
0x7f
))
break
;
break
;
default:
switch
(
BYTE_TO_ASCII
(
enc
,
ptr
))
{
case
0x24
:
/* $ */
case
0x40
:
/* @ */
break
;
break
;
default:
*
badPtr
=
ptr
;
return
0
;
*
badPtr
=
ptr
;
return
0
;
}
break
;
}
...
...
@@ -1415,28 +1419,29 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
return
1
;
}
/* This must only be called for a well-formed start-tag or empty element tag.
Returns the number of attributes. Pointers to the first attsMax attributes
are stored in atts. */
/* This must only be called for a well-formed start-tag or empty
element tag. Returns the number of attributes. Pointers to the
first attsMax attributes are stored in atts.
*/
static
int
PREFIX
(
getAtts
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
int
attsMax
,
ATTRIBUTE
*
atts
)
static
int
PTRCALL
PREFIX
(
getAtts
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
int
attsMax
,
ATTRIBUTE
*
atts
)
{
enum
{
other
,
inName
,
inValue
}
state
=
inName
;
int
nAtts
=
0
;
int
open
=
0
;
/* defined when state == inValue;
initialization just to shut up compilers */
initialization just to shut up compilers */
for
(
ptr
+=
MINBPC
(
enc
);;
ptr
+=
MINBPC
(
enc
))
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
#define START_NAME \
if (state == other) { \
if (nAtts < attsMax) { \
atts[nAtts].name = ptr; \
atts[nAtts].normalized = 1; \
} \
state = inName; \
if (nAtts < attsMax) { \
atts[nAtts].name = ptr; \
atts[nAtts].normalized = 1; \
} \
state = inName; \
}
#define LEAD_CASE(n) \
case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
...
...
@@ -1450,47 +1455,47 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
#undef START_NAME
case
BT_QUOT
:
if
(
state
!=
inValue
)
{
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valuePtr
=
ptr
+
MINBPC
(
enc
);
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valuePtr
=
ptr
+
MINBPC
(
enc
);
state
=
inValue
;
open
=
BT_QUOT
;
}
else
if
(
open
==
BT_QUOT
)
{
state
=
other
;
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valueEnd
=
ptr
;
nAtts
++
;
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valueEnd
=
ptr
;
nAtts
++
;
}
break
;
case
BT_APOS
:
if
(
state
!=
inValue
)
{
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valuePtr
=
ptr
+
MINBPC
(
enc
);
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valuePtr
=
ptr
+
MINBPC
(
enc
);
state
=
inValue
;
open
=
BT_APOS
;
}
else
if
(
open
==
BT_APOS
)
{
state
=
other
;
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valueEnd
=
ptr
;
nAtts
++
;
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
valueEnd
=
ptr
;
nAtts
++
;
}
break
;
case
BT_AMP
:
if
(
nAtts
<
attsMax
)
atts
[
nAtts
].
normalized
=
0
;
atts
[
nAtts
].
normalized
=
0
;
break
;
case
BT_S
:
if
(
state
==
inName
)
state
=
other
;
else
if
(
state
==
inValue
&&
nAtts
<
attsMax
&&
atts
[
nAtts
].
normalized
&&
(
ptr
==
atts
[
nAtts
].
valuePtr
||
BYTE_TO_ASCII
(
enc
,
ptr
)
!=
ASCII_SPACE
||
BYTE_TO_ASCII
(
enc
,
ptr
+
MINBPC
(
enc
))
==
ASCII_SPACE
||
BYTE_TYPE
(
enc
,
ptr
+
MINBPC
(
enc
))
==
open
))
atts
[
nAtts
].
normalized
=
0
;
&&
nAtts
<
attsMax
&&
atts
[
nAtts
].
normalized
&&
(
ptr
==
atts
[
nAtts
].
valuePtr
||
BYTE_TO_ASCII
(
enc
,
ptr
)
!=
ASCII_SPACE
||
BYTE_TO_ASCII
(
enc
,
ptr
+
MINBPC
(
enc
))
==
ASCII_SPACE
||
BYTE_TYPE
(
enc
,
ptr
+
MINBPC
(
enc
))
==
open
))
atts
[
nAtts
].
normalized
=
0
;
break
;
case
BT_CR
:
case
BT_LF
:
/* This case ensures that the first attribute name is counted
...
...
@@ -1498,12 +1503,12 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
if
(
state
==
inName
)
state
=
other
;
else
if
(
state
==
inValue
&&
nAtts
<
attsMax
)
atts
[
nAtts
].
normalized
=
0
;
atts
[
nAtts
].
normalized
=
0
;
break
;
case
BT_GT
:
case
BT_SOL
:
if
(
state
!=
inValue
)
return
nAtts
;
return
nAtts
;
break
;
default:
break
;
...
...
@@ -1512,32 +1517,36 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
/* not reached */
}
static
int
PREFIX
(
charRefNumber
)(
const
ENCODING
*
enc
,
const
char
*
ptr
)
static
int
PTRFASTCALL
PREFIX
(
charRefNumber
)(
const
ENCODING
*
enc
,
const
char
*
ptr
)
{
int
result
=
0
;
/* skip &# */
ptr
+=
2
*
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_x
))
{
for
(
ptr
+=
MINBPC
(
enc
);
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_SEMI
);
ptr
+=
MINBPC
(
enc
))
{
for
(
ptr
+=
MINBPC
(
enc
);
!
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_SEMI
);
ptr
+=
MINBPC
(
enc
))
{
int
c
=
BYTE_TO_ASCII
(
enc
,
ptr
);
switch
(
c
)
{
case
ASCII_0
:
case
ASCII_1
:
case
ASCII_2
:
case
ASCII_3
:
case
ASCII_4
:
case
ASCII_5
:
case
ASCII_6
:
case
ASCII_7
:
case
ASCII_8
:
case
ASCII_9
:
result
<<=
4
;
result
|=
(
c
-
ASCII_0
);
break
;
case
ASCII_A
:
case
ASCII_B
:
case
ASCII_C
:
case
ASCII_D
:
case
ASCII_E
:
case
ASCII_F
:
result
<<=
4
;
result
+=
10
+
(
c
-
ASCII_A
);
break
;
case
ASCII_a
:
case
ASCII_b
:
case
ASCII_c
:
case
ASCII_d
:
case
ASCII_e
:
case
ASCII_f
:
result
<<=
4
;
result
+=
10
+
(
c
-
ASCII_a
);
break
;
result
<<=
4
;
result
|=
(
c
-
ASCII_0
);
break
;
case
ASCII_A
:
case
ASCII_B
:
case
ASCII_C
:
case
ASCII_D
:
case
ASCII_E
:
case
ASCII_F
:
result
<<=
4
;
result
+=
10
+
(
c
-
ASCII_A
);
break
;
case
ASCII_a
:
case
ASCII_b
:
case
ASCII_c
:
case
ASCII_d
:
case
ASCII_e
:
case
ASCII_f
:
result
<<=
4
;
result
+=
10
+
(
c
-
ASCII_a
);
break
;
}
if
(
result
>=
0x110000
)
return
-
1
;
return
-
1
;
}
}
else
{
...
...
@@ -1546,23 +1555,24 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
result
*=
10
;
result
+=
(
c
-
ASCII_0
);
if
(
result
>=
0x110000
)
return
-
1
;
return
-
1
;
}
}
return
checkCharRefNumber
(
result
);
}
static
int
PREFIX
(
predefinedEntityName
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
)
static
int
PTRCALL
PREFIX
(
predefinedEntityName
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
)
{
switch
((
end
-
ptr
)
/
MINBPC
(
enc
))
{
case
2
:
if
(
CHAR_MATCHES
(
enc
,
ptr
+
MINBPC
(
enc
),
ASCII_t
))
{
switch
(
BYTE_TO_ASCII
(
enc
,
ptr
))
{
case
ASCII_l
:
return
ASCII_LT
;
return
ASCII_LT
;
case
ASCII_g
:
return
ASCII_GT
;
return
ASCII_GT
;
}
}
break
;
...
...
@@ -1570,9 +1580,9 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_a
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_m
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_p
))
return
ASCII_AMP
;
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_p
))
return
ASCII_AMP
;
}
}
break
;
...
...
@@ -1581,23 +1591,23 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
case
ASCII_q
:
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_u
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_o
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_t
))
return
ASCII_QUOT
;
}
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_o
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_t
))
return
ASCII_QUOT
;
}
}
break
;
case
ASCII_a
:
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_p
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_o
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_s
))
return
ASCII_APOS
;
}
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_o
))
{
ptr
+=
MINBPC
(
enc
);
if
(
CHAR_MATCHES
(
enc
,
ptr
,
ASCII_s
))
return
ASCII_APOS
;
}
}
break
;
}
...
...
@@ -1605,20 +1615,20 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
return
0
;
}
static
int
PREFIX
(
sameName
)(
const
ENCODING
*
enc
,
const
char
*
ptr1
,
const
char
*
ptr2
)
static
int
PTRCALL
PREFIX
(
sameName
)(
const
ENCODING
*
enc
,
const
char
*
ptr1
,
const
char
*
ptr2
)
{
for
(;;)
{
switch
(
BYTE_TYPE
(
enc
,
ptr1
))
{
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (*ptr1++ != *ptr2++) \
return 0;
return 0;
LEAD_CASE
(
4
)
LEAD_CASE
(
3
)
LEAD_CASE
(
2
)
#undef LEAD_CASE
/* fall through */
if
(
*
ptr1
++
!=
*
ptr2
++
)
return
0
;
return
0
;
break
;
case
BT_NONASCII
:
case
BT_NMSTRT
:
...
...
@@ -1630,23 +1640,23 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
case
BT_NAME
:
case
BT_MINUS
:
if
(
*
ptr2
++
!=
*
ptr1
++
)
return
0
;
return
0
;
if
(
MINBPC
(
enc
)
>
1
)
{
if
(
*
ptr2
++
!=
*
ptr1
++
)
return
0
;
if
(
MINBPC
(
enc
)
>
2
)
{
if
(
*
ptr2
++
!=
*
ptr1
++
)
return
0
;
if
(
*
ptr2
++
!=
*
ptr1
++
)
return
0
;
if
(
MINBPC
(
enc
)
>
2
)
{
if
(
*
ptr2
++
!=
*
ptr1
++
)
return
0
;
if
(
MINBPC
(
enc
)
>
3
)
{
if
(
*
ptr2
++
!=
*
ptr1
++
)
return
0
;
}
}
if
(
*
ptr2
++
!=
*
ptr1
++
)
return
0
;
}
}
}
break
;
default:
if
(
MINBPC
(
enc
)
==
1
&&
*
ptr1
==
*
ptr2
)
return
1
;
return
1
;
switch
(
BYTE_TYPE
(
enc
,
ptr2
))
{
case
BT_LEAD2
:
case
BT_LEAD3
:
...
...
@@ -1660,18 +1670,18 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
case
BT_DIGIT
:
case
BT_NAME
:
case
BT_MINUS
:
return
0
;
return
0
;
default:
return
1
;
return
1
;
}
}
}
/* not reached */
}
static
int
PREFIX
(
nameMatchesAscii
)(
const
ENCODING
*
enc
,
const
char
*
ptr1
,
const
char
*
end1
,
const
char
*
ptr2
)
static
int
PTRCALL
PREFIX
(
nameMatchesAscii
)(
const
ENCODING
*
enc
,
const
char
*
ptr1
,
const
char
*
end1
,
const
char
*
ptr2
)
{
for
(;
*
ptr2
;
ptr1
+=
MINBPC
(
enc
),
ptr2
++
)
{
if
(
ptr1
==
end1
)
...
...
@@ -1682,8 +1692,8 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
return
ptr1
==
end1
;
}
static
int
PREFIX
(
nameLength
)(
const
ENCODING
*
enc
,
const
char
*
ptr
)
static
int
PTRFASTCALL
PREFIX
(
nameLength
)(
const
ENCODING
*
enc
,
const
char
*
ptr
)
{
const
char
*
start
=
ptr
;
for
(;;)
{
...
...
@@ -1709,8 +1719,8 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
}
}
static
const
char
*
PREFIX
(
skipS
)(
const
ENCODING
*
enc
,
const
char
*
ptr
)
static
const
char
*
PTRFASTCALL
PREFIX
(
skipS
)(
const
ENCODING
*
enc
,
const
char
*
ptr
)
{
for
(;;)
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
...
...
@@ -1725,11 +1735,11 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
}
}
static
void
PREFIX
(
updatePosition
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
POSITION
*
pos
)
static
void
PTRCALL
PREFIX
(
updatePosition
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
POSITION
*
pos
)
{
while
(
ptr
!=
end
)
{
switch
(
BYTE_TYPE
(
enc
,
ptr
))
{
...
...
@@ -1748,7 +1758,7 @@ void PREFIX(updatePosition)(const ENCODING *enc,
pos
->
lineNumber
++
;
ptr
+=
MINBPC
(
enc
);
if
(
ptr
!=
end
&&
BYTE_TYPE
(
enc
,
ptr
)
==
BT_LF
)
ptr
+=
MINBPC
(
enc
);
ptr
+=
MINBPC
(
enc
);
pos
->
columnNumber
=
(
unsigned
)
-
1
;
break
;
default:
...
...
@@ -1766,3 +1776,4 @@ void PREFIX(updatePosition)(const ENCODING *enc,
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES
Modules/expat/xmltok_ns.c
View file @
fc03a94a
const
ENCODING
*
NS
(
XmlGetUtf8InternalEncoding
)(
void
)
const
ENCODING
*
NS
(
XmlGetUtf8InternalEncoding
)(
void
)
{
return
&
ns
(
internal_utf8_encoding
).
enc
;
}
const
ENCODING
*
NS
(
XmlGetUtf16InternalEncoding
)(
void
)
const
ENCODING
*
NS
(
XmlGetUtf16InternalEncoding
)(
void
)
{
#if
XML_BYTE_ORDER == 12
#if
BYTEORDER == 1234
return
&
ns
(
internal_little2_encoding
).
enc
;
#elif
XML_BYTE_ORDER ==
21
#elif
BYTEORDER == 43
21
return
&
ns
(
internal_big2_encoding
).
enc
;
#else
const
short
n
=
1
;
return
*
(
const
char
*
)
&
n
?
&
ns
(
internal_little2_encoding
).
enc
:
&
ns
(
internal_big2_encoding
).
enc
;
return
(
*
(
const
char
*
)
&
n
?
&
ns
(
internal_little2_encoding
).
enc
:
&
ns
(
internal_big2_encoding
).
enc
);
#endif
}
static
const
ENCODING
*
NS
(
encodings
)[]
=
{
static
const
ENCODING
*
NS
(
encodings
)[]
=
{
&
ns
(
latin1_encoding
).
enc
,
&
ns
(
ascii_encoding
).
enc
,
&
ns
(
utf8_encoding
).
enc
,
...
...
@@ -26,21 +29,25 @@ const ENCODING *NS(encodings)[] = {
&
ns
(
utf8_encoding
).
enc
/* NO_ENC */
};
static
int
NS
(
initScanProlog
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
NS
(
initScanProlog
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
return
initScan
(
NS
(
encodings
),
(
const
INIT_ENCODING
*
)
enc
,
XML_PROLOG_STATE
,
ptr
,
end
,
nextTokPtr
);
return
initScan
(
NS
(
encodings
),
(
const
INIT_ENCODING
*
)
enc
,
XML_PROLOG_STATE
,
ptr
,
end
,
nextTokPtr
);
}
static
int
NS
(
initScanContent
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
static
int
PTRCALL
NS
(
initScanContent
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
nextTokPtr
)
{
return
initScan
(
NS
(
encodings
),
(
const
INIT_ENCODING
*
)
enc
,
XML_CONTENT_STATE
,
ptr
,
end
,
nextTokPtr
);
return
initScan
(
NS
(
encodings
),
(
const
INIT_ENCODING
*
)
enc
,
XML_CONTENT_STATE
,
ptr
,
end
,
nextTokPtr
);
}
int
NS
(
XmlInitEncoding
)(
INIT_ENCODING
*
p
,
const
ENCODING
**
encPtr
,
const
char
*
name
)
int
NS
(
XmlInitEncoding
)(
INIT_ENCODING
*
p
,
const
ENCODING
**
encPtr
,
const
char
*
name
)
{
int
i
=
getEncodingIndex
(
name
);
if
(
i
==
UNKNOWN_ENC
)
...
...
@@ -54,8 +61,8 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n
return
1
;
}
static
const
ENCODING
*
NS
(
findEncoding
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
)
static
const
ENCODING
*
NS
(
findEncoding
)(
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
)
{
#define ENCODING_MAX 128
char
buf
[
ENCODING_MAX
];
...
...
@@ -73,26 +80,27 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha
return
NS
(
encodings
)[
i
];
}
int
NS
(
XmlParseXmlDecl
)(
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingName
,
const
ENCODING
**
encoding
,
int
*
standalone
)
int
NS
(
XmlParseXmlDecl
)(
int
isGeneralTextEntity
,
const
ENCODING
*
enc
,
const
char
*
ptr
,
const
char
*
end
,
const
char
**
badPtr
,
const
char
**
versionPtr
,
const
char
**
versionEndPtr
,
const
char
**
encodingName
,
const
ENCODING
**
encoding
,
int
*
standalone
)
{
return
doParseXmlDecl
(
NS
(
findEncoding
),
isGeneralTextEntity
,
enc
,
ptr
,
end
,
badPtr
,
versionPtr
,
versionEndPtr
,
encodingName
,
encoding
,
standalone
);
isGeneralTextEntity
,
enc
,
ptr
,
end
,
badPtr
,
versionPtr
,
versionEndPtr
,
encodingName
,
encoding
,
standalone
);
}
setup.py
View file @
fc03a94a
...
...
@@ -690,9 +690,9 @@ class PyBuildExt(build_ext):
# More information on Expat can be found at www.libexpat.org.
#
if
sys
.
byteorder
==
"little"
:
xmlbo
=
"12"
xmlbo
=
"12
34
"
else
:
xmlbo
=
"21"
xmlbo
=
"
43
21"
expatinc
=
os
.
path
.
join
(
os
.
getcwd
(),
srcdir
,
'Modules'
,
'expat'
)
exts
.
append
(
Extension
(
'pyexpat'
,
sources
=
[
...
...
@@ -702,10 +702,9 @@ class PyBuildExt(build_ext):
'expat/xmltok.c'
,
],
define_macros
=
[
(
'HAVE_EXPAT_H'
,
None
),
(
'XML_NS'
,
'1'
),
(
'XML_DTD'
,
'1'
),
(
'
XML_BYTE_
ORDER'
,
xmlbo
),
(
'
BYTE
ORDER'
,
xmlbo
),
(
'XML_CONTEXT_BYTES'
,
'1024'
),
],
include_dirs
=
[
expatinc
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment