Added "well-formed" XML checker script.

parent 7bed2413
#!/usr/bin/perl -w
#
# Parse document and report first syntax (well-formedness) error found.
#
use strict;
use XML::Parser;
use Getopt::Std;
my %opts;
getopts('e', \%opts);
my $ENTREFS = exists( $opts{'e'} ); # flag: check ent refs
my $parser = XML::Parser->new(
ErrorContext => 2, # output error context
);
# get input from files
if( @ARGV ) {
foreach( @ARGV ) {
my $file = $_;
unless( -r $file ) {
print STDERR "ERROR: Can't open '$file'.\n";
return;
}
my $input = '';
open( F, $file );
while( <F> ) { $input .= $_; }
close F;
# parse and report errors
if( &parse_string( $input )) {
print STDERR "ERROR in $file:\n$@\n";
} else {
print STDERR "'$file' is well-formed.\n";
}
}
print "All files checked.\n";
# get input from STDIN
} else {
my $input = "";
while( <STDIN> ) { $input .= $_; }
if( &parse_string( $input )) {
print STDERR "ERROR in stream:\n$@\n";
} else {
print STDERR "No syntax errors found in XML stream.\n";
}
}
# parse the string and return error message
#
# NOTE: By default, entity refs are not expanded. XML::Parser can be
# told not to expand entity refs, but will still try to find
# replacement text just in case, which we don't want. Therefore, we
# need to do a stupid regexp replacement, removing entities from input.
#
sub parse_string {
my $string = shift;
unless( $ENTREFS ) {
$string =~ s/\&[^\s;]+;//g; # remove entity references
}
eval { $parser->parse( $string ); };
$@ =~ s/at \/.*?$//s; # remove module line number
return $@;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment