# (c) Copyright 2005-2008, 2010. CodeWeavers, Inc.
package CXTinySAX;
use warnings;
use strict;
use CXUtils;
use CXLog;
use XML::RegExp;
# Decodes the XML escapes understood by this module and returns the result.
#
# Only '&lt;', '&gt;', '&amp;' and hexadecimal character references
# ('&#xHH;') are decoded. Any other entity or reference is returned
# unchanged.
#
# $str: the XML-encoded string.
# Returns the decoded string.
sub xml_decode($)
{
    my ($str)=@_;
    # Fast path: without an ampersand there is nothing to decode
    if ($str =~ /&/)
    {
        $str =~ s/&lt;/</g;
        $str =~ s/&gt;/>/g;
        # '&amp;' == '&#x26;' so both forms are decoded in one single pass,
        # and last, to make sure we don't do double demangling
        # (e.g. '&amp;lt;' must decode to '&lt;' and not to '<').
        $str =~ s!&(amp|#(x[0-9a-fA-F]+));!$1 eq "amp" ? "&" : chr(oct("0$2"))!eg;
    }
    return $str;
}
# Escapes a string so it can be written as XML character data.
#
# '&', '<' and '>' are replaced with their named entities, and the control
# characters \x01-\x1f (this includes tab and newline) are replaced with
# hexadecimal character references.
#
# $str: the raw string.
# Returns the escaped string.
sub mangle_cdata($)
{
    my ($str)=@_;
    # '&' must be escaped first so that the ampersands introduced by the
    # following replacements are not escaped a second time.
    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    $str =~ s!([\x01-\x1f])!sprintf "&#x%04X;", ord($1)!eg;
    return $str;
}
# Escapes a string so it can be written as an XML attribute value.
#
# '&', '<' and '>' are replaced with their named entities. Single quotes,
# double quotes and the control characters \x01-\x1f are replaced with
# hexadecimal character references, so the result is safe inside either
# kind of quoting.
#
# $str: the raw string.
# Returns the escaped string.
sub mangle_attribute($)
{
    my ($str)=@_;
    # '&' must be escaped first so that the ampersands introduced by the
    # following replacements are not escaped a second time.
    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    $str =~ s!([\x01-\x1f\'\"])!sprintf "&#x%04X;", ord($1)!eg;
    return $str;
}
# Reads $filename, splits it into tokens and walks them with a small state
# machine. The handler calls visible here are
# $handler->start_element($element, $attributes) and
# $handler->cdata($element, $cdata).
#
# $handler:  object receiving the parsing callbacks.
# $filename: path of the XML file to parse.
# Returns undef if the file cannot be opened; the final statement returns 1.
#
# NOTE(review): in this copy the braces and the bodies of most branches are
# missing, so the function does not compile as shown and the comments below
# only describe the lines that are visible. Restore the full body from the
# upstream source before relying on it.
sub parse_file($$)
my ($handler, $filename)=@_;
my $fh;
return undef if (!open($fh, "<", $filename));
# Slurp the whole file into a single string
my $file=join("", <$fh>);
# Collapse each newline and the whitespace around it into a single space so
# that newlines can be used as token separators below
$file =~ s%\s*\n\s*% %g;
# For a start tag whose name is followed by whitespace, put the '<' and the
# element name on lines of their own
$file =~ s%<\s*($XML::RegExp::Name)\s+%<\n$1\n%g;
# Put every '<', '</', '>' and '/>' delimiter on a line of its own
$file =~ s%(</?|/?>)%\n$1\n%g;
my $last_start_tag;
my $element;
my $attributes;
my $cdata;
my $depth=0;
# Current state of the parser. The values tested below are "ignore",
# "attributes", "end_element", "start_element" and "cdata".
my $state="";
# Process the tokens one by one, skipping those that only contain whitespace
foreach my $line (split "\n", $file)
next if ($line =~ /^\s*$/);
# '<' opens a tag (the body of this branch is not visible here)
if ($line eq "<")
elsif ($state eq "ignore")
# Ignore everything until the next start_element
# '</' opens an end tag (the body of this branch is not visible here)
elsif ($line eq "</")
# '>' or '/>' terminates the current tag
elsif ($line =~ m%^/?>$%)
# The attributes are complete so the start of the element can be reported
if ($state eq "attributes")
$handler->start_element($element, $attributes);
# '/>' means the element is closed right away
$state="end_element" if ($line eq "/>");
if ($state eq "end_element")
if (!defined $element)
# Strip a leading '<number>:' prefix from the element name
# NOTE(review): presumably the depth stored with the element, see the
# "$depth:$element" comparison below -- confirm
$element =~ s/^-?[0-9]+://;
# Character data is only reported if some was collected and the last start
# tag matches this element at the current depth
if (defined $cdata and $last_start_tag eq "$depth:$element")
$handler->cdata($element, $cdata);
elsif ($state eq "start_element")
# XML declaration: look for a double-quoted encoding attribute
if ($line =~ /^\?xml\s+/)
if ($line =~ /\bencoding\s*=\s*\"((?:[^\\\"]*|\\.)*)\"/)
# NOTE(review): unescape_string() is not defined in this file, presumably it
# comes from CXUtils -- confirm
my $encoding=unescape_string($1);
# Comment: remove the '!--' opener and the whitespace following it
elsif ($line =~ s/^!--\s*//)
# Any other token starting with '?' or '!'
elsif ($line =~ /^[?!]/)
elsif ($state eq "end_element")
elsif ($state eq "attributes")
# Consume the attributes one at a time from the front of the token. Both
# name="value" and name='value' are accepted.
while ($line ne "")
if ($line =~ s/^\s*($XML::RegExp::Name)\s*=\s*\"([^\"]*)\"\s*// or
$line =~ s/^\s*($XML::RegExp::Name)\s*=\s*\'([^\']*)\'\s*//)
my $name=xml_decode($1);
my $value=xml_decode(unescape_string($2));
# NOTE(review): presumably only reached when neither pattern above matches;
# the branch structure is not visible here -- confirm
cxlog("unable to parse '$line' attribute(s)\n");
elsif ($state eq "cdata")
return 1;