#!/usr/bin/env perl
# -*- mode: perl -*-

# format - format XML Text using XSL Transformation
# Author: klm <klm at okowa dot org>
# Version: 2004-10-17 (v0.1)
# License: Perl's

# Help text printed for --help.  $0 is interpolated so the message shows
# the name the script was invoked under.  Every option accepted by
# GetOptions is listed here, including --encoding.
my $usage = <<"__USAGE__";
usage: $0 [options] ...
  --style=/path/to/xslt
  --xsltproc=/path/to/xsltproc
  --encoding=name
  --parse_doctype=0|1
  --help
__USAGE__

use strict;
use warnings;
use Getopt::Long;

# Matches a DOCTYPE declaration that carries an external identifier.
# Captures:
#   $1  the keyword PUBLIC or SYSTEM
#   $2  the first quoted literal (public id after PUBLIC, system id after SYSTEM)
#   $3  the second quoted literal, if present (system id after a public id)
#   $4  the internal subset between [ and ], if present
# /s lets the internal subset span several lines; without it "." stops at
# the first newline and a multi-line subset makes the whole match fail.
# Whitespace before "[" is optional.  Only double-quoted literals are
# recognised.
my $doctype_regex = qr/
    <!DOCTYPE \s+ [\w\:]+
              \s+ (PUBLIC|SYSTEM)
              \s+ \"([^\"]+)\"
           (?:\s+ \"([^\"]+)\")?
           (?:\s* \[(.*?)\])?
              \s* > /xs;

# Defaults; each one can be overridden by the command-line option of the
# same name.
my $xsltproc      = "/usr/bin/xsltproc";    # XSLT processor to run
my $style         = "format.xsl";           # stylesheet handed to the processor
my $encoding      = "utf-8";                # forwarded as the xmldecl-encoding parameter
my $parse_doctype = 1;                      # true: forward DOCTYPE ids found in the input
my $help          = 0;

# "parse_doctype:1" takes an optional integer value: a bare --parse_doctype
# means 1, while --parse_doctype=0 still switches DOCTYPE parsing off.
# (A mandatory "=f" value made the bare flag fail or consume the next
# argument.)
# GetOptions has already reported the offending option on STDERR when it
# returns false, so only the usage text is added before giving up.
GetOptions(
    "xsltproc=s"      => \$xsltproc,
    "encoding=s"      => \$encoding,
    "style=s"         => \$style,
    "parse_doctype:1" => \$parse_doctype,
    "help"            => \$help,
) or do { print STDERR $usage; exit 2 };

if ($help) { print $usage; exit }


# Slurp the whole document as raw bytes into $in.
# The input is the first file named on the command line, or STDIN when no
# file is given or the name is "-".  Only the first file argument is read.
# The handle is opened explicitly so that binmode is applied to a handle
# that is actually open; calling binmode on ARGV before <> has opened it
# only produces an "unopened filehandle" warning and changes nothing.
my $in = do {
    local $/;    # slurp mode, limited to this block

    my $source = @ARGV ? $ARGV[0] : '-';
    my $data;
    if ($source eq '-') {
        binmode STDIN;
        $data = <STDIN>;
    }
    else {
        open my $fh, '<', $source
            or die "$0: cannot open $source: $!\n";
        binmode $fh;
        $data = <$fh>;
        close $fh;
    }

    # An empty input reads as undef; normalise it so later matching and
    # printing do not trip "uninitialized value" warnings.
    defined $data ? $data : '';
};

# Stylesheet parameters, keyed by parameter name.  The output encoding is
# always passed along.
my %param = ( "xmldecl-encoding" => $encoding );

# When DOCTYPE parsing is enabled and the input carries a matching DOCTYPE,
# forward its identifiers and internal subset as parameters too.
if ($parse_doctype) {
    if ( my ($keyword, $first_id, $second_id, $subset) = $in =~ /$doctype_regex/o ) {

        # With a single literal, the keyword decides which parameter it is.
        my %key_for = (
            PUBLIC => "doctype-public",
            SYSTEM => "doctype-system",
        );

        if ($first_id && $second_id) {
            # Two literals: public id followed by system id.
            @param{ "doctype-public", "doctype-system" } = ($first_id, $second_id);
        }
        elsif (exists $key_for{$keyword}) {
            $param{ $key_for{$keyword} } = $first_id;
        }

        $param{"doctype-content"} = $subset if $subset;
    }
}

# Turn every collected parameter into a "--stringparam NAME VALUE" triple.
# Keys are sorted so the generated command line is reproducible.
# NOTE(review): each value is wrapped in literal single quotes.  The list
# form of open below bypasses the shell, so those quotes reach xsltproc as
# part of the argument -- confirm that the stylesheet expects them.
my @param = map { ("--stringparam", $_, "'$param{$_}'") } sort keys %param;

# Feed the document to xsltproc on its standard input ("-"); the processor
# inherits our STDOUT, so the transformed document goes straight there.
binmode STDOUT;
open my $pipe, "|-", $xsltproc, "--novalid", @param, $style, "-"
    or die "$0: cannot run $xsltproc: $!\n";
binmode $pipe;    # the document was read as raw bytes; write it the same way
print {$pipe} $in
    or die "$0: cannot write to $xsltproc: $!\n";

# close() on a pipe waits for the child and returns false when it failed:
# $! is set for an I/O error, otherwise $? holds the child's wait status.
unless (close $pipe) {
    die "$0: error closing pipe to $xsltproc: $!\n" if $!;
    my $status = $? >> 8;
    warn "$0: $xsltproc failed (wait status $?)\n";
    exit( $status || 1 );    # killed by a signal leaves $status at 0
}
