#!/usr/bin/perl # Copyright (c) 2002-2007 Sampo Kellomaki (sampo@iki.fi). All Rights Reserved. # This is free software. You may distribute under GPL. NO WARRANTY. # $Id: xsd2sg.pl,v 1.33 2007-10-14 18:32:28 sampo Exp $ # 9.6.2005, Sampo Kellomaki # 15.7.2005, drastically improved the support for -s flag --Sampo # 16.8.2005, debugged for MM7 schema parsing --Sampo # 25.11.2005, added union() support --Sampo # 3.2.2006, added support for xs:redefine and mixed(1) --Sampo # 12.5.2006, added verbatim() file inclusion and inline xsd() features --Sampo # 27.5.2006, took a stab at adding some generation support --Sampo # 29.5.2006, complete redesign from basis of simple recursive descent parser --Sampo # 6.8.2006, continued adding code generation --Sampo # 8.8.2006, new initialization scheme for namespaces and tokens --Sampo # 26.8.2006, code generation debugging; fixes to simple elems --Sampo # 5.9.2006, several fixes to allow Liberty ID-FF 1.2 schemata to pass --Sampo # 14.9.2006, tweaks for ID-WSF 2.0 schemata to pass --Sampo # 23.9.2006, WO encoder support, improve namespaces, exc-c14n --Sampo # 15.10.2006, refactor sources to be per namespace --Sampo # 1.3.2007, tweaks to fix indigestion at people service --Sampo # 6.6.2007, DirectoryScript code generation --Sampo # 29.9.2007, Fixed encoding of attribute namespaces --Sampo # # XML schema to Schema Grammar converter (and beautifier) $usage = <schema.sg Usage: ./xsd2sg.pl [OPTIONS] -s schema.xsd Usage: ./xsd2sg.pl [OPTIONS] -gen FF -p PP -r N1:E1 -r N2:E1 -S schema1.sg schema2.sg >/dev/null -s Convert schema grammar (sg) to xsd (default is the other way around) -S Multigrammar coversion (for generation) -noverbatim Disable verbatim XSD includes -gen FF Generate datatypes, encoders, and decoders. Files are named with prefix FF -p PP Specify prefix used, in addition to ns, for datatypes of automatic code generation. -ext ns Consider namespace prefix ns to be externally satisfied dependency -r node Generate code starting from specified root node. -z zx Define zx prefix. -d Increase debugging level. -h Show this help -H Show summary of schema grammar format USAGE ; $format = < %tt Arrow (->) signifies reference to type that defines element or attribute xx: ... ; Colon (:) means that the definition of type follows immediately ee An element or attribute by itself means exactly one occurance is expected ee? Question mark (?) means the element or attribute is optional ee* Asterisk (*) means the element may appear from zero to infinite number of times (same as * in regular expressions) ee+ Plus (+) means the element must appear at least once, but may appear an infinite number of times (same as + in regular expressions) ee{x,y} The element must appear between x and y times (same as in regex) ee | ee The pipey symbol (|) means elements are mutually exclusive choices. ee ee Concatenation of elements or attributes means sequence base( t ) Introduce Extension base type (derive a type) redef( .. ) Redefine a type (using construct) mixed(1) Mark a complex type as having mixed content type, i.e. strings and elements alternate enum( ... ) Introduce enumeration of xs:strings ns( ... ) Introduce namespace (usually of any or @any) union( .. ) Union of types all ?? any xs:any, the XML arbitrary element extension mechanism @any xs:anyAttribute, the XML arbitrary attribute extension mechanism verbatim(f) Include a file verbatim to xsd output. xsd(x) Insert the x in verbatim to the xsd output. For example: target(demo, urn:demo.com:demo:0.1) import(foo, urn:demo.com:foo:0.2, foo.xsd) verbatim(copyright-annotation.xsf) ResourceID -> %disco:ResourceIDType // element definition by reference to type &ResourceIDGroup: ResourceID | EncryptedResourceID // group with choice (one of) ChangeFormat: enum( ChangedElements, CurrentElements ) @changeFormat: enum( ChangedElements CurrentElements All ) // xs:string base is implied ItemData -> %ItemDataType %ItemDataType: any* // 0-n any elements @id -> %xs:ID @itemIDRef -> %IDReferenceType @notSorted: enum( Now Never ) @changeFormat ; Query -> %QueryType %QueryType: &ResourceIDGroup Subscription? QueryItem*: base(ResultQueryType) @count -> xs:nonNegativeInteger @offset -> xs:nonNegativeInteger default(0) @setID -> %IDType @setReq: enum( Static DeleteSet ) Extension* @id -> xs:ID @any namespace( "##other") ; FORMAT ; $copyright_holder = 'Sampo Kellomaki (sampo@iki.fi)'; #$copyright_holder = 'Symlabs (symlabs@symlabs.com)'; $copyright_msg = <2; } sub sg_out { my ($x) = @_; $sg .= $x; warn "sg_out: $x" if $trace>2; } sub xs_out { my ($x) = @_; $xsd .= $x; warn "xs_out: $x" if $trace>2; } sub hdr_out { my ($x) = @_; $hdr .= $x; warn "hdr_out: $x" if $trace>2; } sub enc_out { my ($x) = @_; $enc .= $x; warn "enc_out: $x" if $trace>2; } sub dec_out { my ($x) = @_; $dec .= $x; warn "dec_out: $x" if $trace>2; } sub aux_out { my ($x) = @_; $aux .= $x; warn "aux_out: $x" if $trace>2; } sub getput_out { my ($x) = @_; $getput .= $x; warn "getput_out: $x" if $trace>2; } sub ds_out { my ($x) = @_; $dscode .= $x; warn "ds_out: $x" if $trace>2; } sub ns_out { my ($x) = @_; $nsout .= $x; warn "ns_out: $x" if $trace>2; } sub nsh_out { my ($x) = @_; $nshout .= $x; warn "nsh_out: $x" if $trace>2; } sub elems_gperf_out { my ($x) = @_; $elems_gperf .= $x; warn "elems_gperf_out: $x" if $trace>2; } sub attrs_gperf_out { my ($x) = @_; $attrs_gperf .= $x; warn "attrs_gperf_out: $x" if $trace>2; } sub reset_accumulators { $hdr = $enc = $dec = $aux = $getput = $dscode = ''; } %rename = ( case => 'case_is_c_keyword', signed => 'signed_is_c_keyword', VERSION => 'VERSION_is_Perl_MakeMaker_gobbled' ); die $usage if $ARGV[0] eq '-h'; die $format if $ARGV[0] eq '-H'; $nsix = 0; undef $/; $file = 'stdin'; while ($ARGV[0] eq '-d') { ++$trace; shift; } $zx = 'zx'; if ($ARGV[0] eq '-z') { shift; $zx = shift; } if ($ARGV[0] eq '-noverbatim') { ++$noverbatim; shift; } if ($ARGV[0] eq '-gen') { shift; $gen_prefix = shift; } if ($ARGV[0] eq '-p') { shift; $tx = shift; } while ($ARGV[0] eq '-ext') { shift; $ext_ns = shift; $ignore_ext{$ext_ns} = 1; } while ($ARGV[0] eq '-r') { shift; push @roots, shift; } if ($ARGV[0] eq '-s') { # simple grammar to schema mode $x = ; sg_to_xsd(); exit; } $ZX = uc $zx; if ($ARGV[0] eq '-S') { # Multifile grammar to schema mode (useful for generation) shift; for $file (@ARGV) { open F, $file or die "Cant read file($file): $!"; $x = ; close F; warn "Processing($file)" if $trace; sg_to_xsd(); } warn "Generating" if $trace; generate() if $gen_prefix; exit; } sub sg_to_xsd { $ns = ''; # 1 1 2 URL 2 if ($x =~ /target\((?:(\w+),\s*)?([^\)]+?)\)/) { $ns .= qq( targetNamespace="$2"\n); $ns .= qq( xmlns:$1="$2"\n) if length($1); ++$ns_siz; ns_out qq({ 0, 0, 0, 0, 0, 0, 0, sizeof("$1")-1, "$1", sizeof("$2")-1, "$2" },\n); nsh_out <%)|(?:&\@)|[.;,{}|\)\%&\@*+?!])/g; if ($trace) { for ($i = 0; $i <= $#tok; ++$i) { warn "sg $i: ($tok[$i])\n"; } } $i = 0; ++$i if $tok[$i] =~ /^\s*$/; sg_top(' '); # Work horse! Do actual conversion. $xsd =~ s%\n\s*\n\s*%%gs; # peep hole silly $xsd =~ s%]*?)>\s*%%gs; # peep hole print < $xsd XSD ; } sub fold_enum_value { my ($x) = @_; $x =~ s/([^A-Za-z0-9! ])/sprintf "0x%02x",ord($1)/gse; return $x; } sub sg_fold { my ($x,$tag) = @_; #warn "$tag($x)"; $x =~ s/(.)/sprintf "%02x",ord($1)/gsex; return qq{$tag($x)}; } sub sg_import { my ($prefix, $url, $path) = @_; sg_ns($prefix, $url) if length $prefix; xs_out qq( \n); return ''; } sub sg_include { my ($url) = @_; xs_out qq( \n); return ''; } sub sg_ns { my ($prefix, $url) = @_; $ns .= qq( xmlns:$prefix="$url"\n); die "Inconsistent namespace URIs for prefix($prefix) old($ns_tab{$prefix}) new($url) file($file)" if defined($ns_tab{$prefix}) && $ns_tab{$prefix} ne $url; warn "ns($prefix) uri($url) file($file)\n" if $trace; $ns_tab{$prefix} = $url; return ''; } #################################################### ### Schema Grammar to XML schema sub sg_top { my ($indent) = @_; while ($i <= $#tok) { warn $indent."sg_top $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; if ($tok[$i] eq '%') { sg_complexType($indent); } elsif ($tok[$i] eq '&') { sg_group($indent); } elsif ($tok[$i] eq '&@') { sg_attr_group($indent); } elsif ($tok[$i] eq '@') { sg_attr($indent); } elsif ($tok[$i] =~ /^[\w:]+$/) { sg_elem($indent); } elsif ($tok[$i] eq '!') { ++$i; # Divider between decls } elsif ($tok[$i] eq 'comment(') { ++$i; ($x = $tok[$i]) =~ s/(..)/chr(hex($1))/gex; xs_out qq{\n}; ++$i; die "Expected ) after comment $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; } elsif ($tok[$i] eq 'sec(') { ++$i; xs_out qq{\n}; ++$i; die "Expected ) after sec $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; } elsif ($tok[$i] eq 'endsec(') { ++$i; xs_out qq{\n}; ++$i; die "Expected ) after endsec $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; } elsif ($tok[$i] eq 'verbatim(') { ++$i; unless ($noverbatim) { ($x = $tok[$i]) =~ s/(..)/chr(hex($1))/gex; open F, "<$x" or die "verbatim($x) not found: $!"; $x = ; close F; xs_out $x; } ++$i; die "Expected ) after verbatim $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; } elsif ($tok[$i] eq 'xsd(') { ++$i; ($x = $tok[$i]) =~ s/(..)/chr(hex($1))/gex; xs_out $x; ++$i; die "Expected ) after xsd $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; } else { die "Bad token $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)"; } } } sub sg_elem { my ($indent) = @_; my $ename = $tok[$i]; xs_out qq($indent1; $dt{'element'}{"$cur_ns$ename"} = sg_type_ref_or_def($indent, 'element'); } sub sg_attr { my ($indent) = @_; ++$i; warn $indent."sg_attr $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; my $aname = sg_get_word(); xs_out qq($indent1; my $gname = sg_get_word(); xs_out qq($indent1; my $agname = sg_get_word(); xs_out qq($indent1; if ($tok[$i + 1] eq 'redef(' && $tok[$i + 3] eq ')') { ($x = $tok[$i+2]) =~ s/(..)/chr(hex($1))/gex; xs_out qq($indent\n); $indent .= ' '; $redef = 1; $i += 3; die "Expected ) after redef name($name) $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; $tok[$i] = '.'; warn $indent."sg_complexType after redef $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; } if ($tok[$i + 1] eq 'mixed(' && $tok[$i + 3] eq ')') { $mixed = qq( mixed="$tok[$i+2]"); $i += 3; die "Expected ) after mixed $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; $tok[$i] = '.'; warn $indent."sg_complexType after mixed $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; } warn $indent."sg_complexType BEFORE BASE DETECT $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; if (0 && $tok[$i + 1] eq 'base(' && $tok[$i + 3] eq ')' && $tok[$i+2] =~ /^xs:/) { # && $tok[$i + 4] eq ';' All derivations of simple base are still simple warn "simpleType in disguise($tok[$i+2])"; xs_out qq($indent\n); } } # Type specific handlers sub sg_any { my ($indent) = @_; ++$i; my ($occurs, $n_min, $n_max) = sg_occurs(); my ($ns, $pc); warn $indent."sg_any $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; if ($tok[$i] eq '!') { # ! is divider between decls die "What is this $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)"; } if ($tok[$i] eq 'ns(' || $tok[$i] eq 'processContents(') { while ($i <= $#tok) { if ($tok[$i] eq 'processContents(') { ++$i; my $w = sg_get_word(); $pc .= qq( processContents="$w"); die "Expected ) after ns decl $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; next; } elsif ($tok[$i] eq 'ns(') { ++$i; $ns .= qq( namespace="$tok[$i]"); ++$i; die "Expected ) after ns decl $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; next; } last; } } $ns = qq( namespace="##any") if !$ns; xs_out qq($indent\n); return "any\$$n_min\$$n_max\$$pc\$$ns"; } sub sg_any_attr { my ($indent) = @_; my ($occurs, $n_min); ++$i; $indent.warn "sg_any_attr $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; if ($tok[$i] eq '?') { $n_min = 0; $occurs = qq( use="optional"); ++$i; } else { $n_min = 1; $occurs = qq( use="required"); } # *** should process ns() if present # N.B. anyAttribute can not take occurs spec xs_out qq($indent\n); return "anyAttribute\$$n_min\$1"; } sub sg_enum { my ($indent, $close) = @_; ++$i; my $datarep = 'enum'; my $val = $tok[$i]; $val =~ s/0x([0-9a-f][0-9a-f])/chr(hex($1))/gse; # unfold xs_out qq($indent \n); xs_out qq($indent \n); xs_out qq($indent \n); ++$i; $datarep .= '$'. $val; warn $indent."sg_enum $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; while ($i <= $#tok && $tok[$i] ne ')') { die "Expected ! in enum $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq '!'; ++$i; $val = $tok[$i]; $val =~ s/0x([0-9a-f][0-9a-f])/chr(hex($1))/gse; # unfold xs_out qq($indent \n); ++$i; $datarep .= '$' . $val; } xs_out qq($indent \n); xs_out qq($indent \n); xs_out qq($indent\n); die "Expected ) after enum type $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; die "Expected ; after enum type $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ';'; ++$i; return $datarep; } sub sg_union { my ($indent, $close) = @_; ++$i; my $datarep = 'union'; xs_out qq($indent \n); warn $indent."sg_union $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; my $val = ''; for (; $i <= $#tok && $tok[$i] ne ')'; ++$i) { $val .= $tok[$i] . ' '; $datarep .= '$' . $tok[$i]; } chop $val; xs_out qq($indent \n); xs_out qq($indent \n); xs_out qq($indent\n); die "Expected ) after union type $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; die "Expected ; after union type $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ';'; ++$i; return $datarep; } sub sg_choice { my ($indent, $close) = @_; my $datarep = 'choice'; xs_out qq($indent \n); my $alt = sg_get_word(); # *** groups can be members of choice as well? xs_out qq($indent \n); $datarep .= '$' . $alt; warn $indent."sg_choice $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; while ($i <= $#tok && $tok[$i] ne ';') { die "Expected | in alternate $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" if $tok[$i] ne '|'; ++$i; my $alt = sg_get_word(); # *** groups can be members of choice as well? xs_out qq($indent \n); $datarep .= '$' . $alt; } xs_out qq($indent \n); xs_out qq($indent\n); die "Expected ; after choice $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ';'; ++$i; return $datarep; } sub sg_type_def { my ($indent, $close) = @_; my ($base_type, $closed, $basecontent); my @data_ary; die "Expected . (:) before type def ($close) $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq '.'; ++$i; xs_out ">\n"; warn $indent."sg_type_def($close) $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; return sg_choice($indent, $close) if $tok[$i + 1] eq '|'; return sg_enum($indent, $close) if $tok[$i] eq 'enum('; return sg_union($indent, $close) if $tok[$i] eq 'union('; if ($close ne 'complexType' && $close ne 'attributeGroup' && $close ne 'simpleType') { xs_out qq($indent \n); $indent .= ' '; } else { $indent .= ' '; } if ($tok[$i] eq 'base(') { ++$i; $base_type = $tok[$i]; ++$i; die "Expected ) after base type ($close) $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; if ($base_type =~ /string$/) { $basecontent = 'simpleContent'; } elsif ($base_type =~ /base64Binary$/) { $basecontent = 'simpleContent'; } elsif ($base_type =~ /[Ss]hort$/) { $basecontent = 'simpleContent'; } elsif ($base_type =~ /nteger$/) { $basecontent = 'simpleContent'; } elsif ($base_type =~ /boolean$/) { $basecontent = 'simpleContent'; } elsif ($base_type =~ /anyURI$/) { $basecontent = 'simpleContent'; } elsif ($base_type =~ /date$/) { $basecontent = 'simpleContent'; } elsif ($base_type =~ /gMonthDay$/) { $basecontent = 'simpleContent'; } else { $basecontent = 'complexContent'; } if ($basecontent eq 'simpleContent') { push @data_ary, "_d\$\$0\$1\$$base_type"; # pseudo child element to represent content } else { push @data_ary, "base\$\$0\$1\$$base_type"; # generate reference to base ++$needed_complexType{$base_type}; } xs_out qq($indent\n); xs_out qq($indent \n); $indent .= ' '; ++$i; } xs_out "$indent\n"; warn $indent."sg_type_def($close) sequence $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; while ($i <= $#tok && $tok[$i] ne ';') { if ($tok[$i] eq 'any') { push @data_ary, sg_any(" $indent"); } elsif ($tok[$i] =~ /^[\w:]+$/) { push @data_ary, sg_ref_def_or_nada(" $indent", 'element'); } elsif ($tok[$i] eq '&') { ++$i; push @data_ary, sg_ref_def_or_nada(" $indent", 'group'); } elsif ($tok[$i] eq '&@') { xs_out "$indent\n" if !$closed; $closed = 1; ++$i; push @data_ary, sg_ref_def_or_nada($indent, 'attributeGroup'); } elsif ($tok[$i] eq '@') { xs_out "$indent\n" if !$closed; $closed = 1; ++$i; if ($tok[$i] eq 'any') { push @data_ary, sg_any_attr($indent); } else { push @data_ary, sg_ref_def_or_nada($indent, 'attribute'); } } elsif ($tok[$i] eq '!') { ++$i; # Divider between sequence elems } else { die "Bad token in type def ($close) $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)"; } } xs_out "$indent\n" if !$closed; if ($base_type) { substr($indent, -4) = ''; xs_out qq($indent \n); xs_out qq($indent\n); } if ($close ne 'complexType' && $close ne 'attributeGroup' && $close ne 'simpleType') { substr($indent, -4) = ''; xs_out qq($indent \n); } else { substr($indent, -2) = ''; } xs_out "$indent\n"; ++$i; return \@data_ary; } sub sg_type_ref_or_def { my ($indent, $close) = @_; warn $indent."sg_type_ref_or_def $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; if ($tok[$i] eq '->%') { return sg_type_ref(); } elsif ($tok[$i] eq '.') { return sg_type_def($indent, $close); } else { die "Bad token in $close $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)"; } } sub sg_type_ref { die "Expected ->% before complexType $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq '->%'; ++$i; warn "sg_type_ref $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; my $default; my $r = sg_get_word(); if ($tok[$i] eq '!' && $tok[$i+1] eq 'default(') { ++$i; ++$i; $default = qq( default="$tok[$i]"); ++$i; die "Expected ) after default $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" unless $tok[$i] eq ')'; ++$i; } xs_out qq( type="$r"$default/>\n); return "ref\$$r\$$default"; } sub sg_ref_def_or_nada { my ($indent, $tag) = @_; my ($occurs, $n_min, $n_max); warn $indent."sg_ref_def_or_nada($tag) $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])\n" if $trace>1; my $name = sg_get_word(); if ($tag eq 'attribute') { $n_max = 1; if ($tok[$i] eq '?') { $n_min = 0; $occurs = qq( use="optional"); ++$i; } else { $n_min = 1; $occurs = qq( use="required"); } } elsif ($tag eq 'attributeGroup') { $occurs = ''; } else { ($occurs, $n_min, $n_max) = sg_occurs(); } if ($tok[$i] eq '.') { # inline type definition xs_out qq($indent%') { # reference to a type xs_out qq($indent\n); return "$tag\$$name\$$n_min\$$n_max\$nada"; } } sub sg_occurs { if ($tok[$i] eq '?') { ++$i; return (qq(\tminOccurs="0" maxOccurs="1"), 0, 1); } elsif ($tok[$i] eq '*') { ++$i; return (qq(\tminOccurs="0" maxOccurs="unbounded"), 0, -1); } elsif ($tok[$i] eq '+') { ++$i; return (qq(\tminOccurs="1" maxOccurs="unbounded"), 1, -1); } elsif ($tok[$i] eq '{') { ++$i; my $b = sg_get_word(); die "Expected , in occurs $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" if $tok[$i] ne ','; ++$i; if ($tok[$i] eq '}') { ++$i; return qq(\tminOccurs="$b" maxOccurs="unbounded"); } my $e = sg_get_word(); die "Expected } in occurs $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" if $tok[$i] ne '}'; ++$i; return (qq(\tminOccurs="$b" maxOccurs="$e"), $b, $e); } return (qq(\tminOccurs="1" maxOccurs="1"), 1, 1); # Exactly once if no quantifier was specified } sub sg_get_word { die "Expected a word token $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) file($file)" if $tok[$i] !~ /^[\w:]+$/; return $tok[$i++]; } #################################################### ### XML schema to SG ### Lexical analysis: split the blob to tokens sub fold_attr_value { my ($x) = @_; $x =~ s/ /\\x20/gs; return $x; } $x = ; $x =~ s%<\?xml.*?>%%gs; $x =~ s%%%gs; $x =~ s%<((\w+:)?documentation)>(.*?)%%gs; # Zap pesky documentation $x =~ s%%%gs; $x =~ s%([/?]?>)% $1 %gs; # We need to see close tag angle bracket as a separate token $x =~ s%=(['"])([^"'].*?)\1%"=$1" . fold_attr_value($2) . $1 %gse; #'; #"; #warn $x; @tok = split /\s+/s, $x; if ($trace) { for ($i = 0; $i <= $#tok; ++$i) { warn "$i: ($tok[$i])\n"; } } $i = 0; ++$i if $tok[$i] =~ /^\s*$/; ### ### Top down recursive descent parser for XML schemas ### top_decls(''); # start the parser ball rolling sub top_decls { my ($indent) = @_; scan_pi($indent) if $tok[$i] =~ /^<\?xml$/; schema($indent, $1) if $tok[$i] =~ /^<(\w+:)?schema$/; } sub scan_pi { my ($indent) = @_; attrs(); ++$i until $tok[$i] eq '?>'; ++$i; } sub schema { my ($indent, $tagns) = @_; die "schema element expected $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])" if $tok[$i] !~ /^<(\w+:)?schema$/; my $at = attrs($indent.' '); ++$i; sg_out("target($$at{'targetNamespace'})\n") if $$at{'targetNamespace'}; while ($i <= $#tok && $tok[$i] !~ m%$/; # Compute occurs as regexp quantifier character my $min = $at{'minOccurs'}; $min = 1 if !defined $min; my $max = $at{'maxOccurs'}; $max = 1 if !defined $max; if ($min == 0) { if ($max eq 'unbounded') { $at{'occurs'} = '*'; } elsif ($max == 1) { $at{'occurs'} = '?'; } else { $at{'occurs'} = "{$min,$max}"; } } elsif ($min == 1) { if ($max eq 'unbounded') { $at{'occurs'} = '+'; } elsif ($max == 1) { $at{'occurs'} = ''; # exactly once } else { $at{'occurs'} = "{$min,$max}"; } } else { $at{'occurs'} = "{$min,$max}"; } return \%at; } sub namespace { my ($ns, $uri) = @_; $ns{$ns} = $uri; sg_out("ns($ns,$uri)\n") unless $ns eq 'xs'; } sub close_tag { my ($indent,$tag) = @_; die "missing close $tag $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])" if $tok[$i] !~ m%^'; ++$i; } sub process_empty_element { my ($indent, $tag) = @_; my $at = attrs($indent.' '); if ($tok[$i] eq '>') { ++$i; if ($tok[$i] =~ /^<(\w+:)?annotation$/) { annotation($indent); } close_tag($indent, $tag); } else { die "junk at end of $tag $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])" if $tok[$i] ne '/>'; ++$i; } return $at; } sub import_decl { my ($indent) = @_; my $at = process_empty_element($indent, 'import'); sg_out($indent."import($$at{'namespace'},$$at{'schemaLocation'})\n"); } sub include { my ($indent) = @_; my $at = process_empty_element($indent, 'include'); sg_out($indent."include($$at{'schemaLocation'})\n"); } sub enumeration { my ($indent) = @_; my $at = process_empty_element($indent, 'enumeration'); return $$at{'value'}; } sub any { my ($indent) = @_; my $at = process_empty_element($indent, 'any'); my $ns = defined($$at{'namespace'}) && $$at{'namespace'} ne '##any' ? ' ns(' . $$at{'namespace'} . ')' : ''; $ns .= ' processContents(' . $$at{'processContents'} . ')' if defined $$at{'processContents'}; sg_out($indent.'any' . $$at{'occurs'} . $ns . "\n"); } sub anyAttribute { my ($indent) = @_; my $at = process_empty_element($indent, 'any'); my $ns = defined($$at{'namespace'}) && $$at{'namespace'} ne '##other' ? ' ns(' . $$at{'namespace'} . ')' : ''; sg_magic_newline(); sg_out($indent . '@any' . $ns . "\n"); } sub union { my ($indent) = @_; my $at = attrs($indent.' '); if ($tok[$i] eq '>') { ++$i; close_tag($indent, $tag); } if ($tok[$i] eq '/>') { ++$i; } else { die "junk at end of union $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } # *** what does UNION mean? Could it be expressed using minOccurs=0 instead? sg_out($indent . "union($$at{'memberTypes'})\n"); } sub annotation { my ($indent) = @_; attrs($indent.' '); ++$i; documentation($indent) if $tok[$i] =~ /^<(\w+:)?documentation$/; close_tag($indent, 'annotation'); } sub documentation { my ($indent) = @_; attrs($indent.' '); ++$i; # *** the text of the documentation goes here, but see # *** documentation elimination substitution above close_tag($indent, 'documentation'); } sub sg_magic_semicolon { my ($indent) = @_; if (substr($sg,-1) eq "\n") { sg_out($indent." ;\n"); } else { sg_out(" ;\n"); } } sub sg_magic_newline { if (substr($sg,-1) ne "\n") { sg_out("\n"); } } sub element { my ($indent) = @_; my $at = attrs($indent.' '); if ($tok[$i] eq '>') { ++$i; if ($$at{'ref'}) { sg_out($indent . $$at{'ref'} . $$at{'occurs'} . "\n"); annotation($indent) if $tok[$i] =~ /^<(\w+:)?annotation$/; } elsif ($$at{'type'}) { sg_out($indent . $$at{'name'} . $$at{'occurs'} . "\t -> \%" . $$at{'type'}. "\n"); annotation($indent) if $tok[$i] =~ /^<(\w+:)?annotation$/; } else { sg_out($indent . $$at{'name'} . $$at{'occurs'} . ':'); annotation($indent) if $tok[$i] =~ /^<(\w+:)?annotation$/; complex_or_simple_type($indent.' '); sg_magic_semicolon($indent); } close_tag($indent, 'element'); } elsif ($tok[$i] eq '/>') { ++$i; if ($$at{'ref'}) { sg_out($indent . $$at{'ref'} . $$at{'occurs'} . "\n"); } elsif ($$at{'abstract'}) { sg_out($indent . $$at{'name'} . $$at{'occurs'} . "\n"); } elsif ($$at{'type'}) { sg_out($indent . $$at{'name'} . $$at{'occurs'} . "\t -> \%" . $$at{'type'}. "\n"); } elsif ($$at{'name'}) { sg_out($indent . $$at{'name'} . $$at{'occurs'} . "\n"); } else { die "Expected ref or type XML attribute $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])" . Dumper($at); } } else { die "junk at end of element $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } } sub attribute { my ($indent) = @_; my ($use_ind); my $at = attrs($indent.' '); sg_magic_newline(); #warn "HERE $i: ($tok[$i-1]) ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) ($tok[$i+1])"; if ($$at{'use'} eq 'required' || $indent eq '') { $use_ind = ''; } else { $use_ind = '?'; } if ($tok[$i] eq '>') { ++$i; sg_out($indent . '@' . $$at{'name'} . $use_ind . ':'); if (complex_or_simple_type($indent.' ')) { sg_magic_semicolon($indent); close_tag($indent, 'attribute'); return; } else { # Must have been just an attribute, fall thru to normal handling close_tag($indent, 'attribute'); } } elsif ($tok[$i] eq '/>') { ++$i; } else { die "junk at end of attribute $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } if ($$at{'ref'}) { sg_out($indent . '@' . $$at{'ref'} . $use_ind . "\n"); } else { my $default = defined($$at{'default'}) ? ' default (' . $$at{'default'} . ')' : ''; sg_out($indent . '@' . $$at{'name'} . $use_ind . "\t -> \%" . $$at{'type'} . $default . "\n"); } } sub complex_or_simple_type { my ($indent) = @_; while ($tok[$i] !~ /^<\//) { if ($tok[$i] =~ /^<(\w+:)?complexType$/) { complexType($indent); return 1; } elsif ($tok[$i] =~ /^<(\w+:)?simpleType$/) { simpleType($indent); return 1; } elsif ($tok[$i] =~ /^<(\w+:)?annotation$/) { annotation($indent); } else { die "Expected complex or simple type $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } } return 0; } sub complexType { my ($indent) = @_; my $at = attrs($indent.' '); if ($tok[$i] eq '/>') { ++$i; return; } ++$i; # '>' if ($$at{'name'}) { sg_out($indent.'%'.$$at{'name'}.':'); $indent .= ' '; } while ($i <= $#tok && $tok[$i] !~ m%') { ++$i; sg_out($indent . '&' . $$at{'name'} . $$at{'occurs'}. ": "); if ($tok[$i] =~ /^<(\w+:)?sequence$/) { sequence($indent.' '); } elsif ($tok[$i] =~ /^<(\w+:)?choice$/) { choice($indent.' '); } else { die "Expected complex or simple type $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } sg_magic_semicolon($indent); close_tag($indent, 'group'); } elsif ($tok[$i] eq '/>') { ++$i; sg_out($indent . '&' . $$at{'ref'} . $$at{'occurs'}. "\n"); } else { die "junk at end of group $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } #warn "HERE $i: ($tok[$i-1]) ($tok[$i-1]) ($tok[$i]) ($tok[$i+1]) ($tok[$i+1])"; } sub attributeGroup { my ($indent) = @_; my $at = attrs($indent.' '); sg_magic_newline(); if ($tok[$i] eq '>') { ++$i; sg_out($indent . '&@' . $$at{'name'} . $$at{'occurs'}. ": "); while ($i <= $#tok && $tok[$i] !~ m%') { ++$i; sg_out($indent . '&@' . $$at{'ref'} . $$at{'occurs'}. "\n"); } else { die "junk at end of attributeGroup $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } } sub simpleContent { my ($indent) = @_; attrs($indent.' '); ++$i; if ($tok[$i] =~ /^<(\w+:)?extension$/) { extension($indent); } elsif ($tok[$i] =~ /^<(\w+:)?restriction$/) { restriction($indent); } else { die "Expected extension or restriction in simpleContent $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } close_tag($indent, 'simpleContent'); } sub complexContent { my ($indent) = @_; attrs($indent.' '); ++$i; while ($i <= $#tok && $tok[$i] !~ m%') { ++$i; if ($i <= $#tok && $tok[$i] !~ m%<(\w+:)?enumeration$%) { sg_out("\t base(" . $$at{'base'} . ')'); } while ($i <= $#tok && $tok[$i] !~ m%') { ++$i; sg_out("\t base(" . $$at{'base'} . ')'); } else { die "junk at end of restriction $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } } sub maxLength { my ($indent) = @_; my $at = attrs($indent.' '); if ($tok[$i] eq '>') { close_tag($indent, 'maxLength'); } elsif ($tok[$i] eq '/>') { ++$i; } else { die "junk at end of maxLength $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } } sub list { my ($indent) = @_; my $at = attrs($indent.' '); if ($tok[$i] eq '>') { close_tag($indent, 'list'); } elsif ($tok[$i] eq '/>') { ++$i; } else { die "junk at end of list $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } sg_out("\t" . $$at{'itemType'} . "*"); } sub extension { my ($indent) = @_; my $at = attrs($indent.' '); sg_out("\t base(" . $$at{'base'} . ")"); if ($tok[$i] eq '>') { ++$i; while ($i <= $#tok && $tok[$i] !~ m%') { ++$i; } else { die "junk at end of extension $i: ($tok[$i-1]) ($tok[$i]) ($tok[$i+1])"; } } print $sg; ################################################################### ### Data type expansion and code generation ### ### Prior to code generation pass, the sg-to-xsd generation pass was ### invoked. As a side effect, that pass has populated the %dt hash. ### ### $dt{'element'}{"$cur_ns$ename"} branch describes the data type ### of each named element. The datatype can be an array for a ### complex element composed of multiple subelements and attributes: ### Each element of the array follows the format "op$type$additional$fields" ### where op describes what type of object, elememt or attribute, is being ### described. At intermediate stages of processing also complexType, ### group, or attributeGroup can appear, but they will get expanded ### to the constituent elements during the process. ### ### Noncomplex elements contain directly the description, such as "ref$xs:string" ### meaning the element is simple and of type xs:string. ### ### $dt{'attribute'}{"$cur_ns$aname"} branch describes datatype of ### each attribute. ### ### $dt{'group'}{"$cur_ns$gname"} branch describes datatype of group ### $dt{'attributeGroup'}{"$cur_ns$agname"} branch describes datatype of group ### $dt{'complexType'}{"$cur_ns$ename"} branch describes datatype of group ### ### $cur_ns has the current namespace prefix, with colon: e.g. "xs:" sub expand_attributeGroup { my ($ag) = @_; my $i; for ($i = 0; $i <= $#{$dt{'attributeGroup'}{$ag}}; ++$i) { my ($op, $name, $n_min, $n_max) = split /\$/, $dt{'attributeGroup'}{$ag}[$i]; next if $op eq 'attribute'; next if $op eq 'anyAttribute'; die "bad op($op) ($ag) ag($dt{'attributeGroup'}{$ag}[$i])" if $op ne 'attributeGroup'; expand_attributeGroup($name); splice @{$dt{'attributeGroup'}{$ag}}, $i, 1, @{$dt{'attributeGroup'}{$name}}; } } sub expand_group { my ($ag) = @_; my $i; for ($i = 0; $i <= $#{$dt{'group'}{$ag}}; ++$i) { my ($op, $name, $n_min, $n_max) = split /\$/, $dt{'group'}{$ag}[$i]; if ($op eq 'complexType') { expand_complexType($name); splice @{$dt{'group'}{$ag}}, $i, 1, @{$dt{'complexType'}{$name}}; } elsif ($op eq 'group') { expand_group($name); splice @{$dt{'group'}{$ag}}, $i, 1, @{$dt{'group'}{$name}}; } elsif ($op eq 'attributeGroup') { expand_attributeGroup($name); splice @{$dt{'group'}{$ag}}, $i, 1, @{$dt{'attributeGroup'}{$name}}; } } } sub expand_complexType { my ($ag) = @_; my $i; for ($i = 0; $i <= $#{$dt{'complexType'}{$ag}}; ++$i) { my ($op, $name, $n_min, $n_max) = split /\$/, $dt{'complexType'}{$ag}[$i]; if ($op eq 'complexType') { expand_complexType($name); splice @{$dt{'complexType'}{$ag}}, $i, 1, @{$dt{'complexType'}{$name}}; } elsif ($op eq 'group') { expand_group($name); splice @{$dt{'complexType'}{$ag}}, $i, 1, @{$dt{'group'}{$name}}; } elsif ($op eq 'attributeGroup') { expand_attributeGroup($name); splice @{$dt{'complexType'}{$ag}}, $i, 1, @{$dt{'attributeGroup'}{$name}}; } } } sub expand_element { my ($ag) = @_; my ($i, $el, $nam); ($el = $ag) =~ s/[:-]/_/g; #warn "expand el($ag)" . Dumper($dt{'element'}{$ag}); if (ref $dt{'element'}{$ag} eq '') { # scalar my ($op, $name, $n_min, $n_max) = split /\$/, $dt{'element'}{$ag}; if ($op eq 'ref') { #warn "expand ref el($ag) name($name) cx($dt{'complexType'}{$name})"; $dt{'element'}{$ag} = $dt{'complexType'}{$name} if $dt{'complexType'}{$name}; } elsif ($op eq 'enum') { $dt{'element'}{$ag} = 'ref$xs:string'; } elsif ($op eq 'union') { $dt{'element'}{$ag} = 'ref$xs:string'; } else { die "bad op($op)"; } } # Now complex types and groups for ($i = 0; $i <= $#{$dt{'element'}{$ag}}; ++$i) { my ($op, $name, $n_min, $n_max,$type) = split /\$/, $dt{'element'}{$ag}[$i]; ($nam = $name) =~ s/[:-]/_/g; $refby{$nam} .= " $tx${el}_s"; #warn "REFBY($nam): $el"; if ($op eq 'complexType') { expand_complexType($name); splice @{$dt{'element'}{$ag}}, $i, 1, @{$dt{'complexType'}{$name}}; redo; # recheck the expansion for complex types } elsif ($op eq 'group') { expand_group($name); splice @{$dt{'element'}{$ag}}, $i, 1, @{$dt{'group'}{$name}}; redo; # recheck the expansion } elsif ($op eq 'attributeGroup') { expand_attributeGroup($name); splice @{$dt{'element'}{$ag}}, $i, 1, @{$dt{'attributeGroup'}{$name}}; redo; # recheck the expansion } elsif ($op eq 'base') { #warn "expanding base($type)"; splice @{$dt{'element'}{$ag}}, $i, 1, (); # remove base indicator # Add contents of base always to beginning so that types derived # from same base are isomorphic wrt to the fields from the base. splice @{$dt{'element'}{$ag}}, 0, 0, @{$dt{'complexType'}{$type}}; $i = -1; # restart checking: base may have a base or comlpex types } } # *** Exclusive XML canonicalization rules dictate that attributes # are sorted alphabetically. } sub trivial_complexType_check { my ($name) = @_; if ($#{$dt{'element'}{$name}} == 0) { # check for trivial complexType my ($op, $name1, undef, undef, $type) = split /\$/, $dt{'element'}{$name}[0]; if ($op eq '_d') { #warn "Collapsing trivial complexType($name) to type($type)"; return $type; } elsif ($op eq 'any') { # *** } elsif ($op eq 'element') { #die "*** not thought out"; #$dt{'element'}{$name} = $dt{'element'}{$name}[0]; # *** naming conflicts? } elsif ($op eq 'attribute') { #die "*** not thought out"; #$dt{'element'}{$name} = $dt{'element'}{$name}[0]; } elsif ($op eq 'anyAttribute') { } else { die "Bad op($op) name($name) name1($name1)"; } } return $undef; } #################################################################### ### Code generation (mainly C) ### sub gen_element { my ($el, $def, $rootp) = @_; my ($i, $attrs, $elems, $attrs_so_len, $attrs_wo_len, $elems_so_len, $elems_wo_len, $attrs_so_enc, $attrs_wo_enc, $elems_so_enc, # $len_wo, $enc_wo, MUST NOT be local! $attrs_dup_strs, $elems_dup_strs, $attrs_free, $elems_free, $attrs_clone, $elems_clone, $xmlns_so_enc, $xmlns_so_len, $xmlns_wo_enc, $xmlns_wo_len, $attrs_walk_so, $elems_walk_so, $attrs_walk_wo, $elems_walk_wo, $getput_attr_get_hdrs, $getput_get_hdrs, $getput_num_hdrs, $getput_pop_hdrs, $getput_push_hdrs, $getput_attr_put_hdrs, $getput_put_hdrs, $getput_add_hdrs, $getput_del_hdrs, $getput_rev_hdrs, $ds_attr_parse, $ds_attr_build, $ds_elem_parse, $ds_elem_build, $field, $type, %fields, @attrs); return unless ref $def eq 'ARRAY'; my ($ns, $tag) = $el =~ /^(?:(\w+):)?([\w-]+)$/; $el =~ s/[:-]/_/g; if ($ns) { elems_gperf_out qq($tag, "$ns", ${tx}ns_tab + ${tx}xmlns_ix_$ns\n); } else { elems_gperf_out qq($tag, "$ns", 0\n); } my %seen_ns = ($ns => undef); # NS appears by virtue of tag itself my $n_decode = $rootp ? ', int n_decode' : ''; hdr_out <g.n = &x->${field}->g; x->${field} = el; break; DEC ; $elems_so_len .= <${field}; se; se = (struct ${zx}_elem_s*)se->g.n) len += ${tx}LEN_SO_simple_elem(c,se, sizeof("${name}")-1, ${tx}ns_tab+${tx}xmlns_ix_$name_ns); ENC ; $elems_wo_len .= <${field}; se; se = (struct ${zx}_elem_s*)se->g.n) len += ${tx}LEN_WO_simple_elem(c, se, sizeof("${name_tag}")-1); ENC ; $elems_so_enc .= <${field}; se; se = (struct ${zx}_elem_s*)se->g.n) p = ${tx}ENC_SO_simple_elem(c, se, p, "${name}", sizeof("${name}")-1, ${tx}ns_tab+${tx}xmlns_ix_$name_ns); ENC ; $len_wo .= <gg\.g/-\>g/gs; getput_out $x; $getput_num_hdrs .= "int ${tx}${el}_NUM_${field}(struct $tx${el}_s* x);\n"; $getput_get_hdrs .= "struct ${zx}_elem_s* ${tx}${el}_GET_${field}(struct $tx${el}_s* x, int n);\n"; $getput_pop_hdrs .= "struct ${zx}_elem_s* ${tx}${el}_POP_${field}(struct $tx${el}_s* x);\n"; $getput_push_hdrs .= "void ${tx}${el}_PUSH_${field}(struct $tx${el}_s* x, struct ${zx}_elem_s* y);\n"; $getput_rev_hdrs .= "void ${tx}${el}_REV_${field}(struct $tx${el}_s* x);\n"; $getput_put_hdrs .= "void ${tx}${el}_PUT_${field}(struct $tx${el}_s* x, int n, struct ${zx}_elem_s* y);\n"; $getput_add_hdrs .= "void ${tx}${el}_ADD_${field}(struct $tx${el}_s* x, int n, struct ${zx}_elem_s* z);\n"; $getput_del_hdrs .= "void ${tx}${el}_DEL_${field}(struct $tx${el}_s* x, int n);\n"; hdr_out " struct ${zx}_elem_s* ${field};\t/* {$n_min,$n_max} $type */\n"; if ($n_max == 1) { $ds_elem_parse .= " POPTAG_RAW(x[2], '${field}', data.${field});\n"; $ds_elem_build .= " PUSHTAG_RAW(body, '${field}', data.${field});\n"; } else { $ds_elem_parse .= <g.n = &x->${field}->gg.g; x->${field} = (struct $tx${ns_name}_s*)el; break; DEC ; # When canonicalizing for signature verification, # the embedded signature is to be excluded. if ($ns_name eq 'ds_Signature') { $elems_so_len .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) if (e != c->exclude_sig) len += ${tx}LEN_SO_${ns_name}(c, e); } ENC ; $elems_wo_len .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) if (e != c->exclude_sig) len += ${tx}LEN_WO_${ns_name}(c, e); } ENC ; $elems_so_enc .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) if (e != c->exclude_sig) p = ${tx}ENC_SO_${ns_name}(c, e, p); } ENC ; $len_wo .= <exclude_sig) ? ${tx}LEN_WO_${ns_name}(c, (struct $tx${ns_name}_s*)x) : 0; ENC_WO ; $enc_wo .= <exclude_sig) ? ${tx}ENC_WO_${ns_name}(c, (struct $tx${ns_name}_s*)x, p) : p; ENC_WO ; } else { $elems_so_len .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) len += ${tx}LEN_SO_${ns_name}(c, e); } ENC ; $elems_wo_len .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) len += ${tx}LEN_WO_${ns_name}(c, e); } ENC ; $elems_so_enc .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) p = ${tx}ENC_SO_${ns_name}(c, e, p); } ENC ; $len_wo .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) ${tx}DUP_STRS_${ns_name}(c, e); } DUP ; $elems_free .= <${field}; e; e = en) { en = (struct $tx${ns_name}_s*)e->gg.g.n; ${tx}FREE_${ns_name}(c, e, free_strs); } } FREE ; $elems_clone .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) { en = ${tx}DEEP_CLONE_${ns_name}(c, e, dup_strs); if (!enn) x->${field} = en; else enn->gg.g.n = &en->gg.g; enn = en; } } CLONE ; $elems_walk_so .= <${field}; e; e = (struct $tx${ns_name}_s*)e->gg.g.n) { ret = ${tx}WALK_SO_${ns_name}(c, e, ctx, callback); if (ret) return ret; } } WALKSO ; $x = $getput_subtempl; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/$el/g; $x =~ s/FNAME/$field/g; $x =~ s/ELTYPE/$tx$el/g; $x =~ s/FTYPE/$tx$ns_name/g; getput_out $x; $getput_num_hdrs .= "int ${tx}${el}_NUM_${field}(struct $tx${el}_s* x);\n"; $getput_get_hdrs .= "struct $tx${ns_name}_s* ${tx}${el}_GET_${field}(struct $tx${el}_s* x, int n);\n"; $getput_pop_hdrs .= "struct $tx${ns_name}_s* ${tx}${el}_POP_${field}(struct $tx${el}_s* x);\n"; $getput_push_hdrs.= "void ${tx}${el}_PUSH_${field}(struct $tx${el}_s* x, struct $tx${ns_name}_s* y);\n"; $getput_rev_hdrs .= "void ${tx}${el}_REV_${field}(struct $tx${el}_s* x);\n"; $getput_put_hdrs .= "void ${tx}${el}_PUT_${field}(struct $tx${el}_s* x, int n, struct $tx${ns_name}_s* y);\n"; $getput_add_hdrs .= "void ${tx}${el}_ADD_${field}(struct $tx${el}_s* x, int n, struct $tx${ns_name}_s* z);\n"; $getput_del_hdrs .= "void ${tx}${el}_DEL_${field}(struct $tx${el}_s* x, int n);\n"; hdr_out " struct $tx${ns_name}_s* ${field};\t/* {$n_min,$n_max} $type */\n"; if ($n_max == 1) { $ds_elem_parse .= <g.n = &x->${field}->g; x->${field} = ss; ${ZX}_ATTR_DEC_EXT(ss); break; DEC ; $attrs_so_len .= " len += ${zx}_attr_so_len(x->$field, sizeof(\"$name\")-1);\n"; $attrs_wo_len .= " len += ${zx}_attr_wo_len(x->$field, sizeof(\"$name_tag\")-1);\n"; $attrs_so_enc .= qq{ p = ${zx}_attr_so_enc(p, x->$field, " $name=\\"", sizeof(" $name=\\"")-1);\n}; #$attrs_so_enc .= qq{ p = ${zx}_attr_so_enc(p, x->$field, " $ns_name=\\"", sizeof(" $ns_name=\\"")-1);\n}; #$attrs_so_enc .= qq{ p = ${zx}_attr_so_enc(p, x->$field, " $name_tag=\\"", sizeof(" $name_tag=\\"")-1);\n}; $attrs_wo_enc .= qq{ p = ${zx}_attr_wo_enc(p, x->$field, "$name_tag=\\"", sizeof("$name_tag=\\"")-1);\n}; $attrs_dup_strs .= " ${zx}_dup_attr(c, x->$field);\n"; $attrs_free .= " ${zx}_free_attr(c, x->$field, free_strs);\n"; $attrs_clone .= " x->$field = ${zx}_clone_attr(c, x->$field);\n"; getput_out <${field}; } /* FUNC(${tx}${el}_PUT_${field}) */ void ${tx}${el}_PUT_${field}(struct $tx${el}_s* x, struct ${zx}_str* y) { x->${field} = y; } GETPUT ; $getput_attr_get_hdrs .= "struct ${zx}_str* ${tx}${el}_GET_${field}(struct $tx${el}_s* x);\n"; $getput_attr_put_hdrs .= "void ${tx}${el}_PUT_${field}(struct $tx${el}_s* x, struct ${zx}_str* y);\n"; hdr_out " struct ${zx}_str* ${field};\t/* {$n_min,$n_max} attribute $type */\n"; $ds_attr_parse .= " POPATTR(x[1], '$ns_name', data.$ns_name);\n"; $ds_attr_build .= " PUSHATTR(attr, '$ns_name', data.$ns_name);\n"; } hdr_out <gg.g.tok = tok; return x; }/g; } else { $x =~ s/ROOT_N_DECODE//g; $x =~ s/ROOT_CHECK_N_DECODED;//g; } dec_out $x; # XMLNS checks. Sort by ns prefix (unlike attributes which use URI). for $ns (sort keys %seen_ns) { if (!defined $seen_ns{$ns}) { # Special case: tag's own namespace $xmlns_so_len .= qq{ len += ${zx}_len_xmlns_if_not_seen(c, ${tx}ns_tab+${tx}xmlns_ix_$ns, &pop_seen);\n}; $xmlns_wo_len .= qq{ len += ${zx}_len_xmlns_if_not_seen(c, x->gg.g.ns, &pop_seen);\n}; $xmlns_so_enc .= qq{ p = ${zx}_enc_xmlns_if_not_seen(c, p, ${tx}ns_tab+${tx}xmlns_ix_$ns, &pop_seen);\n}; $xmlns_wo_enc .= qq{ ${zx}_add_xmlns_if_not_seen(c, x->gg.g.ns, &pop_seen);\n}; } else { $fields = ''; for $field (@{$seen_ns{$ns}}) { $xmlns_wo_len .= qq{ if (x->$field)\n len += ${zx}_len_xmlns_if_not_seen(c, x->$field->g.ns, &pop_seen);\n}; $xmlns_wo_enc .= qq{ if (x->$field)\n ${zx}_add_xmlns_if_not_seen(c, x->$field->g.ns, &pop_seen);\n}; $fields .= "x->$field || "; } chop $fields; chop $fields; chop $fields; chop $fields; $xmlns_so_len .= qq{ if ($fields)\n len += ${zx}_len_xmlns_if_not_seen(c, ${tx}ns_tab+${tx}xmlns_ix_$ns, &pop_seen);\n}; $xmlns_so_enc .= qq{ if ($fields)\n p = ${zx}_enc_xmlns_if_not_seen(c, p, ${tx}ns_tab+${tx}xmlns_ix_$ns, &pop_seen);\n}; } } $x = $enc_templ; $x =~ s/SIMPLELENNSARG//g; $x =~ s/SIMPLELENARG//g; $x =~ s/SIMPLETAGLENNSARG//g; $x =~ s/SIMPLETAGLENARG//g; $x =~ s/SIMPLELENNS//g; $x =~ s/SIMPLELEN//g; $x =~ s/SIMPLETAGLENNS//g; $x =~ s/SIMPLETAGLEN//g; $x =~ s/XMLNS_SO_LEN;/$xmlns_so_len/g; $x =~ s/XMLNS_WO_LEN;/$xmlns_wo_len/g; $x =~ s/ATTRS_SO_LEN;/$attrs_so_len/g; $x =~ s/ATTRS_WO_LEN;/$attrs_wo_len/g; $x =~ s/ELEMS_SO_LEN;/$elems_so_len/g; $x =~ s/ELEMS_WO_LEN;/$elems_wo_len/g; $x =~ s/XMLNS_SO_ENC;/$xmlns_so_enc/g; $x =~ s/XMLNS_WO_ENC;/$xmlns_wo_enc/g; $x =~ s/ATTRS_SO_ENC;/$attrs_so_enc/g; $x =~ s/ATTRS_WO_ENC;/$attrs_wo_enc/g; $x =~ s/ELEMS_SO_ENC;/$elems_so_enc/g; #$x =~ s/ANYELEM_WO_ENC;/$elems_wo_enc/g; $x =~ s/ELSTRUCT/$tx${el}_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/$el/g; $x =~ s/ELNSC/$ns:/g; $x =~ s/ELNS/$ns/g; $x =~ s/ELTAG/$tag/g; if ($rootp) { # special processing for root node $x =~ s%\#if 1 /\* NORMALMODE \*/.*?\#else(.*?)\#endif%$1%gs; } enc_out $x; $x = $aux_templ; $x =~ s/ATTRS_WALK_SO;/$attrs_walk_so/g; $x =~ s/ELEMS_WALK_SO;/$elems_walk_so/g; $x =~ s/ATTRS_WALK_WO;/$attrs_walk_wo/g; $x =~ s/ELEMS_WALK_WO;/$elems_walk_wo/g; $x =~ s/ATTRS_DUP_STRS;/$attrs_dup_strs/g; $x =~ s/ELEMS_DUP_STRS;/$elems_dup_strs/g; $x =~ s/ATTRS_FREE;/$attrs_free/g; $x =~ s/ELEMS_FREE;/$elems_free/g; $x =~ s/ATTRS_CLONE;/$attrs_clone/g; $x =~ s/ELEMS_CLONE;/$elems_clone/g; $x =~ s/ELSTRUCT/$tx${el}_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/$el/g; $x =~ s/ELNS/$ns/g; $x =~ s/ELTAG/$tag/g; if ($rootp) { # special processing for root node $x =~ s%\#if 1 /\* NORMALMODE \*/.*?\#else(.*?)\#endif%$1%gs; } aux_out $x; $x = $getput_templ; $x =~ s/ELSTRUCT/$tx${el}_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/$el/g; $x =~ s/ELNS/$ns/g; $x =~ s/ELTAG/$tag/g; if ($rootp) { # special processing for root node $x =~ s%\#if 1 /\* NORMALMODE \*/.*?\#else(.*?)\#endif%$1%gs; } getput_out $x; $x = $ds_templ; $x =~ s/ELSTRUCT/$tx${el}_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/$el/g; $x =~ s/ELNS/$ns/g; $x =~ s/ELTAG/$tag/g; $x =~ s/ATTRS_PARSE;/$ds_attr_parse/g; $x =~ s/ELEMS_PARSE;/$ds_elem_parse/g; $x =~ s/ATTRS_BUILD;/$ds_attr_build/g; $x =~ s/ELEMS_BUILD;/$ds_elem_build/g; ds_out $x; } sub read_template { my ($name) =@_; open F, "<$name-templ.c" or die "Cant read decoding template($name-templ.c)"; $templ = ; close F; $templ =~ s%(/\*\*.*?\*\*/)%%s; ($comment) = $1; $templ =~ s%(/\* EOF \*/)%%s; $comment =~ s/\$Id/Id/; # Preserve original CVS id return ($templ, $comment); } ### ### Per namespace code generator ### sub generate_ns { my ($ns, @els) = @_; my ($el); #return if $ns eq 'xs' || $ns eq 'xml'; reset_accumulators(); for $el (@els) { next if trivial_complexType_check($el); gen_element($el, $dt{'element'}{$el}, 0); } # ========================================================================== # Encoder # open F, ">$gen_prefix-$ns-enc.c" or die "Cant write enc($gen_prefix-$ns-enc.c): $!"; print F < #include "errmac.h" #include "${zx}.h" #include "$gen_prefix-const.h" #include "$gen_prefix-data.h" #include "$gen_prefix-$ns-data.h" #include "$gen_prefix-ns.h" ENC ; print F $enc; print F "/* EOF -- $gen_prefix-$ns-enc.c */\n"; close F; # ========================================================================== # Decoder # open F, ">$gen_prefix-$ns-dec.c" or die "Cant write dec($gen_prefix-$ns-dec.c): $!"; print F <$gen_prefix-$ns-aux.c" or die "Cant write aux($gen_prefix-$ns-aux.c): $!"; print F < #include "errmac.h" #include "${zx}.h" #include "$gen_prefix-const.h" #include "$gen_prefix-data.h" #include "$gen_prefix-$ns-data.h" ENC ; print F $aux; print F "/* EOF -- $gen_prefix-$ns-aux.c */\n"; close F; # ========================================================================== # GetPut: Accessor functions for struct fields # open F, ">$gen_prefix-$ns-getput.c" or die "Cant write getput($gen_prefix-$ns-getput.c): $!"; print F < #include "errmac.h" #include "${zx}.h" #include "$gen_prefix-const.h" #include "$gen_prefix-data.h" #include "$gen_prefix-$ns-data.h" ENC ; print F $getput; print F "/* EOF -- $gen_prefix-$ns-getput.c */\n"; close F; # ========================================================================== # DirectoryScript implementation of encoders and decoders # open F, ">$gen_prefix-$ns.ds" or die "Cant write getput($gen_prefix-$ns.ds): $!"; print F <$gen_prefix-$ns-data.h" or die "Cant write hdr($gen_prefix-$ns-data.h): $!"; ($fold = $gen_prefix) =~ s/\W/_/gs; print F <gg.g.ns, &pop_seen);\n/g; $x =~ s/ATTRS_SO_LEN;//g; $x =~ s/ATTRS_WO_LEN;//g; $x =~ s/ELEMS_SO_LEN;//g; $x =~ s/ELEMS_WO_LEN;//g; $x =~ s/XMLNS_SO_ENC;/ p = ${zx}_enc_xmlns_if_not_seen(c, p, ns, &pop_seen);/g; $x =~ s/XMLNS_WO_ENC;/ ${zx}_add_xmlns_if_not_seen(c, x->gg.g.ns, &pop_seen);\n/g; $x =~ s/ATTRS_SO_ENC;//g; $x =~ s/ATTRS_WO_ENC;//g; $x =~ s/ELEMS_SO_ENC;//g; #$x =~ s/ANYELEM_WO_ENC;//g; $x =~ s/SIMPLELENNSARG/, simplelen, ns/g; $x =~ s/SIMPLELENARG/, simplelen/g; $x =~ s/SIMPLETAGLENNSARG/, simpletag, simplelen, ns/g; $x =~ s/SIMPLETAGLENARG/, simpletag, simplelen/g; $x =~ s/SIMPLELENNS/, int simplelen, struct ${zx}_ns_s* ns/g; $x =~ s/SIMPLELEN/, int simplelen/g; $x =~ s/SIMPLETAGLENNS/, char* simpletag, int simplelen, struct ${zx}_ns_s* ns/g; $x =~ s/SIMPLETAGLEN/, char* simpletag, int simplelen/g; $x =~ s/sizeof\(""\)-1%simplelen + 3%g; $x =~ s/sizeof\("ELTAG"\)-1/simplelen/g; $x =~ s/${ZX}_OUT_TAG\(p, ""\)%${ZX}_OUT_SIMPLE_CLOSE_TAG(p, simpletag,simplelen)%go; $x =~ s/"ELTAG"/simpletag/g; $x =~ s/ELSTRUCT/${zx}_elem_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/simple_elem/g; $x =~ s/ELNSC//g; $x =~ s/ELNS//g; $x =~ s/ELTAG/simple_elem/g; $x =~ s/x-\>gg\./x-\>/gs; $x =~ s/\&x-\>gg/x/gs; enc_out $x; hdr_out <$gen_prefix-enc.c" or die "Cant write enc($gen_prefix-enc.c): $!"; print F < #include "errmac.h" #include "${zx}.h" #include "$gen_prefix-const.h" #include "$gen_prefix-data.h" #include "$gen_prefix-ns.h" ENC ; print F $enc; $x = $enc_wo_subtempl; $x =~ s/TX/$tx/g; $x =~ s/ANYELEM_WO_LEN;/$len_wo/g; $x =~ s/ANYELEM_WO_ENC;/$enc_wo/g; print F $x; print F "/* EOF -- $gen_prefix-enc.c */\n"; close F; # ========================================================================== # Decoder # # Generate the ${zx}_dec_simple_elem() function $x = $dec_templ; $x =~ s/ROOT_N_DECODE/, int toke/gs; $x =~ s/TXELNAME_ELEM/toke/gs; $x =~ s/ATTRS;//g; $x =~ s/ELEMS;//g; $x =~ s/ELSTRUCT/${zx}_elem_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/simple_elem/g; $x =~ s/ELNS//g; $x =~ s/ELTAG/simple_elem/g; $x =~ s/x-\>gg\./x-\>/gs; $x =~ s/&x-\>gg/x/gs; $x =~ s/ROOT_CHECK_N_DECODED;//g; dec_out $x; hdr_out "struct ${zx}_elem_s* ${tx}DEC_simple_elem(struct ${zx}_ctx* c, struct ${zx}_ns_s* ns, int tok);\n"; # Generate the ${zx}_dec_wrong_elem() function $x = $dec_templ; $x =~ s/ROOT_N_DECODE/, char* nam, int namlen/gs; $x =~ s/x-\>gg\.g\.tok = TXELNAME_ELEM;/x->gg.g.tok = ${ZX}_TOK_NOT_FOUND;\n x->name_len = namlen;\n x->name = nam;/gs; $x =~ s/defined\(DEC_WRONG_ELEM\)/1/g; $x =~ s/TXELNAME_ELEM/${ZX}_TOK_NOT_FOUND/gs; $x =~ s/ATTRS;//g; $x =~ s/ELEMS;//g; $x =~ s/ELSTRUCT/${zx}_any_elem_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/wrong_elem/g; $x =~ s/ELNS//g; $x =~ s/ELTAG/wrong_elem/g; $x =~ s/x-\>gg\.g\.ns = .*?elems\[${ZX}_TOK_NOT_FOUND\]\.ns;/${zx}_fix_any_elem_dec(c,x,nam,namlen);/gso; $x =~ s///g; $x =~ s/ROOT_CHECK_N_DECODED;//g; dec_out $x; hdr_out "struct ${zx}_any_elem_s* ${tx}DEC_wrong_elem(struct ${zx}_ctx* c, struct ${zx}_ns_s* ns, char* nam, int namlen);\n"; open F, ">$gen_prefix-dec.c" or die "Cant write dec($gen_prefix-dec.c): $!"; print F <gg\./x-\>/gs; $x =~ s/\&x-\>gg/x/gs; aux_out $x; hdr_out <$gen_prefix-aux.c" or die "Cant write enc($gen_prefix-aux.c): $!"; print F < #include "errmac.h" #include "${zx}.h" #include "$gen_prefix-const.h" #include "$gen_prefix-data.h" ENC ; print F $aux; print F "/* EOF -- $gen_prefix-aux.c */\n"; close F; # ========================================================================== # GetPut: Accessor functions for struct fields # $x = $getput_templ; $x =~ s/ELSTRUCT/${zx}_elem_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/simple_elem/g; $x =~ s/ELNS//g; $x =~ s/ELTAG/simple_elem/g; $x =~ s/x-\>gg\./x-\>/gs; getput_out $x; open F, ">$gen_prefix-getput.c" or die "Cant write enc($gen_prefix-getput.c): $!"; print F < #include "errmac.h" #include "${zx}.h" #include "$gen_prefix-const.h" #include "$gen_prefix-data.h" ENC ; print F $getput; print F "/* EOF -- $gen_prefix-getput.c */\n"; close F; # ========================================================================== # ds - DirectoryScript implementation to encode and decode # $x = $ds_templ; $x =~ s/ELSTRUCT/$tx${el}_s/g; $x =~ s/TX/$tx/g; $x =~ s/ELNAME/$el/g; $x =~ s/ELNS/$ns/g; $x =~ s/ELTAG/$tag/g; $x =~ s/ATTRS_PARSE;/$ds_attr_parse/g; $x =~ s/ELEMS_PARSE;/$ds_elem_parse/g; $x =~ s/ATTRS_BUILD;/$ds_attr_build/g; $x =~ s/ELEMS_BUILD;/$ds_elem_build/g; ds_out $x; open F, ">$gen_prefix.ds" or die "Cant write enc($gen_prefix.ds): $!"; print F <$gen_prefix-data.h" or die "Cant write hdr($gen_prefix-data.h): $!"; ($fold = $gen_prefix) =~ s/\W/_/gs; print F <$gen_prefix-elems.gperf" or die "Cant write gperf($gen_prefix-elems.gperf): $!"; print F < %} struct ${zx}_tok { const char* name; const char* prefix; struct ${zx}_ns_s* ns; }; %% GPERF ; elems_gperf_out qq("TOK_NOT_FOUND", "${ZX}", 0\n); print F $elems_gperf; print F "%%\n/* EOF - gperf -t -D -C -N${tx}elem2tok $gen_prefix-elems.gperf */\n"; close F; # ========================================================================== # Attribute table # open F, ">$gen_prefix-attrs.gperf" or die "Cant write gperf($gen_prefix-attrs.gperf): $!"; print F < %} struct ${zx}_tok { const char* name; const char* prefix; struct ${zx}_ns_s* ns; }; %% GPERF ; attrs_gperf_out qq("TOK_NOT_FOUND", "${ZX}", 0\n); print F $attrs_gperf; print F "%%\n/* EOF - gperf -t -D -C -N${tx}attr2tok $gen_prefix-attrs.gperf */\n"; close F; # ========================================================================== # Namespace table # ++$ns_siz; # Trailer open F, ">$gen_prefix-ns.c" or die "Cant write nsc($gen_prefix-ns.c): $!"; print F <$gen_prefix-ns.h" or die "Cant write nsh($gen_prefix-ns.h): $!"; print F <