Subject: RE: complex transform vs. generated transform performance..
From: "Michael Kay" <mike@xxxxxxxxxxxx>
Date: Mon, 6 Jun 2005 09:45:40 +0100
|
Haven't got time to give you the code, but you need to define a key that
allows you to replace
<xsl:value-of select="$record/*[local-name(.) = current()/@name]"/>
with a call on the key() function.
Michael Kay
http://www.saxonica.com/
> -----Original Message-----
> From: Brian Chrisman [mailto:incubus@xxxxxxxxxxxxxx]
> Sent: 05 June 2005 23:53
> To: xsl-list@xxxxxxxxxxxxxxxxxxxxxx
> Subject: complex transform vs. generated transform performance..
>
> Hello All,
>
> I slapped together a quick XSLt script to convert an XML document
> with a simple flat structure into a postgres-optimized
> table-load script.
> I had a schema document (XSD), so I used that to defined the
> transformation,
> but I also used the assumption that the schema was a simple
> 2-level document.
>
> The XML is a reasonable size .. 19MB.
> My first attempt is a stylesheet which dynamically used the
> information
> from the XSD to pull the fields from the XML. This was
> taking > 30 minutes
> to run.
> It seemed to work, though I only ran it on a few similarly
> formatted test
> documents. (The smallest of which, I'm including below)
> (I used sablotron... XSLT1.0.. xsltproc/libxslt was taking
> much longer)
>
> In the second attempt, I converted that stylesheet to one
> which generated
> a specific stylesheet for that XML, which did the
> transformation in 30 seconds.
> (The results diff'ed to be identical, minus one newline).
>
> In general, I lean towards 1-stage processes over 2-stage
> processes when
> I don't see a real usefulness to the intermediate stage (ie.
> modularizing
> functionality).
>
> If anybody spots a way to optimize this first stylesheet, I'd
> appreciate it.
>
> If the schema document I'm including below is too big or
> verbose, I could
> find a way to cut the example down a bit... in general
> though, it's just
> a flat space, like the XML example below it.
>
> Thanks,
> Brian Chrisman
>
>
> (hopefully my long lines or other formatting irregularities
> don't cause a
> huge problem here..)
> -- xsd2insertgenerator.xsl
> <xsl:stylesheet
> xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
> xmlns:xsd="http://www.w3.org/2001/XMLSchema"
> xmlns:od="urn:schemas-microsoft-com:officedata"
> exclude-result-prefixes="xsd od"
> version="1.0">
> <xsl:output method="text" omit-xml-declaration="yes"/>
>
> <!-- just imports the data, does not create the table.. etc -->
>
> <xsl:variable name="schema"
> select="document('/www/MAD/MAD.xsd')"/>
> <xsl:variable name="table"
> select="$schema/xsd:schema/xsd:element[1]//@ref"/>
> <xsl:variable name="element-list"
> select="$schema/xsd:schema/xsd:element[@name=$schema/xsd:schem
> a/xsd:element[1]//@ref]//xsd:element"/>
>
> <!-- some fields may exist *only* in the schema document -->
> <xsl:variable name="fields"
> select="$schema/xsd:schema/xsd:element[@name=/xsd:schema/xsd:e
> lement[1]//@ref]//xsd:element"/>
>
> <xsl:template match="/">
> <xsl:value-of select="concat('COPY "',
> $table, '" FROM stdin;
')"/>
> <xsl:apply-templates select="/dataroot/*"
> mode="one-record"/> <!-- Flat space imports only -->
> <xsl:text>\.</xsl:text>
> </xsl:template>
>
> <xsl:template match="*" mode="one-record">
> <xsl:variable name="record" select="."/>
> <!-- Note: below I am looping over the entries
> in the XSD doc, and looking up corresponding values in the
> XML record -->
> <xsl:for-each select="$element-list">
> <xsl:value-of
> select="$record/*[local-name(.) = current()/@name]"/>
> <xsl:if test="position() <
> last()"><xsl:text>	</xsl:text></xsl:if>
> </xsl:for-each>
> <xsl:text>
</xsl:text>
> </xsl:template>
>
> </xsl:stylesheet>
>
> -- xsd2insertgeneratorgenerator.xsl
> <xsl:stylesheet
> version="1.0"
> xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
> xmlns:gen="dummy-namespace-for-the-generated-xslt"
> xmlns:xsd="http://www.w3.org/2001/XMLSchema"
> xmlns:od="urn:schemas-microsoft-com:officedata"
> exclude-result-prefixes="xsd od xsl">
> <xsl:output method="xml" indent="yes"/>
>
> <xsl:namespace-alias stylesheet-prefix="gen"
> result-prefix="xsl"/>
> <xsl:variable name="schema"
> select="document('/www/MAD/MAD.xsd')"/>
> <xsl:variable name="table"
> select="$schema/xsd:schema/xsd:element[1]//@ref"/>
> <xsl:variable name="element-list"
> select="$schema/xsd:schema/xsd:element[@name=$schema/xsd:schem
> a/xsd:element[1]//@ref]//xsd:element"/>
>
> <xsl:template match="/">
> <gen:stylesheet version="1.0">
> <gen:output method="txt" omit-xml-declaration="yes"/>
>
> <gen:template match="/">
> <gen:value-of
> select="concat('COPY "', '{$table}', '" FROM
> stdin;
')"/>
> <gen:apply-templates
> select="/dataroot/{$table}"/> <!-- Flat space imports only -->
> <gen:text>\.</gen:text>
> <gen:for-each select="{$table}">
> <xsl:for-each
> select="$element-list">
> <gen:value-of
> select="{@name}"/>
> <xsl:if
> test="position() < last()">
>
> <gen:text><xsl:value-of select="'	'"/></gen:text>
> </xsl:if>
> </xsl:for-each>
> <gen:text><xsl:value-of
> select="'
'"/></gen:text>
> </gen:for-each>
> </gen:template>
>
> <!--
> <gen:template match="{$table}">
> <xsl:for-each select="$element-list">
> <gen:value-of select="{@name}"/>
> <xsl:if
> test="position() < last()">
>
> <gen:text><xsl:value-of select="'	'"/></gen:text>
> </xsl:if>
> </xsl:for-each>
> <gen:text><xsl:value-of
> select="'
'"/></gen:text>
> </gen:template>
> -->
> </gen:stylesheet>
> </xsl:template>
> </xsl:stylesheet>
>
>
>
> -- MAD.xsd
> <?xml version="1.0" encoding="UTF-8"?>
> <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
> xmlns:od="urn:schemas-microsoft-com:officedata">
> <xsd:element name="dataroot">
> <xsd:complexType>
> <xsd:sequence>
> <xsd:element ref="MAD" minOccurs="0" maxOccurs="unbounded"/>
> </xsd:sequence>
> <xsd:attribute name="generated" type="xsd:dateTime"/>
> </xsd:complexType>
> </xsd:element>
> <xsd:element name="MAD">
> <xsd:annotation>
> <xsd:appinfo/>
> </xsd:annotation>
> <xsd:complexType>
> <xsd:sequence>
> <xsd:element name="ADDRESS_ID" minOccurs="1"
> od:jetType="double" od:sqlSType="float" od:nonNullable="yes"
> type="xsd:double"/>
> <xsd:element name="APN" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="14"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="PREFIX_TYPE" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="15"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="PREFIX_DIR" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="5"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ST_NUMBER" minOccurs="0"
> od:jetType="double" od:sqlSType="float" type="xsd:double"/>
> <xsd:element name="ST_NAME" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="30"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ST_TYPE" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="SUFFIX_DIR" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="5"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="CITY" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="25"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ZIP" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="LONG_ADDRESS" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="100"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ADDR_FRACTION" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="5"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="UNIT_DESIGNATOR" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="UNIT" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="FLOOR" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="5"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="BUILDING" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="30"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ADDRESS_TYPE" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ADDRESS_STATUS" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="20"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ADDRESS_USE" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="25"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="PRIVATE" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="INCORPORATED" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="X_COORD" minOccurs="0"
> od:jetType="double" od:sqlSType="float" type="xsd:double"/>
> <xsd:element name="Y_COORD" minOccurs="0"
> od:jetType="double" od:sqlSType="float" type="xsd:double"/>
> <xsd:element name="DATE_ENTERED" minOccurs="0"
> od:jetType="datetime" od:sqlSType="datetime" type="xsd:dateTime"/>
> <xsd:element name="DATE_UPDATED" minOccurs="0"
> od:jetType="datetime" od:sqlSType="datetime" type="xsd:dateTime"/>
> <xsd:element name="DATE_RETIRED" minOccurs="0"
> od:jetType="datetime" od:sqlSType="datetime" type="xsd:dateTime"/>
> <xsd:element name="SOURCE" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="30"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="BUS_LIC_FLAG" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="SERC_REQ_FLAG" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="15"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="PROJECT_FLAG" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="15"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="PERMIT_FLAG" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="15"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="ZIP4_FLAG" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="1"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> <xsd:element name="COORD_SOURCE" minOccurs="0"
> od:jetType="text" od:sqlSType="nvarchar">
> <xsd:simpleType>
> <xsd:restriction base="xsd:string">
> <xsd:maxLength value="10"/>
> </xsd:restriction>
> </xsd:simpleType>
> </xsd:element>
> </xsd:sequence>
> </xsd:complexType>
> </xsd:element>
> </xsd:schema>
>
> -- foo.xml (test data)
> <?xml version="1.0" encoding="UTF-8"?>
> <dataroot xmlns:od="urn:schemas-microsoft-com:officedata"
> xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> xsi:noNamespaceSchemaLocation="MAD.xsd"
> generated="2005-04-29T15:13:49">
> <MAD>
> <ADDRESS_ID>1</ADDRESS_ID>
> <APN>22400900350000</APN>
> <ST_NUMBER>7453</ST_NUMBER>
> <ST_NAME>TIARA</ST_NAME>
> <ST_TYPE>WAY</ST_TYPE>
> <CITY>ORANGEVALE</CITY>
> <ZIP>95662</ZIP>
> <LONG_ADDRESS>7453 TIARA WAY B</LONG_ADDRESS>
> <UNIT>B</UNIT>
> <X_COORD>6768867.42298</X_COORD>
> <Y_COORD>2018154.73453</Y_COORD>
> <DATE_ENTERED>2005-04-19T00:00:00</DATE_ENTERED>
> <SOURCE>GISDROP_20050225</SOURCE>
> <BUS_LIC_FLAG>N</BUS_LIC_FLAG>
> <SERC_REQ_FLAG>N</SERC_REQ_FLAG>
> <PROJECT_FLAG>N</PROJECT_FLAG>
> <PERMIT_FLAG>Y</PERMIT_FLAG>
> <ZIP4_FLAG>N</ZIP4_FLAG>
> <COORD_SOURCE>CENTROID</COORD_SOURCE>
> </MAD>
> <MAD>
> <ADDRESS_ID>8</ADDRESS_ID>
> <APN>24301920130000</APN>
> <ST_NUMBER>6054</ST_NUMBER>
> <ST_NAME>SUNRISE</ST_NAME>
> <ST_TYPE>MALL</ST_TYPE>
> <CITY>CITRUS HEIGHTS</CITY>
> <ZIP>95610</ZIP>
> <LONG_ADDRESS>6054 SUNRISE MALL B7</LONG_ADDRESS>
> <UNIT>B7</UNIT>
> <X_COORD>6770356.0616</X_COORD>
> <Y_COORD>2007415.14451</Y_COORD>
> <DATE_ENTERED>2005-04-19T00:00:00</DATE_ENTERED>
> <SOURCE>GISDROP_20050225</SOURCE>
> <BUS_LIC_FLAG>N</BUS_LIC_FLAG>
> <SERC_REQ_FLAG>N</SERC_REQ_FLAG>
> <PROJECT_FLAG>N</PROJECT_FLAG>
> <PERMIT_FLAG>Y</PERMIT_FLAG>
> <ZIP4_FLAG>Y</ZIP4_FLAG>
> <COORD_SOURCE>CENTROID</COORD_SOURCE>
> </MAD>
> <MAD>
> <ADDRESS_ID>9</ADDRESS_ID>
> <APN>24301500230000</APN>
> <ST_NUMBER>7669</ST_NUMBER>
> <ST_NAME>GREENBACK</ST_NAME>
> <ST_TYPE>LN</ST_TYPE>
> <ZIP>0</ZIP>
> <LONG_ADDRESS>7669 GREENBACK LN 2002</LONG_ADDRESS>
> <UNIT>2002</UNIT>
> <X_COORD>6766774.11111</X_COORD>
> <Y_COORD>2010059.02536</Y_COORD>
> <DATE_ENTERED>2005-04-19T00:00:00</DATE_ENTERED>
> <SOURCE>GISDROP_20050225</SOURCE>
> <BUS_LIC_FLAG>N</BUS_LIC_FLAG>
> <SERC_REQ_FLAG>N</SERC_REQ_FLAG>
> <PROJECT_FLAG>N</PROJECT_FLAG>
> <PERMIT_FLAG>Y</PERMIT_FLAG>
> <ZIP4_FLAG>N</ZIP4_FLAG>
> <COORD_SOURCE>CENTROID</COORD_SOURCE>
> </MAD>
> </dataroot>
|