Remove recurring values from string

a 夏天 提交于 2019-12-11 05:37:31

问题


I need to remove the file paths in this XML via XSLT:

<?xml version="1.0" encoding="ISO-8859-1"?>
    <InvoiceCapture>
    <Invoice>
        <CaptureDate>2014-02-19</CaptureDate>
        <CaptureTime>14:04:07</CaptureTime>
        <Company>bygg</Company>
        <Type>0</Type>
        <Supplier>11111111</Supplier>
        <SupplierInvoiceNo>11111111</SupplierInvoiceNo>
        <InvoiceDate>2013-12-30</InvoiceDate>
        <DueDate>2014-01-29</DueDate>
        <Reference1>11111111</Reference1>
        <Reference2>11111111</Reference2>
        <Currency>SEK</Currency>
        <Amount>11111111</Amount>
        <VatAmount>11111111</VatAmount>
        <AlternativeID>20140219_bygg_2788</AlternativeID>
        <ImageFile>\\extsql1\INVOICES\m3Bygg\test\2KB16000.PNG  \\extsql1\INVOICES\m3Bygg\test\2KB16002.PNG \\extsql1\INVOICES\m3Bygg\test\2KB16004.PNG \\extsql1\INVOICES\m3Bygg\test\2KB16006.PNG \\extsql1\INVOICES\m3Bygg\test\2KB16008.PNG</ImageFile>
        <NoOfImages>5</NoOfImages>
        <BatchPrefix/>
        <BatchNo>2788</BatchNo>
        <InvoiceLine/>
    </Invoice>
</InvoiceCapture>

The output I need is only the image names separated with space:

<ImageFile>2JE04000.PNG 2JE04002.PNG 2JE04004.PNG 2JE04006.PNG 2JE04008.PNG</ImageFile>

回答1:


My suggestion is this template, which I called extract-substrings-between. It has the advantage of doing the task with a single template that requires no extensions, is not specific to this particular problem, and is more generally useful.

Its parameters are:

  • string: The to-be-processed string. It defaults to the value of the current node with normalize-space() applied to it.
  • startCharacter and endCharacter: The template extracts any substring that neither contains a startCharacter nor an endCharacter, but is immediately preceded by either a startCharacter or the start of the string and immediately followed by either the endCharacter or the end of the string. Both startCharacter and endCharacter default to a space.
  • outputSeparator: As the name says, the string separating the extracted substrings on output. Defaults to a space as well.

Stylesheet

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="1.0">
  <!-- Root rule: hand every InvoiceCapture/Invoice/ImageFile element to the
       moded rule below, in document order. Nothing else is written. -->
  <xsl:template match="/">
    <xsl:apply-templates select="InvoiceCapture/Invoice/ImageFile" mode="image-file-names"/>
  </xsl:template>

  <!-- Re-creates the ImageFile element and fills it with the result of
       extract-substrings-between, using a backslash as the start character.
       The string to process is left to that template's default, which is
       taken from the current node (this ImageFile element). -->
  <xsl:template match="ImageFile" mode="image-file-names">
    <xsl:copy>
      <xsl:call-template name="extract-substrings-between">
        <xsl:with-param name="startCharacter" select="'\'"/>
      </xsl:call-template>
    </xsl:copy>
  </xsl:template>

  <!--
    extract-substrings-between

    Splits $string into chunks at every $endCharacter and, from each chunk,
    keeps only the part that follows the last $startCharacter in it (the whole
    chunk when it contains no $startCharacter). The kept parts are written
    out with $outputSeparator between them.

    Parameters:
      string           The string to process. Defaults to the string value of
                       the current node with normalize-space() applied.
      startCharacter   Delimiter that may precede a wanted substring.
                       Defaults to a single space.
      endCharacter     Delimiter that terminates a wanted substring.
                       Defaults to a single space.
      outputSeparator  Text written between two extracted substrings.
                       Defaults to a single space.

    Output: text only; nothing is written after the last substring.
  -->
  <xsl:template name="extract-substrings-between">
    <xsl:param name="string" select="normalize-space()"/>
    <xsl:param name="startCharacter" select="' '"/>
    <xsl:param name="endCharacter" select="' '"/>
    <xsl:param name="outputSeparator" select="' '"/>

    <!-- Text up to the first end delimiter. One delimiter is appended first
         so that substring-before() also yields the final chunk, which has no
         trailing delimiter of its own. -->
    <xsl:variable name="currentToken"
      select="substring-before(concat($string, $endCharacter), $endCharacter)"/>

    <xsl:choose>
      <xsl:when test="contains($currentToken, $startCharacter)">
        <!-- The chunk still contains a start delimiter: drop everything up to
             and including its first occurrence and look again. -->
        <xsl:call-template name="extract-substrings-between">
          <xsl:with-param name="string" select="substring-after($string, $startCharacter)"/>
          <xsl:with-param name="startCharacter" select="$startCharacter"/>
          <xsl:with-param name="endCharacter" select="$endCharacter"/>
          <xsl:with-param name="outputSeparator" select="$outputSeparator"/>
        </xsl:call-template>
      </xsl:when>

      <xsl:otherwise>
        <!-- No start delimiter left in the chunk: this is a wanted substring. -->
        <xsl:value-of select="$currentToken"/>

        <!-- Continue after the END delimiter. This has to be $endCharacter
             rather than a literal space; with a literal space every chunk
             after the first is lost whenever the caller supplies a different
             end delimiter. -->
        <xsl:variable name="remainingString" select="substring-after($string, $endCharacter)"/>
        <xsl:if test="$remainingString != ''">
          <xsl:value-of select="$outputSeparator"/>
          <xsl:call-template name="extract-substrings-between">
            <xsl:with-param name="string" select="$remainingString"/>
            <xsl:with-param name="startCharacter" select="$startCharacter"/>
            <xsl:with-param name="endCharacter" select="$endCharacter"/>
            <xsl:with-param name="outputSeparator" select="$outputSeparator"/>
          </xsl:call-template>
        </xsl:if>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>

Output:

<ImageFile>2KB16000.PNG 2KB16002.PNG 2KB16004.PNG 2KB16006.PNG 2KB16008.PNG</ImageFile>



回答2:


Your expected output no longer matches your input XML. Still, below is a solution in XSLT 1.0 - which is difficult, as I said. Functions like tokenize() are not available in XSLT 1.0 and you cannot use result tree fragments as a node-set natively. My solution uses EXSLT to convert a result tree fragment to a node-set.

This only works if the file paths are similar and contain "\test\" towards the end.

Stylesheet

<?xml version="1.0" encoding="utf-8"?>

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:exsl="http://exslt.org/common"
                extension-element-prefixes="exsl">

   <xsl:output method="xml" indent="yes" omit-xml-declaration="yes"/>

    <xsl:strip-space elements="*"/>

   <!-- Rewrites an ImageFile element so that it holds only the part of each
        path that follows '\test\', with exactly one space between entries.

        The content is whitespace-normalized before it is tokenized, empty
        items are skipped, and the separator is written only between entries,
        so runs of whitespace in the input no longer produce doubled spaces
        and no space is left after the last entry.

        Limitation: a path that does not contain '\test\' contributes an
        empty entry, because substring-after() then returns ''. -->
   <xsl:template match="//ImageFile">
    <xsl:copy>
       <!-- Result tree fragment holding one item element per path. -->
       <xsl:variable name="tokens">
           <xsl:call-template name="tokenize">
               <xsl:with-param name="string" select="normalize-space(.)"/>
           </xsl:call-template>
       </xsl:variable>
       <!-- exsl:node-set() turns the fragment into a node-set, which
            XSLT 1.0 needs before its children can be iterated. -->
       <xsl:for-each select="exsl:node-set($tokens)/*[normalize-space()]">
           <xsl:if test="position() &gt; 1">
               <xsl:text>&#32;</xsl:text>
           </xsl:if>
           <xsl:value-of select="substring-after(.,'\test\')"/>
       </xsl:for-each>
    </xsl:copy>
   </xsl:template>

   <!-- tokenize: splits $string at every occurrence of $separator and writes
        one item element per piece, each piece passed through
        normalize-space().

        Parameters:
          string     The string to split. Defaults to the current node.
          separator  The delimiter string. Defaults to a single space.

        Empty pieces (for example between two adjacent separators) are
        emitted as empty item elements. -->
   <xsl:template name="tokenize">
    <xsl:param name="string" select="."/>
    <xsl:param name="separator" select="' '"/>
    <xsl:choose>
        <xsl:when test="not(contains($string, $separator))">
            <!-- No separator left: the rest of the string is the last item. -->
            <item>
                <xsl:value-of select="normalize-space($string)"/>
            </item>
        </xsl:when>
        <xsl:otherwise>
            <item>
                <xsl:value-of select="normalize-space(substring-before($string, $separator))"/>
            </item>
            <!-- Recurse on the remainder. The separator is passed along
                 explicitly; otherwise the recursive call would fall back to
                 the default space and ignore the caller's separator. -->
            <xsl:call-template name="tokenize">
                <xsl:with-param name="string" select="substring-after($string, $separator)"/>
                <xsl:with-param name="separator" select="$separator"/>
            </xsl:call-template>
        </xsl:otherwise>
    </xsl:choose>
   </xsl:template>

   <!-- Empty rule for text nodes: overrides the built-in rule that would
        copy their text to the output, so text nodes reached through
        apply-templates produce nothing. -->
   <xsl:template match="text()"/>

</xsl:stylesheet>

Output

<ImageFile>2KB16000.PNG  2KB16002.PNG 2KB16004.PNG 2KB16006.PNG 2KB16008.PNG </ImageFile>



回答3:


Assuming the paths are really separated by the tab (&#9;) character (hard to tell from the copied example), and not assuming anything about the path other than the \ delimiter, try the following stylesheet:

XSLT 1.0:

<xsl:stylesheet version="1.0" 
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method='xml' version='1.0' encoding='utf-8' indent='yes'/>

<!-- Root rule: dispatch every InvoiceCapture/Invoice/ImageFile element, in
     document order, to the moded rule below. Nothing else is written. -->
<xsl:template match="/">
    <xsl:apply-templates select="InvoiceCapture/Invoice/ImageFile" mode="strip-directories"/>
</xsl:template>

<!-- Re-creates the ImageFile element and fills it with whatever the
     tokenize template writes for the element's own string value. -->
<xsl:template match="ImageFile" mode="strip-directories">
    <xsl:copy>
        <xsl:call-template name="tokenize">
            <xsl:with-param name="string" select="."/>
        </xsl:call-template>
    </xsl:copy>
</xsl:template>

<!-- tokenize: splits $string at every $delimiter and passes each piece to
     the file-name template. The final piece is passed with last-token set
     to true().

     Parameters:
       string     The string holding the delimited paths.
       delimiter  The string separating two paths. Defaults to a tab
                  character. -->
<xsl:template name="tokenize">
    <xsl:param name="string"/>
    <xsl:param name="delimiter" select="'&#9;'"/>
    <xsl:choose>
        <xsl:when test="contains($string, $delimiter)">
            <xsl:call-template name="file-name">
                <xsl:with-param name="path" select="substring-before($string, $delimiter)"/>
            </xsl:call-template>
            <!-- Recurse on the remainder. The delimiter is passed along
                 explicitly; otherwise the recursive call would fall back to
                 the default tab and ignore the caller's delimiter. -->
            <xsl:call-template name="tokenize">
                <xsl:with-param name="string" select="substring-after($string, $delimiter)" />
                <xsl:with-param name="delimiter" select="$delimiter" />
            </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
            <!-- No delimiter left: this is the last path. -->
            <xsl:call-template name="file-name">
                <xsl:with-param name="path" select="$string"/>
                <xsl:with-param name="last-token" select="true()"/>
            </xsl:call-template>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>

<!-- file-name: writes the part of $path that follows its last $delimiter
     (the whole of $path when it contains none), followed by a single space
     unless $last-token is true.

     Parameters:
       path        The path to reduce.
       delimiter   The path separator. Defaults to a backslash.
       last-token  When true, no trailing space is written. When omitted it
                   is the empty string, which counts as false. -->
<xsl:template name="file-name">
    <xsl:param name="path"/>
    <xsl:param name="delimiter" select="'\'"/>
    <xsl:param name="last-token"/>
    <xsl:choose>
        <xsl:when test="contains($path, $delimiter)">
            <!-- Drop the leading segment and recurse. The delimiter is
                 passed along explicitly; otherwise the recursive call would
                 fall back to the default backslash and ignore the caller's
                 delimiter. -->
            <xsl:call-template name="file-name">
                <xsl:with-param name="path" select="substring-after($path, $delimiter)" />
                <xsl:with-param name="delimiter" select="$delimiter" />
                <xsl:with-param name="last-token" select="$last-token" />
            </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
            <!-- No delimiter left: what remains is the file name. -->
            <xsl:value-of select="$path"/>
            <xsl:if test="not($last-token)">
                <xsl:text> </xsl:text>
            </xsl:if>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>

</xsl:stylesheet>


来源:https://stackoverflow.com/questions/21905766/remove-recurring-values-from-string

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!