Rapidminer data transpose equivalent to melt in R

不问归期 提交于 2019-12-13 05:24:14

问题


I have a RapidMiner process which reads from a web API, uses Read XML to process the response, and uses XPath to capture one of the elements in the XML. There can be any number of these elements, and the resulting attribute is a single string concatenating all of the element/text() values.

As a result of the concatenated string, I have to split the string into multiple columns like this:

ID  Col1  Col2 Col3 Col4 Col5 Col6
A   1     5    7    8
B   2
C   4
D   3     9    10   11   12   13

My final goal is to transpose it into the following format:

ID  NewCol
A   1
A   5
A   7
A   8
B   2
C   4
D   3
D   9
D   10
D   11
D   12
D   13

Two questions:
1. Can the Read XML operator be configured to read data into multiple rows instead of one long concatenated string?
2. If the answer to 1 is no, is there any operator which can perform the "transpose" task described above (similar to the melt function in R)?


回答1:


You can use the De-Pivot operator in RapidMiner. See its documentation for details.




回答2:


The Read XML operator creates new attributes within an example. Basically, it makes new columns, not new rows.

There is no single operator that can do what you need, but you can make a process. I've attached one. It's relatively complex and given more time I could probably make it more efficient.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process that reshapes a wide example set (ID, Col1..Col6) into a
  long one with the attributes ID and NewCol.
  Stages:
    1. "make data"     builds the four sample rows used as input.
    2. "Loop Examples" reshapes one input row per iteration.
    3. "Append (2)"    merges the per-row results into the final table.
-->
<process version="6.0.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.008" expanded="true" name="Process">
    <process expanded="true">
      <!--
        Stage 1: sample input. Four single-row generators (IDs A, B, C, D) are
        appended into one example set. Every value is written as a quoted string,
        and unused ColN slots carry the placeholder string "missing", which
        "Declare Missing Value" (mode nominal, nominal_value "missing") is
        configured to treat as a missing value.
        Replace this subprocess with the real Read XML / split output.
      -->
      <operator activated="true" class="subprocess" compatibility="6.0.008" expanded="true" height="76" name="make data" width="90" x="112" y="75">
        <process expanded="true">
          <!-- Row A: values 1, 5, 7, 8; Col5 and Col6 unused. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="313" y="345">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;A&quot;"/>
              <parameter key="Col1" value="&quot;1&quot;"/>
              <parameter key="Col2" value="&quot;5&quot;"/>
              <parameter key="Col3" value="&quot;7&quot;"/>
              <parameter key="Col4" value="&quot;8&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Row B: single value 2. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="313" y="435">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;B&quot;"/>
              <parameter key="Col1" value="&quot;2&quot;"/>
              <parameter key="Col2" value="&quot;missing&quot;"/>
              <parameter key="Col3" value="&quot;missing&quot;"/>
              <parameter key="Col4" value="&quot;missing&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Row C: single value 4. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="313" y="525">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;C&quot;"/>
              <parameter key="Col1" value="&quot;4&quot;"/>
              <parameter key="Col2" value="&quot;missing&quot;"/>
              <parameter key="Col3" value="&quot;missing&quot;"/>
              <parameter key="Col4" value="&quot;missing&quot;"/>
              <parameter key="Col5" value="&quot;missing&quot;"/>
              <parameter key="Col6" value="&quot;missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Row D: all six slots used (3, 9, 10, 11, 12, 13). -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.0.008" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="313" y="615">
            <list key="attribute_values">
              <parameter key="ID" value="&quot;D&quot;"/>
              <parameter key="Col1" value="&quot;3&quot;"/>
              <parameter key="Col2" value="&quot;9&quot;"/>
              <parameter key="Col3" value="&quot;10&quot;"/>
              <parameter key="Col4" value="&quot;11&quot;"/>
              <parameter key="Col5" value="&quot;12&quot;"/>
              <parameter key="Col6" value="&quot;13&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Stack the four rows, then convert the "missing" placeholder. -->
          <operator activated="true" class="append" compatibility="6.0.008" expanded="true" height="130" name="Append" width="90" x="581" y="345"/>
          <operator activated="true" class="declare_missing_value" compatibility="6.0.008" expanded="true" height="76" name="Declare Missing Value" width="90" x="782" y="345">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="missing"/>
          </operator>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Stage 2: per-row reshape. The inner process runs once per input example
        and emits that row's values as a small ID/NewCol table on "output 1".
        NOTE(review): the macro %{example} used below is not defined in this
        file; presumably it is the iteration macro that Loop Examples sets to the
        current example index by default. Confirm against the operator docs.
      -->
      <operator activated="true" class="loop_examples" compatibility="6.0.008" expanded="true" height="94" name="Loop Examples" width="90" x="246" y="75">
        <process expanded="true">
          <!-- Keep only the current row: first_example and last_example are both %{example}. -->
          <operator activated="true" class="filter_example_range" compatibility="6.0.008" expanded="true" height="76" name="Filter Example Range" width="90" x="112" y="255">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <!--
            Turn the single row into a column. Presumably the result has one row
            per former attribute, with the values in "att_1" and the former
            attribute names in an "id" attribute. TODO confirm; "att_1" is the
            name the next operator reads.
          -->
          <operator activated="true" class="transpose" compatibility="6.0.008" expanded="true" height="76" name="Transpose" width="90" x="246" y="30"/>
          <!--
            Store the data value of att_1 at example index 1 in the macro "id".
            Assumes the first transposed row holds the original ID value
            (ID is the first attribute generated in "make data"). Verify for
            real input.
          -->
          <operator activated="true" class="extract_macro" compatibility="6.0.008" expanded="true" height="60" name="Extract Macro" width="90" x="246" y="120">
            <parameter key="macro" value="id"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="att_1"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!--
            No parameters are set, so operator defaults apply. Presumably the
            first row's values become the attribute names (att_1 is renamed to
            the ID value, the name column to "ID") and that row is removed.
            TODO confirm. The following operators rely on exactly those names.
          -->
          <operator activated="true" class="rename_by_example_values" compatibility="6.0.008" expanded="true" height="76" name="Rename by Example Values" width="90" x="246" y="210"/>
          <!-- Inverted subset selection: drop the attribute named "ID", keep everything else. -->
          <operator activated="true" class="select_attributes" compatibility="6.0.008" expanded="true" height="76" name="Select Attributes" width="90" x="380" y="30">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="ID"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Add a fresh "ID" attribute whose value is the quoted content of the macro "id". -->
          <operator activated="true" class="generate_attributes" compatibility="6.0.008" expanded="true" height="76" name="Generate Attributes" width="90" x="380" y="120">
            <list key="function_descriptions">
              <parameter key="ID" value="&quot;%{id}&quot;"/>
            </list>
          </operator>
          <!-- Rename the attribute whose name equals the macro value (the value column) to "NewCol". -->
          <operator activated="true" class="rename" compatibility="6.0.008" expanded="true" height="76" name="Rename" width="90" x="380" y="210">
            <parameter key="old_name" value="%{id}"/>
            <parameter key="new_name" value="NewCol"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!--
            condition_class no_missing_attributes: presumably removes the rows
            that came from unused ColN slots. TODO confirm.
          -->
          <operator activated="true" class="filter_examples" compatibility="6.0.008" expanded="true" height="94" name="Filter Examples" width="90" x="514" y="30">
            <parameter key="condition_class" value="no_missing_attributes"/>
            <list key="filters_list"/>
          </operator>
          <!--
            Wiring: the "original" port of Filter Example Range is routed back to
            the loop's "example set" sink so the unmodified input is passed on;
            the reshaped row goes to "output 1".
          -->
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="original" to_port="example set"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Rename by Example Values" to_port="example set input"/>
          <connect from_op="Rename by Example Values" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Stage 3: merge. "Append (2)" receives the loop's "output 1" (presumably a
        collection with one example set per iteration; verify) and produces the
        final long table on "result 2". "result 1" carries the loop's
        pass-through example set.
      -->
      <operator activated="true" class="append" compatibility="6.0.008" expanded="true" height="76" name="Append (2)" width="90" x="380" y="120"/>
      <connect from_op="make data" from_port="out 1" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append (2)" to_port="example set 1"/>
      <connect from_op="Append (2)" from_port="merged set" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

Hopefully, you can use it as a starting point.



来源:https://stackoverflow.com/questions/25048768/rapidminer-data-transpose-equivalent-to-melt-in-r

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!