Copying MultiIndex dataframes with pd.read_clipboard?

前端 未结 1 1991
鱼传尺愫
鱼传尺愫 2020-11-30 06:16

Given a dataframe like this:

          C
A   B      
1.1 111  20
    222  31
3.3 222  24
    333  65
5.5 333  22
6.6 777  74 

How do I read it back into a MultiIndex DataFrame using pd.read_clipboard?

相关标签:
1条回答
  • 2020-11-30 06:29

    UPDATE: now it parses the clipboard - i.e. no need to save it beforehand

    def read_clipboard_mi(index_names_row=None, **kwargs):
        """Read a printed MultiIndex DataFrame from the clipboard.

        The clipboard text is parsed as fixed-width columns with
        ``pandas.read_fwf``.  The leading columns that make up the row index
        are forward-filled (pandas prints repeated outer index labels as
        blanks) and then set as the index of the result.

        Parameters
        ----------
        index_names_row : int, optional
            Zero-based line number, within the clipboard text, of the row
            holding the index level names.  That row is split on whitespace to
            obtain the names and is skipped while parsing; any ``skiprows``
            passed in ``kwargs`` is overridden in that case.  When omitted,
            the index columns are taken to be the leading columns whose header
            was parsed as ``Unnamed: N``.
        **kwargs
            Forwarded to ``pandas.read_fwf``.  ``encoding`` is accepted for
            symmetry with ``read_clipboard`` but must be UTF-8 (or ``None``).

        Returns
        -------
        pandas.DataFrame
            The parsed frame.  If no index columns can be identified, the
            frame is returned as parsed, with its default index.

        Raises
        ------
        NotImplementedError
            If an encoding other than UTF-8 is requested.
        TypeError
            If ``index_names_row`` is given but is not an ``int``.

        Notes
        -----
        Multi-level columns are not supported.
        """
        encoding = kwargs.pop('encoding', 'utf-8')

        # only utf-8 is valid for passed value because that's what clipboard
        # supports
        if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
            raise NotImplementedError(
                'reading from clipboard only supports utf-8 encoding')

        # Validate before touching the clipboard so that bad arguments fail
        # fast.  Compare against None explicitly: row 0 is a legitimate value.
        if index_names_row is not None and not isinstance(index_names_row, int):
            raise TypeError('[index_names_row] must be of [int] data type')

        # Function-scope imports keep this snippet self-contained.
        from io import StringIO

        from pandas import read_fwf
        from pandas.io.clipboard import clipboard_get

        data = clipboard_get()

        # Some clipboard backends may hand back bytes; decode best-effort so
        # that an odd byte sequence does not abort the whole read.
        if isinstance(data, bytes):
            data = data.decode('utf-8', errors='replace')

        index_names = None
        if index_names_row is not None:
            index_names = data.splitlines()[index_names_row].split()
            # The names row is not data, so keep read_fwf from parsing it.
            kwargs['skiprows'] = [index_names_row]

        df = read_fwf(StringIO(data), **kwargs)

        # str() guards against non-string labels (e.g. with header=None).
        unnamed_cols = [col for col in df.columns if 'Unnamed:' in str(col)]

        if index_names:
            idx_cols = df.columns[:len(index_names)].tolist()
        elif unnamed_cols:
            idx_cols = df.columns[:len(unnamed_cols)].tolist()
            index_names = [None] * len(idx_cols)
        else:
            # Nothing identifies the index columns; return the frame as-is.
            return df

        # Blank cells in the index columns stand for "same as the row above".
        df[idx_cols] = df[idx_cols].ffill()
        return df.set_index(idx_cols).rename_axis(index_names)
    

    testing multi-index DF without index names:

    In [231]: read_clipboard_mi()
    Out[231]:
              C
    1.1 111  20
        222  31
    3.3 222  24
        333  65
    5.5 333  22
    6.6 777  74
    

    testing multi-index DF with index names:

    In [232]: read_clipboard_mi(index_names_row=1)
    Out[232]:
              C
    A   B
    1.1 111  20
        222  31
    3.3 222  24
        333  65
    5.5 333  22
    6.6 777  74
    

    NOTE:

    1. it's not well tested
    2. it does NOT support multi-level columns
    3. see point 1 ;-)

    NOTE2: please feel free to use this code or to open a pull request on the pandas GitHub repository

    0 讨论(0)
提交回复
热议问题