Stacked density plots with pandas and seaborn

狂风中的少年 提交于 2019-12-14 03:07:56

问题


I am trying to obtain the following plot from a pandas data frame.

I am not sure how to combine seaborn with pandas for that task.

This is the dataframe I want to use:

import numpy as np
import pandas as pd

# Three numeric columns with different locations and spreads.
data = pd.DataFrame({'a': np.random.randn(1000) + 1,
                     'b': np.random.randn(1000),
                     'c': np.random.rand(1000) + 10},
                    columns=['a', 'b', 'c'])

# Blank out random rows so the columns hold different numbers of valid points.
# .loc assigns into the frame itself, whereas the chained form
# ``data.a[...] = ...`` may write to a temporary copy; np.nan is the spelling
# that still exists in NumPy 2.0 (np.NaN was removed there).
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan

Note that the histogram heights (frequencies) need to be normalized: the columns contain very different numbers of data points and follow different distributions, so each one would otherwise end up on its own 'y scale'.

# Histogram of all columns through pandas' plotting accessor, using its
# default settings (no normalization is requested here).
data.plot.hist();

This is the seaborn code that generates the figure shown at the beginning.

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's "white" style and set the axes face color to (0, 0, 0, 0).
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 150 normal draws from a seeded RandomState, tagged with
# the repeating group letters A, B, C (50 of each).
rs = np.random.RandomState(1979)
x = rs.randn(150)
g = np.tile(list("ABC"), 50)
df = pd.DataFrame(dict(x=x, g=g))
# NOTE(review): m (the code point of each group letter) is not used by any
# later line visible here.
m = df.g.map(ord)

# Initialize the FacetGrid object: one row per group, hue also set by group.
# NOTE: this rebinds g, which until now held the array of group letters.
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=5, height=1, palette=pal)

# Draw the densities in a few steps: a shaded KDE, a white ("w") KDE line
# drawn with the same bandwidth, and a horizontal line at y=0.
# NOTE(review): shade= and bw= are presumably deprecated in newer seaborn
# releases — confirm against the installed version.
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write *label* in bold on the current axes, in axes coordinates.

    The text is anchored at (0, 0.3) in axes coordinates, left-aligned and
    vertically centered, and drawn in *color*. ``x`` is accepted but not
    used by the body.
    """
    current_axes = plt.gca()
    text_style = dict(fontweight="bold", color=color, ha="left", va="center")
    current_axes.text(0, .3, label,
                      transform=current_axes.transAxes, **text_style)


# Apply label to the grid, mapped over the "x" column.
g.map(label, "x")

# Set the subplots to overlap
# NOTE(review): hspace=-.0025 is a very small negative spacing — confirm it
# produces the intended amount of overlap.
g.fig.subplots_adjust(hspace=-.0025)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)

回答1:


Here is a function to create a grid of kde plots ("joyplot") with one plot per dataframe column.

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde


def joyplot_from_dataframe(data, cmap=None):
    """Draw one filled KDE curve per column of *data* on stacked subplots.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are plotted one per row of the figure. NaN
        entries are dropped per column before the density is estimated.
    cmap : matplotlib colormap or str, optional
        Colormap (or registered colormap name) from which one color per
        column is sampled. Defaults to "Blues".

    Returns
    -------
    fig, axes
        The figure and a 1-D array holding one Axes per column.
    """
    # Common x grid: the overall data range padded by a fifth on each side.
    lo, hi = np.nanmin(data.values), np.nanmax(data.values)
    pad = (hi - lo) / 5
    x = np.linspace(lo - pad, hi + pad, 1000)

    n = len(data.columns)

    if not cmap:
        cmap = "Blues"
    if isinstance(cmap, str):
        # plt.get_cmap is used because plt.cm.get_cmap was removed in
        # Matplotlib 3.9.
        cmap = plt.get_cmap(cmap)
    colors = cmap(np.linspace(.2, 1, n))

    # squeeze=False always returns a 2-D array of Axes, so a single-column
    # frame no longer yields a bare (non-iterable) Axes object.
    fig, axes = plt.subplots(nrows=n, sharex=True, squeeze=False)
    axes = axes.ravel()

    for c, ax, color in zip(data.columns, axes, colors):
        y = data[c].values
        y = y[~np.isnan(y)]
        kde = gaussian_kde(y)
        ax.fill_between(x, kde(x), color=color)
        # Keep only a colored baseline; hide the y axis and other spines.
        ax.yaxis.set_visible(False)
        for spine in ["left", "right", "top"]:
            ax.spines[spine].set_visible(False)
        ax.spines["bottom"].set_linewidth(2)
        ax.spines["bottom"].set_color(color)
        ax.margins(y=0)
        ax.tick_params(bottom=False)

    return fig, axes

Use it as follows:

import pandas as pd

# Three numeric columns with different locations and spreads.
data = pd.DataFrame({'a': np.random.randn(1000) + 1,
                     'b': np.random.randn(1000),
                     'c': np.random.rand(1000) + 10},
                    columns=['a', 'b', 'c'])

# Blank out random rows so the columns hold different numbers of valid points.
# .loc assigns into the frame itself, whereas the chained form
# ``data.a[...] = ...`` may write to a temporary copy; np.nan is the spelling
# that still exists in NumPy 2.0 (np.NaN was removed there).
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan


# Build the stacked density figure for the frame, then display it.
joyplot_from_dataframe(data)
plt.show()



来源:https://stackoverflow.com/questions/54729039/stacked-density-plots-with-pandas-and-seaborn

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!