问题
I am trying to make a series of graphs like this:
I have some mixed categorical and continuous data. I am able to make this series of graphs when there are only categorical variables or when there are only continuous variables. But I am unable to produce this series of graphs when there are both types of variables.
I have created some data below. Is there a way to debug this code so that it produces a series of graphs?
library(ggplot2)
library(gridExtra)
library(tidyr)
# Create some data: one continuous variable, two categorical variables and
# a cluster label, 100 observations each.
var_1 <- rnorm(100, 1, 4)
var_2 <- sample(LETTERS[1:2], 100, replace = TRUE, prob = c(0.3, 0.7))
var_3 <- sample(
  LETTERS[1:5], 100,
  replace = TRUE, prob = c(0.2, 0.2, 0.2, 0.2, 0.1)
)
cluster <- sample(
  LETTERS[1:4], 100,
  replace = TRUE, prob = c(2.5, 2.5, 2.5, 2.5)
)

# Put everything in a data frame.
f <- data.frame(var_1, var_2, var_3, cluster)

# Convert the categorical columns to factors (var_1 stays numeric).
f$var_2 <- as.factor(f$var_2)
f$var_3 <- as.factor(f$var_3)
f$cluster <- as.factor(f$cluster)
# Create graphs: reshape every column whose name contains "var" to long
# format, then draw one panel per cluster (rows) and variable (columns).
# The data frame is named `f`; `f2` was never defined.
# NOTE(review): this is the mixed numeric/factor attempt the question asks
# about, so it is presumably still expected to fail at pivot_longer().
f %>%
  pivot_longer(cols = contains("var"), names_to = "variable") %>%
  ggplot(aes(x = value, fill = value)) +
  geom_bar() +
  geom_density() +
  facet_grid(
    rows = vars(cluster),
    cols = vars(variable),
    scales = "free"
  ) +
  labs(y = "freq", fill = "Var")
When I only have categorical variables, the following code works:
# Categorical-only data: two categorical variables plus a cluster label,
# 100 draws each.
var_2 <- sample(LETTERS[1:2], size = 100, replace = TRUE, prob = c(0.3, 0.7))
var_3 <- sample(
  LETTERS[1:5],
  size = 100, replace = TRUE, prob = c(rep(0.2, 4), 0.1)
)
cluster <- sample(
  LETTERS[1:4],
  size = 100, replace = TRUE, prob = rep(2.5, 4)
)

# Build the frame with every column already stored as a factor.
f <- data.frame(
  var_2 = factor(var_2),
  var_3 = factor(var_3),
  cluster = factor(cluster)
)
# Reshape the "var" columns to long format and plot them faceted by
# cluster (rows) and variable (columns).
ggplot(
  data = pivot_longer(f, cols = contains("var"), names_to = "variable"),
  mapping = aes(x = value, fill = value)
) +
  geom_bar() +
  geom_density() +
  facet_grid(
    rows = vars(cluster),
    cols = vars(variable),
    scales = "free"
  ) +
  labs(y = "freq", fill = "Var")
回答1:
This is possible to do entirely within ggplot, but it's pretty hacky. Facets are really a way of showing extra dimensions of the same data set. They are not intended to be a way of arbitrarily stitching different plots together, so an entirely ggplot-based solution requires manipulating your data and the axis labels to produce the appearance of stitching plots together.
First, we get the unique levels of the barplot variables as character strings:
# Pool the labels of both categorical columns into one sorted vector of
# unique character strings.
levs <- sort(union(as.character(f$var_2), as.character(f$var_3)))
Now, we convert the factors to numbers:
# Replace each categorical value by its position in `levs`, shifted to sit
# 10 units beyond the rounded-up maximum of the continuous variable.
f$var_2 <- match(as.character(f$var_2), levs) + ceiling(max(f$var_1)) + 10
f$var_3 <- match(as.character(f$var_3), levs) + ceiling(max(f$var_1)) + 10
We will now construct the breaks and labels that we will use for our x axis:
# Axis breaks: pretty tick positions spanning var_1, followed by the sorted
# numeric codes now stored in var_2 / var_3.
breaks <- c(
  pretty(range(f$var_1)),
  sort(union(f$var_2, f$var_3))
)
# Axis labels, in the same order: the tick values, then the original
# category names.
labs <- c(
  pretty(range(f$var_1)),
  levs
)
Now we can safely pivot our data frame:
# Stack the three variable columns into long format; the default output
# columns `name` and `value` are used by the plot.
f <- f %>%
  pivot_longer(cols = all_of(c("var_1", "var_2", "var_3")))
For our plot, we will use appropriately subsetted groups from the data frame for the density plot and the bar plots. We then facet with free scales and label the x axis with our pre-defined breaks and labels:
# Density layer for the var_1 rows, bar layer for the remaining rows; one
# free-scaled panel per cluster/variable pair, with the x axis relabelled
# using the pre-computed breaks and labels.
ggplot(data = f, mapping = aes(x = value)) +
  geom_density(data = subset(f, name == "var_1")) +
  geom_bar(
    data = subset(f, name != "var_1"),
    mapping = aes(fill = name)
  ) +
  facet_wrap(vars(cluster, name), ncol = 3, scales = "free") +
  scale_x_continuous(breaks = breaks, labels = labs) +
  scale_fill_manual(values = c("deepskyblue4", "gold"), guide = "none")
回答2:
I do not think ggplot can handle both continuous and categorical variables in the same `y` or `x` aesthetic. There is also an error when mixing them in `pivot_longer()`:
Error: Can't combine `var_1` <double> and `var_2` <character>.
My recommendation would be to create separate plots for each metric and then combine the plots. This will give you greater control of each plot. Here is an example using GGally's ggmatrix(). I am sure this is also possible with gridExtra.
library(ggplot2)
library(gridExtra)
library(tidyr)
library(GGally)
# Generate data: one continuous variable, two categorical variables and a
# cluster label, 100 observations each.
var_1 <- rnorm(n = 100, mean = 1, sd = 4)
var_2 <- sample(LETTERS[1:2], size = 100, replace = TRUE, prob = c(0.3, 0.7))
var_3 <- sample(
  LETTERS[1:5],
  size = 100, replace = TRUE, prob = c(rep(0.2, 4), 0.1)
)
cluster <- sample(
  LETTERS[1:4],
  size = 100, replace = TRUE, prob = rep(2.5, 4)
)

# Assemble the frame with the categorical columns stored as factors.
f <- data.frame(
  var_1 = var_1,
  var_2 = factor(var_2),
  var_3 = factor(var_3),
  cluster = factor(cluster)
)
# Create plots for each var.
# var_1: density curve, one facet row per cluster.
var_1_plot <- ggplot(data = f, mapping = aes(x = var_1, fill = cluster)) +
  geom_density() +
  facet_grid(rows = vars(cluster), scales = "free")
# var_2: bar chart of category counts, one facet row per cluster.
var_2_plot <- ggplot(data = f, mapping = aes(x = var_2, fill = cluster)) +
  geom_bar() +
  facet_grid(rows = vars(cluster), scales = "free")
# var_3: bar chart of category counts, one facet row per cluster.
var_3_plot <- ggplot(data = f, mapping = aes(x = var_3, fill = cluster)) +
  geom_bar() +
  facet_grid(rows = vars(cluster), scales = "free")
# Combine all plots: lay the three per-variable plots out side by side in
# a 1 x 3 matrix, with a column label for each variable.
plot_list <- list(var_1_plot, var_2_plot, var_3_plot)
# The trailing comma after xAxisLabels was removed: it passed an empty
# positional argument to ggmatrix().
GGally::ggmatrix(
  plots = plot_list,
  nrow = 1,
  ncol = 3,
  xAxisLabels = c("Var 1", "Var 2", "Var 3")
)
来源:https://stackoverflow.com/questions/63543604/using-ggplot2-and-facet-grid-for-continuous-and-categorical-variables-together