R:ggplot2数据可视化——基础知识

匿名 (未验证) 提交于 2019-12-02 23:59:01

# Easiest way to get ggplot2: install the whole tidyverse.
install.packages("tidyverse")

# Alternatively, install only ggplot2.
install.packages("ggplot2")

# Or install the development version from GitHub.
# install.packages("devtools")
devtools::install_github("tidyverse/ggplot2")

library(ggplot2)

# Initialise a plot using the diamonds dataset as the example.
# General form: gg <- ggplot(df, aes(x = xcol, y = ycol)); df must be a
# data frame.
ggplot(diamonds)

# Only the X axis is given; the Y axis can be specified in the respective geoms.
ggplot(diamonds, aes(x = carat))

# Both X and Y axes are fixed for all layers.
ggplot(diamonds, aes(x = carat, y = price))

# Each level of 'cut' gets its own colour once a geom is added.
ggplot(diamonds, aes(x = carat, color = cut))

# aes() stands for aesthetics: ggplot2 treats the X and Y axes as aesthetics
# just like colour, size and shape.
# A fixed colour (one not based on a variable of the data frame) must be set
# outside aes(). It has to be given to a geom layer: a colour passed directly
# to ggplot() has no effect on the plot.
ggplot(diamonds, aes(x = carat)) +
  geom_density(color = "steelblue")

添加图层(geoms):全部可用的 geom 见 ggplot2 官方 documentation

library(ggplot2)

# Base plot shared by the examples below.
gg <- ggplot(diamonds, aes(x = carat, y = price))

# Scatterplot layer.
gg + geom_point()

# Static aesthetics: fixed values set outside aes().
# 'stroke' controls the width of the point border.
gg + geom_point(size = 1, shape = 1, color = "steelblue", stroke = 2)

# Dynamic aesthetics: carat, cut and color are variables of the data frame.
gg + geom_point(aes(size = carat, shape = cut, color = color, stroke = carat))

# Scatterplot geom (layer 1) plus smoothing geom (layer 2).
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth()

# Equivalent: specify the aesthetics inside each geom layer instead.
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, color = cut)) +
  geom_smooth(aes(x = carat, y = price, color = cut))

# Merge the per-group smoothing curves into one by removing color from
# geom_smooth.
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, color = cut)) +
  geom_smooth(aes(x = carat, y = price))

# Same result, but simpler.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(aes(color = cut)) +
  geom_smooth()

# Give the differently coloured points different shapes as well.
# 'color' has 7 levels, while the default shape palette supports at most 6,
# so the shapes are supplied explicitly to avoid dropping one level.
ggplot(diamonds, aes(x = carat, y = price, color = cut, shape = color)) +
  geom_point() +
  scale_shape_manual(values = 0:6)

添加水平或者垂直线

# NOTE(review): gg3 is not defined in the visible text; presumably a diamonds
# scatterplot of price against carat - confirm.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines.

# Horizontal line.
# linetypes: solid, dashed, dotted, dotdash, longdash and twodash
p1 <- gg3 +
  geom_hline(yintercept = 5000, size = 2, linetype = "dotted", color = "blue")

# Vertical line.
p2 <- gg3 +
  geom_vline(xintercept = 4, size = 2, color = "firebrick")

# A single line segment. The fixed settings size and lineend belong outside
# aes(); inside aes() they would be treated as mappings rather than constants.
p3 <- gg3 +
  geom_segment(
    aes(x = 4, y = 5000, xend = 4, yend = 10000),
    size = 2,
    lineend = "round"
  )

# One segment per observation. x, y: start points. xend, yend: end points.
p4 <- gg3 +
  geom_segment(
    aes(x = carat, y = price, xend = carat, yend = price - 500, color = color),
    size = 2
  ) +
  coord_cartesian(xlim = c(3, 5))

gridExtra::grid.arrange(p1, p2, p3, p4, ncol = 2)

labs

library(ggplot2)

# Add the plot title and the axis titles.
gg <- ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  labs(title = "Scatterplot", x = "Carat", y = "Price")

# Render the stored plot (print() displays it; it does not save a file).
print(gg)

使用Theme函数控制标签的尺寸、颜色等,在element_text()函数内自定义具体的格式,想要清除格式,则设为element_blank()即可

# Adjust title and axis text sizes, and change the legend title.
gg1 <- gg +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    axis.text.x = element_text(size = 15),  # x axis tick text
    axis.text.y = element_text(size = 15),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25)
  ) +
  scale_color_discrete(name = "Cut of diamonds")
# Legend titles for other aesthetics follow the same pattern:
#   scale_shape_discrete(name = "legend title")    for a discrete variable
#   scale_shape_continuous(name = "legend title")  for a continuous variable
# and likewise for the shape / fill / color aesthetics.
print(gg1)

# NOTE(review): gg2 and gg3 are not defined in the visible text - confirm
# which plots they refer to.

# Change the colour of all text in the plot.
gg2 + theme(text = element_text(color = "blue"))  # all text turns blue.

# Change the point colours manually, one colour per level of 'color'.
gg3 +
  scale_colour_manual(
    name = "Legend",
    values = c(
      "D" = "grey", "E" = "red", "F" = "blue", "G" = "yellow",
      "H" = "black", "I" = "green", "J" = "firebrick"
    )
  )

颜色表:

调整 x、y 轴范围有三种方法:

  1. Using coord_cartesian(xlim=c(x1,x2))
  2. Using xlim(c(x1,x2))
  3. Using scale_x_continuous(limits=c(x1,x2))

注意:方法 2 和 3 会删除范围之外的数据点,方法 1 只是放大显示区域。
# NOTE(review): gg3 is not defined in the visible text; presumably a diamonds
# scatterplot of price against carat - confirm.

# Adjust the x and y axis ranges by zooming in.
gg3 +
  coord_cartesian(xlim = c(0, 3), ylim = c(0, 5000)) +
  geom_smooth()

# Delete the points outside the limits. Note that the smoothing line changes
# accordingly, which may mislead the analysis.
gg3 +
  scale_x_continuous(limits = c(0, 3)) +
  scale_y_continuous(limits = c(0, 5000)) +
  geom_smooth()
#> Warning message:
#> Removed 14714 rows containing missing values (geom_point).

# Change the axis tick labels and the spacing of the breaks.
# breaks is given explicitly so that it always matches the six labels.
gg3 +
  scale_x_continuous(
    breaks = 0:5,
    labels = c("zero", "one", "two", "three", "four", "five")
  ) +
  scale_y_continuous(breaks = seq(0, 20000, 4000))

# Rotate the axis text.
gg3 +
  theme(
    axis.text.x = element_text(angle = 45),
    axis.text.y = element_text(angle = 45)
  )

# Swap the x and y axes.
gg3 + coord_flip()

# Set the panel background and grid lines inside the plotting area.
gg3 +
  theme(
    panel.background = element_rect(fill = "springgreen"),
    panel.grid.major = element_line(colour = "firebrick", size = 3),
    panel.grid.minor = element_line(colour = "blue", size = 1)
  )

图形背景与边距

# NOTE(review): gg3 is not defined in the visible text; the manual colour
# scale below implies its colour aesthetic is the diamonds 'color' variable
# - confirm.

# Set the background colour outside the panel and the plot margins.
gg3 +
  theme(
    plot.background = element_rect(fill = "yellowgreen"),
    plot.margin = unit(c(2, 4, 1, 3), "cm")  # top, right, bottom, left
  )

# Remove the legend title (two ways).
gg3 + scale_color_discrete(name = "")
p1 <- gg3 + theme(legend.title = element_blank())

# Change the legend title.
p2 <- gg3 + scale_color_discrete(name = "Diamonds")

# Change the legend title and the point colours.
gg3 +
  scale_colour_manual(
    name = "Legend",
    values = c(
      "D" = "grey", "E" = "red", "F" = "blue", "G" = "yellow",
      "H" = "black", "I" = "green", "J" = "firebrick"
    )
  )

# Hide the legend entirely.
gg3 + theme(legend.position = "none")

# Change the legend position.
# Outside the plotting area: top / bottom / left / right.
p1 <- gg3 + theme(legend.position = "top")
# Inside the plotting area. legend.justification is the anchor point of the
# legend, measured with the legend's bottom-left corner as (0, 0).
p2 <- gg3 + theme(legend.justification = c(1, 0), legend.position = c(1, 0))
gridExtra::grid.arrange(p1, p2, ncol = 2)
# Equivalent to:
#   library(gridExtra)
#   grid.arrange(p1, p2, ncol = 2)

# Change the order of the legend items: create a new factor column with the
# levels in the required order (template with placeholder names):
#   df$newLegendColumn <- factor(df$legendcolumn,
#                                levels = c(new_order_of_legend_items),
#                                ordered = TRUE)

# legend.title - legend title
# legend.text  - legend text
# legend.key   - background box of each legend key
# guides       - legend symbols
# Sets legend title colour and size, key box colour, and symbol size and shape.
gg3 +
  theme(
    legend.title = element_text(size = 20, color = "firebrick"),
    legend.text = element_text(size = 15),
    legend.key = element_rect(fill = "steelblue")
  ) +
  guides(
    colour = guide_legend(override.aes = list(size = 2, shape = 4, stroke = 2))
  )

# NOTE(review): gg1 is assumed to be the themed diamonds scatterplot built
# earlier in the article - confirm.

# One panel per level of the categorical variable 'cut', laid out in 3 columns.
gg1 + facet_wrap(~ cut, ncol = 3)

# One panel per color/cut combination; facet_wrap wraps the panels into a
# grid rather than using strict rows and columns.
gg1 + facet_wrap(color ~ cut)

# Same panels, with the axis scales freed for each panel.
gg1 + facet_wrap(color ~ cut, scales = "free")

# Strict grid for easier comparison: rows are 'color', columns are 'cut'.
# The repeated per-panel headers are dropped, leaving more room for the plots.
gg1 + facet_grid(color ~ cut)

制作时间序列图形(使用ggfortify)

使用ggfortify包很容易直接用一个时间序列对象来画时间序列图形,而不用把数据类型转换为数据框,更多请见 http://rpubs.com/sinhrks/plot_ts

# Install the ggfortify package from GitHub.
library(devtools)
install_github("sinhrks/ggfortify")

ggfortify 让 ggplot2 能够识别 ts 对象:加载 ggfortify 包后,即可用 ggplot2::autoplot 直接绘制 ts 对象

library(ggfortify)

# AirPassengers is a 'ts' object.
autoplot(AirPassengers) + labs(title = "AirPassengers")

# Change the colour and the type of the line.
# Use help(autoplot.ts) (or help(autoplot.*) for any other object) to look up
# the options that can be changed.
autoplot(AirPassengers, ts.colour = "red", ts.linetype = "dashed")

autoplot 同样支持其他时间序列类的对象,例如:

  • zoo::zooreg
  • xts::xts
  • timeSeries::timeSeries
  • tseries::irts
library(xts)

# autoplot() also works on the xts version of the series.
autoplot(as.xts(AirPassengers), ts.colour = "green")

通过 ts.geom 参数可以指定所用的 {ggplot2} 几何对象(geom)

# Draw the series as bars instead of a line.
autoplot(AirPassengers, ts.geom = "bar", fill = "blue")

# Draw the series as points.
autoplot(AirPassengers, ts.geom = "point", shape = 3)

同一张图上画多个时间序列

要求数据是数据框类型,且一列必须为时间数据

(1)转换成数据框后,累加层

# Approach 1: one geom_line layer per series.
data(economics, package = "ggplot2")  # load the data
economics <- data.frame(economics)    # convert to a plain data frame

# The constant strings mapped to `col` become the legend labels, so each one
# names the series its layer draws.
ggplot(economics) +
  geom_line(aes(x = date, y = pce, col = "pce")) +
  geom_line(aes(x = date, y = unemploy, col = "unemploy")) +
  scale_color_discrete(name = "Legend") +
  labs(title = "Economics")

(2)用 reshape2::melt 把数据框融合成长格式(id 设为日期列),然后只需添加一个 geom_line,并把 color 映射到 variable

# Approach 2: plot multiple time series by melting the data frame.
library(reshape2)

# Long format: one row per (date, variable) pair.
df <- melt(economics[, c("date", "pce", "unemploy")], id = "date")

ggplot(df) +
  geom_line(aes(x = date, y = value, color = variable)) +
  labs(title = "Economics")

条形图

geom_bar() 默认 stat="count",也就是只给出 x 变量时按各取值的计数作图。但是,如果想具体指定y轴的值,这时候一定要在geom_bar内设置stat="identity"

# Absolute bar chart: specify both the X and Y axis and set stat = "identity".

# Mean of mpg for each value of 'cyl'.
df <- aggregate(mtcars$mpg, by = list(mtcars$cyl), FUN = mean)
names(df) <- c("cyl", "mpg")  # give the columns meaningful names
head(df)
#>   cyl    mpg
#> 1   4  26.66
#> 2   6  19.74
#> 3   8  15.10

# The Y axis is explicit, hence stat = "identity".
gg_bar <- ggplot(df, aes(x = cyl, y = mpg)) +
  geom_bar(stat = "identity")
print(gg_bar)

# Treat cyl as a categorical variable so each bar can get its own fill colour.
df$cyl <- as.factor(df$cyl)
gg_bar <- ggplot(df, aes(x = cyl, y = mpg)) +
  geom_bar(stat = "identity", aes(fill = cyl), width = 0.25)
gg_bar +
  scale_fill_manual(
    values = c("4" = "steelblue", "6" = "firebrick", "8" = "darkgreen")
  )

改变颜色

library(RColorBrewer)

# Show all available colour palettes.
display.brewer.all(n = 20, exact.n = FALSE)

# "Reds" is the name of the palette used for the fill colours.
ggplot(mtcars, aes(x = cyl, y = carb, fill = factor(cyl))) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Reds")

# Bar positions: side by side versus stacked.
gg <- ggplot(mtcars, aes(x = cyl))
p1 <- gg + geom_bar(position = "dodge", aes(fill = factor(vs)))  # side by side
p2 <- gg + geom_bar(aes(fill = factor(vs)))                      # stacked
gridExtra::grid.arrange(p1, p2, ncol = 2)

# Method 1: one geom_line per series with fixed colours (no legend).
gg <- ggplot(economics, aes(x = date))  # basic setup
gg +
  geom_line(aes(y = psavert), size = 2, color = "firebrick") +
  geom_line(aes(y = uempmed), size = 1, color = "steelblue",
            linetype = "twodash")
# Available line types: solid, dashed, dotted, dotdash, longdash and twodash

# Method 2: melt by date and map colour to the variable (gets a legend).
library(reshape2)
df_melt <- melt(economics[, c("date", "psavert", "uempmed")], id = "date")

gg <- ggplot(df_melt, aes(x = date))  # setup
gg +
  geom_line(aes(y = value, color = variable), size = 1) +
  scale_color_discrete(name = "Legend")

用 geom_ribbon() 画填充时间序列图,需要同时指定 ymin 和 ymax

# Prepare the data frame.
st_year <- start(AirPassengers)[1]  # first year of the series
st_month <- "01"
st_date <- as.Date(paste(st_year, st_month, "01", sep = "-"))  # start date
# Monthly date vector with one entry per observation.
dates <- seq.Date(st_date, length.out = length(AirPassengers), by = "month")
# The series must be assembled into a data frame before plotting.
df <- data.frame(dates, AirPassengers, AirPassengers / 2)
head(df)
#>        dates AirPassengers AirPassengers.2
#> 1 1949-01-01           112            56.0
#> 2 1949-02-01           118            59.0
#> 3 1949-03-01           132            66.0
#> 4 1949-04-01           129            64.5
#> 5 1949-05-01           121            60.5
#> 6 1949-06-01           135            67.5

# Plot ribbons with ymin = 0.
gg <- ggplot(df, aes(x = dates)) +
  labs(title = "AirPassengers") +
  theme(
    plot.title = element_text(size = 30),
    axis.title.x = element_text(size = 20),
    axis.text.x = element_text(size = 15)
  )
gg +
  geom_ribbon(aes(ymin = 0, ymax = AirPassengers)) +
  geom_ribbon(aes(ymin = 0, ymax = AirPassengers.2), fill = "green")

# Plot ribbons as a band of +/- 20 around each series.
gg +
  geom_ribbon(aes(ymin = AirPassengers - 20, ymax = AirPassengers + 20)) +
  geom_ribbon(aes(ymin = AirPassengers.2 - 20, ymax = AirPassengers.2 + 20),
              fill = "green")

geom_area 与 geom_ribbon 类似,只是 ymin 固定为 0;如果要画重叠的面积图,可以设置 alpha 让上层半透明

# Method 1: non-overlapping (stacked) areas from the melted data frame.
df <- reshape2::melt(economics[, c("date", "psavert", "uempmed")], id = "date")
head(df, 3)
#>         date variable value
#> 1 1967-07-01  psavert  12.5
#> 2 1967-08-01  psavert  12.5
#> 3 1967-09-01  psavert  11.7
p1 <- ggplot(df, aes(x = date)) +
  geom_area(aes(y = value, fill = variable)) +
  labs(title = "Non-Overlapping - psavert and uempmed")

# Method 2: overlapping areas. The fills are fixed values rather than a
# mapping from a melted data frame, so there is no legend.
p2 <- ggplot(economics, aes(x = date)) +
  geom_area(aes(y = psavert), fill = "yellowgreen", color = "yellowgreen") +
  geom_area(aes(y = uempmed), fill = "dodgerblue", alpha = 0.7,
            linetype = "dotted") +
  labs(title = "Overlapping - psavert and uempmed")

gridExtra::grid.arrange(p1, p2, ncol = 2)

# Box plot of mpg for each number of cylinders.
p1 <- ggplot(mtcars, aes(factor(cyl), mpg)) +
  geom_boxplot(
    aes(fill = factor(cyl)),
    width = 0.5,
    outlier.colour = "dodgerblue",
    outlier.size = 4,
    outlier.shape = 16,
    outlier.stroke = 2,
    notch = TRUE
  ) +
  labs(title = "Box plot")

# Violin plot of the same data, with the tails left untrimmed.
p2 <- ggplot(mtcars, aes(factor(cyl), mpg)) +
  geom_violin(aes(fill = factor(cyl)), width = 0.5, trim = FALSE) +
  labs(title = "Violin plot (untrimmed)")

gridExtra::grid.arrange(p1, p2, ncol = 2)

# Density plot of mpg, one filled curve per number of cylinders.
ggplot(mtcars, aes(mpg)) +
  geom_density(aes(fill = factor(cyl)), size = 2) +
  labs(title = "Density plot")

# Correlation matrix of mtcars (symmetric), rounded to 2 decimals.
corr <- round(cor(mtcars), 2)
# Long format: one row per (Var1, Var2) pair with its correlation value.
df <- reshape2::melt(corr)

# Tile plot with the correlation printed in each cell.
gg <- ggplot(df, aes(x = Var1, y = Var2, fill = value, label = value)) +
  geom_tile() +
  theme_bw() +
  geom_text(aes(label = value, size = value), color = "white") +
  labs(title = "mtcars - Correlation plot") +
  theme(text = element_text(size = 20), legend.position = "none")

# The same plot with two alternative fill scales.
library(RColorBrewer)
p2 <- gg + scale_fill_distiller(palette = "Reds")
p3 <- gg + scale_fill_gradient2()
gridExtra::grid.arrange(gg, p2, p3, ncol = 3)

相同坐标轴范围

# Plot price against a randomly shifted copy of itself and force both axes
# onto the same scale with coord_equal().
ggplot(
  diamonds,
  aes(
    x = price,
    y = price + runif(nrow(diamonds), 100, 10000),
    color = cut
  )
) +
  geom_point() +
  geom_smooth() +
  coord_equal()

自定义布局

gridExtra包能在一个网格中安排放置多个图形

library(gridExtra)

# plot1 and plot2 stand for any two previously created ggplot objects.
grid.arrange(plot1, plot2, ncol = 2)

切换不同的内置主题:

  1. theme_gray()
  2. theme_bw()
  3. theme_linedraw()
  4. theme_light()
  5. theme_minimal()
  6. theme_classic()
  7. theme_void()

ggthemes另外的主题

# Install the stable version from CRAN.
install.packages("ggthemes", dependencies = TRUE)

# Or install the development versions from GitHub.
# ggplot2 is referenced by its tidyverse/ggplot2 repository name.
library("devtools")
install_github(c("tidyverse/ggplot2", "jrnold/ggthemes"))

# Example: apply a built-in theme to a plot.
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  labs(title = "bw Theme")

library(grid)

# Text grob positioned in coordinates relative to the plotting area.
my_grob <- grobTree(
  textGrob(
    "This text is at x=0.1 and y=0.9, relative!\n Anchor point is at 0,0",
    x = 0.1,
    y = 0.9,
    hjust = 0,
    gp = gpar(col = "firebrick", fontsize = 25, fontface = "bold")
  )
)

# Add the grob to a bar chart as a custom annotation.
ggplot(mtcars, aes(x = cyl)) +
  geom_bar() +
  annotation_custom(my_grob) +
  labs(title = "Annotation Example")

保存图片

plot1 <- ggplot(mtcars, aes(x = cyl)) +
  geom_bar()

# Save the most recently created plot.
ggsave("myggplot.png")

# Save a specific plot object.
ggsave("myggplot.png", plot = plot1)

相关链接:

非常有用:https://ggplot2.tidyverse.org/reference/

Cheatsheets:http://www.rstudio.com/wp-content/uploads/2015/12/ggplot2-cheatsheet-2.0.pdf

教程:http://r-statistics.co/ggplot2-Tutorial-With-R.html

https://ggplot2.tidyverse.org/

时间序列画图包:http://rpubs.com/sinhrks/plot_ts

主题:https://github.com/jrnold/ggthemes

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!