Generating summary table at bottom of dataframe

自闭症网瘾萝莉.ら 提交于 2020-02-06 08:01:09

问题


Please Help!!

I have the following dataframe (named Final_APOL1). I need to generate a summary table like the second dataframe shown. Once generated is it possible to save this as a separate output csv that will be saved to the same directory?

The summary table runs through the risk allele count variables and places them into categories so population frequencies can be calculated for each mutation.

Code for risk allele numbers 1, 2 or no

 "no APOL1 Risk Alleles" = ifelse(`Final genotype of APOL1` == "G0/G0", 1, NA),

    "1 APOL1 Risk Alleles" = 
      ifelse(`Final genotype of APOL1` %in% c("G0/G2", "G1^{GM}/G0", "G1^{G+}/G0"), 1, NA),

    "2 APOL1 Risk Alleles" =
      ifelse(`Final genotype of APOL1` %in% c("G1^{GM}/G1^{GM}", "G1^{GM}/G2", "G2/G2"), 1, NA))

The genotypes within Final_APOL1 fit into the following categories for Summary_table

G1^{GM}/G2 and G1^{G+}/G2 = G1/G2 for Summary_table
G1^{GM}/G1^{GM} = G1/G1 for Summary_table
G1^{GM}/G0 and G1^{G+}/G0 = G1/G0 for Summary_table
G2/G2 = G2/G2 for Summary_table
G0/G2 = G0/G2 for Summary_table

dataframe (Final_APOL1)

structure(list(Well = structure(1:96, .Label = c("A01", "A02", 
"A03", "A04", "A05", "A06", "A07", "A08", "A09", "A10", "A11", 
"A12", "B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", 
"B09", "B10", "B11", "B12", "C01", "C02", "C03", "C04", "C05", 
"C06", "C07", "C08", "C09", "C10", "C11", "C12", "D01", "D02", 
"D03", "D04", "D05", "D06", "D07", "D08", "D09", "D10", "D11", 
"D12", "E01", "E02", "E03", "E04", "E05", "E06", "E07", "E08", 
"E09", "E10", "E11", "E12", "F01", "F02", "F03", "F04", "F05", 
"F06", "F07", "F08", "F09", "F10", "F11", "F12", "G01", "G02", 
"G03", "G04", "G05", "G06", "G07", "G08", "G09", "G10", "G11", 
"G12", "H01", "H02", "H03", "H04", "H05", "H06", "H07", "H08", 
"H09", "H10", "H11", "H12"), class = "factor"), G1_1_1 = c("Blank", 
"Blank", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", "+", 
"+", "G1^{S342G}", "G1^{S342G}", "Blank", "Blank", "+", "+", 
"+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", "+", "+", "Blank", 
"Blank", "+", "+", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"Blank", "Blank", "+", "+", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "+", "+", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "+", "+", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", 
"+", "+", "G1^{S342G}", "G1^{S342G}", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "G1^{S342G}", "+", "+", "+", "+", "+", "+", "G1^{S342G}", 
"G1^{S342G}", "G1^{S342G}", "G1^{S342G}", "+", "+", "+", "+", 
"+", "+", "G1^{S342G}", "G1^{S342G}"), G1_1_2 = c("Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "G1^{S342G}", 
"G1^{S342G}", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+"
), G1_2_1 = c("Blank", "Blank", "+", "+", "+", "+", "G1^{I384M}", 
"G1^{I384M}", "+", "+", "G1^{I384M}", "G1^{I384M}", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"+", "+", "Blank", "Blank", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "Blank", "Blank", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "+", "+", "+", "+", "+", "+", "G1^{I384M}", 
"G1^{I384M}", "+", "+", "G1^{I384M}", "G1^{I384M}", "+", "+", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "+", "+", "+", "+", 
"+", "+", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", 
"+", "+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}"), G1_2_2 = c("Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"+", "+", "+", "+", "+", "+", "Blank", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", "+", "+", "+", 
"+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+"), G2_1 = c("Blank", "Blank", 
"+", "+", "+", "+", "G2", "G2", "+", "+", "G2", "G2", "Blank", 
"Blank", "+", "+", "G2", "G2", "G2", "G2", "+", "+", "+", "+", 
"Blank", "Blank", "G2", "G2", "+", "+", "+", "+", "+", "+", "+", 
"+", "Blank", "Blank", "G2", "G2", "G2", "G2", "+", "+", "+", 
"+", "+", "+", "G2", "G2", "G2", "G2", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "G2", "G2", "G2", 
"G2", "G2", "G2", "G2", "G2", "+", "+", "+", "+", "+", "+", "+", 
"+", "G2", "G2", "+", "+", "+", "+", "G2", "G2", "+", "+", "+", 
"+", "+", "+"), G2_2 = c("Blank", "Blank", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "Blank", "Blank", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+"), `Final genotype of APOL1` = c("NA", 
"NA", "G0/G0", "G0/G0", "G0/G0", "G0/G0", "G1^{GM}/G2", "G1^{GM}/G2", 
"G0/G0", "G0/G0", "G1^{GM}/G2", "G1^{GM}/G2", "NA", "NA", "G0/G0", 
"G0/G0", "G0/G2", "G0/G2", "G0/G2", "G0/G2", "G1^{GM}/G0", "G1^{GM}/G0", 
"G0/G0", "G0/G0", "NA", "NA", "G0/G2", "G0/G2", "G1^{GM}/G0", 
"G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", 
"G1^{GM}/G0", "G1^{GM}/G0", "NA", "NA", "G0/G2", "G0/G2", "G1^{GM}/G2", 
"G1^{GM}/G2", "G1^{GM}/G0", "G1^{GM}/G0", "G0/G0", "G0/G0", "G1^{GM}/G0", 
"G1^{GM}/G0", "G1^{GM}/G2", "G1^{GM}/G2", "G0/G2", "G0/G2", "G0/G0", 
"G0/G0", "G0/G0", "G0/G0", "G1^{GM}/G1^{GM}", "G1^{GM}/G1^{GM}", 
"G0/G0", "G0/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G0/G0", "G0/G0", 
NA, "G0/G0", "G0/G2", "G0/G2", "G0/G2", "G0/G2", "G0/G2", "G0/G2", 
"G0/G2", "G0/G2", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", 
"G0/G0", "G0/G0", "G0/G0", "G0/G0", "G0/G2", "G0/G2", "G1^{GM}/G1^{GM}", 
"G1^{GM}/G1^{GM}", "G1^{GM}/G0", "G1^{GM}/G0", "G0/G2", "G0/G2", 
"G0/G0", "G0/G0", "G0/G0", "G0/G0", "G1^{GM}/G0", "G1^{GM}/G0"
), `no APOL1 Risk Alleles` = c(NA, NA, 1, 1, 1, 1, NA, NA, 1, 
1, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 1, 1, 1, 1, NA, NA, 1, 
1, NA, NA, 1, 1, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, 1, 
1, NA, NA), `1 APOL1 Risk Alleles` = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, 1, 1, 1, 1, NA, 
NA, NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, 1, 1, NA, NA, 
1, 1, NA, NA, 1, 1, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, 
NA, 1, 1, NA, NA, NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
NA, NA, NA, NA, 1, 1, NA, NA, 1, 1, 1, 1, NA, NA, NA, NA, 1, 
1), `2 APOL1 Risk Alleles` = c(NA, NA, NA, NA, NA, NA, 1, 1, 
NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
1, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 1, 
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA), Sample_Flag = c(FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE)), row.names = c(NA, -96L), class = "data.frame")

dataframe (Summary_table)

structure(list(X__1 = c("Total number", "Percentage", "G1/G1 %", 
"G2/G2 %", "G1/G2 %", "G0/G1 %", "G0/G2 %"), `no APOL1 Risk Alleles` = c(29, 
33.33, NA, NA, NA, NA, NA), `1 APOL1 Risk Alleles` = c(46, 52.88, 
NA, NA, NA, 27.59, 25.29), `2 APOL1 Risk Alleles` = c(12, 13.79, 
4.6, NA, 9.19, NA, NA), `Total Number` = c(87, NA, NA, NA, NA, 
NA, NA)), row.names = c(NA, -7L), class = c("tbl_df", "tbl", 
"data.frame"))

回答1:


I'm generally not a fan of storing row summaries (e.g., total) within the frame itself, as any follow-on row summarizing will include that number; I'm assuming that this is purely for aesthetics, and so including it is not horrible

library(dplyr)

tmp1 <- as_tibble(Final_APOL_1) %>%
  # discard G* fields, they seem unnecessary?
  select(-starts_with("G")) %>%
  # remove an reference to NA or the literal string "NA", they seem distracting?
  filter(
    !is.na(`Final genotype of APOL1`),
    `Final genotype of APOL1` != "NA"
  ) %>%
  mutate(
    `X__1` = case_when(
      `Final genotype of APOL1` %in% c("G1^{GM}/G2", "G1^{G+}/G2") ~ "G1/G2",
      `Final genotype of APOL1` == "G1^{GM}/G1^{GM}"               ~ "G1/G1",
      `Final genotype of APOL1` %in% c("G1^{GM}/G0", "G1^{G+}/G0") ~ "G1/G0",
      TRUE ~ `Final genotype of APOL1`
    )
  ) %>%
  select(-Well, -`Final genotype of APOL1`, -Sample_Flag) %>%
  group_by(X__1) %>%
  summarize_all(~ sum(.))

totals <- tmp1 %>%
  summarize_at(vars(-X__1), ~ sum(., na.rm = TRUE)) %>%
  mutate(
    `Total Number` = sum(.),
    X__1 = "Total number"
  )
percents <- totals %>%
  mutate_at(vars(-X__1, -`Total Number`), ~ 100 * . / `Total Number`) %>%
  mutate(
    `Total Number` = NA_real_,
    X__1 = "Percentage"
  )

Final steps and output:

tmp1 %>%
  mutate_at(vars(-X__1), ~ 100 * . / totals$`Total Number`) %>%
  bind_rows(., totals, percents)
# # A tibble: 7 x 5
#   X__1         `no APOL1 Risk Allele~ `1 APOL1 Risk Allele~ `2 APOL1 Risk Allele~ `Total Number`
#   <chr>                         <dbl>                 <dbl>                 <dbl>          <dbl>
# 1 G0/G0                          33.3                  NA                   NA                NA
# 2 G0/G2                          NA                    25.3                 NA                NA
# 3 G1/G0                          NA                    27.6                 NA                NA
# 4 G1/G1                          NA                    NA                    4.60             NA
# 5 G1/G2                          NA                    NA                    9.20             NA
# 6 Total number                   29                    46                   12                87
# 7 Percentage                     33.3                  52.9                 13.8              NA


来源:https://stackoverflow.com/questions/59989724/generating-summary-table-at-bottom-of-dataframe

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!