Multiply Columns by Substrings in R

你离开我真会死。 提交于 2019-12-06 11:31:45

We create a function that finds the specific names, and then calculates the rowwise products. The heavy lifter of this function is the mapply function. We add a final step to rename the resultant data.frame.

fun1 <- function(data){
  qty_names <- names(data)[grepl(pattern = "Qty", x = names(data))]

  p_names <- names(data)[grepl(pattern = "P", x = names(data))]

  setNames(
    data.frame(
      mapply(qty_names, p_names, 
             FUN = function(n1, n2) apply(data[c(n1,n2)], 1, prod))),
      paste0('F_Total_', 1:length(p_names)))


}

cbind(dat, fun1(dat))

  Basket F_Type_1 F_Qty_1 F_P_1 F_Type_2 F_Qty_2 F_P_2 F_Total_1 F_Total_2
1    AAA    Apple      10   2.5   Banana       9     2        25        18
2    BBB    Peach       5   6.0    Melon      20     5        30       100

In base R, you can use the [[]] form to access column with a string, so you can loop easily

df <- read.table(text = "Basket   F_Type_1   F_Qty_1   F_P_1   F_Type_2   F_Qty_2   F_P_2 
AAA       Apple     10        2.5     Banana     9         2
BBB       Peach     5         6       Melon      20        5",header = T)

for(i in 1:2)
{
  df[[paste0("F_Total_",i)]] <- as.numeric(df[[paste0("F_P_",i)]])*as.numeric(df[[paste0("F_Qty_",i)]])
}

  Basket F_Type_1 F_Qty_1 F_P_1 F_Type_2 F_Qty_2 F_P_2 F_Total_1 F_Total_2
1    AAA    Apple      10   2.5   Banana       9     2        25        18
2    BBB    Peach       5   6.0    Melon      20     5        30       100

Here's a method using tidyverse functions for reshaping your data. Basically using tidyr verbs, we reshape your data into a more "tidy" format.

# library(dplyr); library(tidyr)
dd %>% select(Basket, contains("_Qty_"), contains("_P_")) %>% 
  gather("key", "value", -Basket) %>% 
  separate(key, c("F", "Val", "Grp")) %>% 
  group_by(Basket, Grp) %>% 
  spread(Val, value) %>% 
  mutate(Total=P*Qty, GrpN=paste0("Total_", Grp)) %>% 
  ungroup() %>% 
  select(Basket, GrpN, Total) %>% 
  spread(GrpN ,Total)

#   Basket Total_1 Total_2
#   <fct>    <dbl>   <dbl>
# 1 AAA         25      18
# 2 BBB         30     100

Create data

library(data.table)
df <- fread("
Basket   F_Type_1   F_Qty_1   F_P_1   F_Type_2   F_Qty_2   F_P_2 
AAA       Apple     10        2.5     Banana     9         2
BBB       Peach     5         6       Melon      20        5
")

df
#    Basket F_Type_1 F_Qty_1 F_P_1 F_Type_2 F_Qty_2 F_P_2
# 1:    AAA    Apple      10   2.5   Banana       9     2
# 2:    BBB    Peach       5   6.0    Melon      20     5

For numbers from 1 to sum(grepl('F_P_', names(df))), set F_Total_{number} as F_Qty_{number}*F_P_{number}

for(i in seq(sum(grepl('F_P_', names(df)))))
  df[, paste0('F_Total_', i) := Reduce(`*`, .SD)
     , .SDcols = paste0(c('F_Qty_', 'F_P_'), i)]

df
#    Basket F_Type_1 F_Qty_1 F_P_1 F_Type_2 F_Qty_2 F_P_2 F_Total_1 F_Total_2
# 1:    AAA    Apple      10   2.5   Banana       9     2        25        18
# 2:    BBB    Peach       5   6.0    Melon      20     5        30       100

Or in base R with df as a data.frame

for(i in seq(sum(grepl('F_P_', names(df)))))
  df[paste0('F_Total_', i)] <- Reduce(`*`, df[paste0(c('F_Qty_', 'F_P_'), i)])
setNames(df[grepl("F_Qty",names(df))]*df[grepl("F_P",names(df))],paste0("F_Total_",1:2))

  F_Total_1 F_Total_2
1        25        18
2        30       100

Suggestion: If you could store your data in the long format, it will make future maintenance much cleaner.

Basket     Item     Type    Qty Price    Total
AAA           1    Apple     10   2.5       25
AAA           2    Banana     9     2       18
BBB           1    Peach      5   6.0       30
BBB           2    Melon     20     5      100
BBB           3    Orange    11   2.7     29.7

Then, when you really need the data in the above format, use the data.table package to transpose the data.

library(data.table)
fruits <- data.frame("Basket" = c("AAA", "AAA", "BBB", "BBB", "BBB"),
                 "Item" = c(1,2,1,2,3),
                 "Type" = c("Apple", "Banana", "Peach", "Melon", "Orange"),
                 "Qty" = c(10, 9, 5, 20, 11),
                 "Price" = c(2.5, 2, 6.0, 5, 2.7)
)

fruits$Total <- fruits$Qty * fruits$Price
fruits.New <- data.table::dcast(setDT(fruits),
                            formula = Basket ~ Item,
                            value.var = c("Type", "Price", "Qty", "Total"))

This way, you only have to maintain one formula and not to worry about apply, while giving you the flexibility to have variable column numbers too.

The column name should be relatively easy to fix if it's important.

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!