Rank values column-wise

假如想象 提交于 2019-12-11 07:32:35

问题


I want to rank values column-wise.

I have the following dataframe:

dput(test)
structure(list(Name = c("A", "B", "C", "D"), Margin = c(744, 
3196.4722, 0, 394), T1 = c(420, 200, 2150, 70), T2 = c(630, 285, 
2365, 84), T3 = c(630, 335, 2580, 105), T4 = c(666, 410, 2795, 
128), T5 = c(2244, 2961.7931, 3010, 142), T6 = c(2244, 3652.472, 
3440, 151), T7 = c(2244, 3722.472, 3870, 168), T8 = c(2244, 3887.472, 
5160, 187), T9 = c(2244, 4112.472, 6450, 225), T10 = c(2244, 
4337.472, 6450, 225), T11 = c(798, 3567.472, 4300, 112), T12 = c(630, 
3582.472, 4300, 111), T13 = c(702, 3582.472, 4300, 112), T14 = c(3600, 
4637.472, 3440, 78), T15 = c(744, 3067.306, 2580, 274), T16 = c(744, 
2770.5666, 2580, 197), T17 = c(744, 3138.806, 2580, 80), T18 = c(2244, 
3920.0836, 3870, 401), T19 = c(2244, 2789.1117, 1290, 127)), .Names = c("Name", 
"Margin", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9", 
"T10", "T11", "T12", "T13", "T14", "T15", "T16", "T17", "T18", 
"T19"), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame"
))

Each row has unique ID in name, and I want to rank the columns to determine which column is equal or least small to the value in the margin column.

The ideal output would be:

Name    Margin    Closest_Column
 A      744.000        T15

Break ties could be random.

My attempt:

nm1 <- paste("rank", names(test)[3:21], sep="_")
test[nm1] <-  mutate_all(test[3:21],funs(rank(., ties.method="first")))

回答1:


If we need to use tidyverse, one approach is rowwise and then find the index of the minimum difference between the 'Margin' and other columns to get the column names

test %>% 
      rowwise() %>% 
      do(data.frame(.[1:2], Closest_column = names(.)[3:21][which.min(abs(.[[2]]-
                        unlist(.[3:21])))]))
# A tibble: 4 x 3
#    Name   Margin Closest_column
#* <chr>    <dbl>          <chr>
#1     A  744.000            T15
#2     B 3196.472            T17
#3     C    0.000            T19
#4     D  394.000            T18

Or another option is

library(tidyverse)
gather(test, Closest_column, val, T1:T19) %>%
        group_by(Name) %>% 
        slice(which.min(abs(Margin - val))) %>%
        select(-val)
# A tibble: 4 x 3
# Groups:   Name [4]
#    Name   Margin Closest_column
#  <chr>    <dbl>          <chr>
#1     A  744.000            T15
#2     B 3196.472            T17
#3     C    0.000            T19
#4     D  394.000            T18

With base R an efficient option would be max.col

cbind(test[1:2], 
    Closest_column = names(test)[3:21][max.col(-abs(test[3:21]-test[[2]]), 'first')])
#    Name   Margin Closest_column
#1    A  744.000            T15
#2    B 3196.472            T17
#3    C    0.000            T19
#4    D  394.000            T18



回答2:


I would go for the long format

library(tidyr)
library(dplyr)

test %>%
  gather(Variable, Value, -(Name:Margin)) %>%
  group_by(Name, Margin) %>%
  summarise(Closest = Variable[which.min(abs(Value - Margin))])

# A tibble: 4 x 3
# Groups:   Name [?]
#    Name   Margin Closest
#   <chr>    <dbl>   <chr>
# 1     A  744.000     T15
# 2     B 3196.472     T17
# 3     C    0.000     T19
# 4     D  394.000     T18

Or using data.table

library(data.table)
melt(setDT(test), 1:2
     )[, .(Closest = variable[which.min(abs(value - Margin))]),
         by = .(Name, Margin)]
#    Name   Margin Closest
# 1:    A  744.000     T15
# 2:    B 3196.472     T17
# 3:    C    0.000     T19
# 4:    D  394.000     T18



回答3:


Use cbind.data.frame to align the first two columns to a column constructed from the name chosen as the one with the minimum of absolute value of column minus Margin:

cbind( test[1:2], Closest_Column =
    apply(test[-1], 1, function(x) names(x[-1])[which.min( abs(x[-1]-x[1]))] ) )
  Name   Margin Closest_Column
1    A  744.000            T15
2    B 3196.472            T17
3    C    0.000            T19
4    D  394.000            T18


来源:https://stackoverflow.com/questions/45562700/rank-values-column-wise

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!