id first middle last Age
1 Carol Jenny Smith 15
2 Sarah Carol Roberts 20
3 Josh David Richardson 22
I am t
Using tidyverse
library(tidyverse)
f1 <- function(data, wordToCompare, colsToCompare) {
wordToCompare <- enquo(wordToCompare)
data %>%
select(colsToCompare) %>%
mutate(!! wordToCompare := map(., ~
.x == as_label(wordToCompare)) %>%
reduce(`|`) %>%
as.integer)
}
f1(df1, Carol, c("first", 'middle', 'last'))
# first middle last Carol
#1 Carol Jenny Smith 1
#2 Sarah Carol Roberts 1
#3 Josh David Richardson 0
f1(df1, Sarah, c("first", 'middle', 'last'))
# first middle last Sarah
#1 Carol Jenny Smith 0
#2 Sarah Carol Roberts 1
#3 Josh David Richardson 0
Or this can also be done with pmap
df1 %>%
mutate(Carol = pmap_int(.[c('first', 'middle', 'last')],
~ +('Carol' %in% c(...))))
# id first middle last Age Carol
#1 1 Carol Jenny Smith 15 1
#2 2 Sarah Carol Roberts 20 1
#3 3 Josh David Richardson 22 0
which can be wrapped into a function
f2 <- function(data, wordToCompare, colsToCompare) {
wordToCompare <- enquo(wordToCompare)
data %>%
mutate(!! wordToCompare := pmap_int(.[colsToCompare],
~ +(as_label(wordToCompare) %in% c(...))))
}
f2(df1, Carol, c("first", 'middle', 'last'))
# id first middle last Age Carol
#1 1 Carol Jenny Smith 15 1
#2 2 Sarah Carol Roberts 20 1
#3 3 Josh David Richardson 22 0
NOTE: Both the tidyverse methods doesn't require any reshaping
With base R, we can loop through the 'first', 'middle', 'last' column and use == for comparison to get a list of logical vectors, which we Reduce to a single logical vector with | and coerce it to binary with +
df1$Carol <- +(Reduce(`|`, lapply(df1[2:4], `==`, 'Carol')))
df1
# id first middle last Age Carol
#1 1 Carol Jenny Smith 15 1
#2 2 Sarah Carol Roberts 20 1
#3 3 Josh David Richardson 22 0
NOTE: There are dupes for this post. For e.g. here
df1 <- structure(list(id = 1:3, first = c("Carol", "Sarah", "Josh"),
middle = c("Jenny", "Carol", "David"), last = c("Smith",
"Roberts", "Richardson"), Age = c(15L, 20L, 22L)),
class = "data.frame", row.names = c(NA,
-3L))
A solution using apply family
df$Carol = lapply(1:nrow(df), function(x) any(df[x,]=="Carol))
We can use rowSums
df$Carol <- as.integer(rowSums(df[2:4] == "Carol") > 0)
df
# id first middle last Age Carol
#1 1 Carol Jenny Smith 15 1
#2 2 Sarah Carol Roberts 20 1
#3 3 Josh David Richardson 22 0
If we need it as a function
fun <- function(df, value) {
as.integer(rowSums(df[2:4] == value) > 0)
}
fun(df, "Carol")
#[1] 1 1 0
fun(df, "Sarah")
#[1] 0 1 0
but this assumes the columns you want to search are at position 2:4.
To give more flexibility with column position
fun <- function(df, cols, value) {
as.integer(rowSums(df[cols] == value) > 0)
}
fun(df, c("first", "last","middle"), "Carol")
#[1] 1 1 0
fun(df, c("first", "last","middle"), "Sarah")
#[1] 0 1 0
Here's a tidyverse option. We first reshape the data to long format, group by id, and find levels of id that have the desired name in at least one row. Then we reshape back to wide format.
library(tidyverse)
df %>%
gather(key, value, first:last) %>%
group_by(id) %>%
mutate(Carol = as.numeric(any(value=="Carol"))) %>%
spread(key, value)
id Age Carol first last middle 1 1 15 1 Carol Smith Jenny 2 2 20 1 Sarah Roberts Carol 3 3 22 0 Josh Richardson David
Or, as a function:
find.target = function(data, target) {
data %>%
gather(key, value, first:last) %>%
group_by(id) %>%
mutate(!!target := as.numeric(any(value==target))) %>%
spread(key, value) %>%
# Move new target column to end
select(-target, target)
}
find.target(df, "Carol")
find.target(df, "Sarah")
You could also do several at once. For example:
map(c("Sarah", "Carol", "David"), ~ find.target(df, .x)) %>%
reduce(left_join)
id Age first last middle Sarah Carol David 1 1 15 Carol Smith Jenny 0 1 0 2 2 20 Sarah Roberts Carol 1 1 0 3 3 22 Josh Richardson David 0 0 1
Another option using mutate and if_else() as you suggested:
library(tidyverse)
data = read_table(" id first middle last Age
1 Carol Jenny Smith 15
2 Sarah Carol Roberts 20
3 Josh David Richardson 22")
data %>%
mutate(carol = if_else(first == "Carol" | middle == "Carol" | last == "Carol",
"yes",
"no"))
Result:
# A tibble: 3 x 6
id first middle last Age carol
<dbl> <chr> <chr> <chr> <dbl> <chr>
1 1 Carol Jenny Smith 15 yes
2 2 Sarah Carol Roberts 20 yes
3 3 Josh David Richardson 22 no