Replace entire strings based on partial match

前端 未结 2 398
南旧
南旧 2020-12-06 11:45

I'm new to R. I want to replace an entire string whenever part of it matches a given pattern.

# Example input: every element contains the substring "Blue" somewhere.
d <- c("SDS0G2 Blue", "Blue SSC2CWA3", "Blue SA2M1GC", "SA5 Blue CSQ5")
         


        
相关标签:
2条回答
  • 2020-12-06 12:25

    If you did want to keep the variable as a factor and replace multiple partial matches at once, the following function will work (example from another question).

    
    clrs <- c("blue", "light blue", "red", "rose", "ruby", "yellow", "green", "black", "brown", "royal blue")
    # stringsAsFactors = TRUE is required: since R 4.0.0 data.frame() keeps
    # strings as character, and the level recoding below only works on factors.
    dfx <- data.frame(
      colors1 = clrs,
      colors2 = clrs,
      Amount = sample(100, 10),
      stringsAsFactors = TRUE
    )
    
    #' Build a level-recoding list from regular-expression matches
    #'
    #' Every level of `.f` that matches a pattern is grouped under that
    #' pattern's replacement name; levels matching no pattern are kept under
    #' their own name. The result is meant to be used as `levels(x) <- result`.
    #'
    #' @param .f A factor (any object whose `levels()` is not `NULL`).
    #' @param patterns Character vector of regular expressions, optionally named.
    #' @param replacement Optional character vector of new level names, one per
    #'   pattern. When `NULL`, the names of `patterns` are used, and any unnamed
    #'   pattern serves as its own replacement.
    #' @param ignore.case Passed on to `grep()`.
    #' @return A named list of character vectors: one element per pattern (in
    #'   order), followed by one element per unmatched level.
    make_levels <- function(.f, patterns, replacement = NULL, ignore.case = FALSE) {
      lvls <- levels(.f)
      if (is.null(lvls)) {
        stop("`.f` has no levels; convert it with factor() first.", call. = FALSE)
      }
    
      # Resolve replacement names: explicit argument > names(patterns) > patterns.
      if (is.null(replacement)) {
        replacement <- names(patterns)
        if (is.null(replacement)) {
          replacement <- patterns
        } else {
          # Partially named patterns: blank names fall back to the pattern text.
          blank <- is.na(replacement) | !nzchar(replacement)
          replacement[blank] <- patterns[blank]
        }
      }
      if (length(replacement) != length(patterns)) {
        stop("`replacement` must have one entry per pattern.", call. = FALSE)
      }
    
      # Collect matches by position rather than by name, so that several
      # patterns may share one replacement name without overwriting each other.
      lvl_match <- lapply(
        unname(patterns),
        function(p) grep(p, lvls, ignore.case = ignore.case, value = TRUE)
      )
      names(lvl_match) <- replacement
    
      # Levels that matched nothing map to themselves.
      lvl_other <- setdiff(lvls, unlist(lvl_match))
      c(lvl_match, setNames(as.list(lvl_other), lvl_other))
    }
    
    # Recode colors2 in place: levels matching "blue" become "Blue", and levels
    # matching red/rose/ruby become "Red"; the remaining levels are kept.
    levels(dfx[["colors2"]]) <- make_levels(
      dfx[["colors2"]],
      patterns = c(Blue = "blue", Red = "red|rose|ruby")
    )
    
    dfx
    #>       colors1 colors2 Amount
    #> 1        blue    Blue     75
    #> 2  light blue    Blue     55
    #> 3         red     Red     47
    #> 4        rose     Red     83
    #> 5        ruby     Red     56
    #> 6      yellow  yellow     10
    #> 7       green   green     25
    #> 8       black   black     29
    #> 9       brown   brown     23
    #> 10 royal blue    Blue     24
    

    Created on 2020-04-18 by the reprex package (v0.3.0)

    0 讨论(0)
  • 2020-12-06 12:29

    I'd suggest using grepl to find the indices and replace those indices with "Red":

    d <- c("SDS0G2 Blue", "Blue SSC2CWA3", "Blue SA2M1GC", "SA5 Blue CSQ5", "ABCDE")
    # fixed = TRUE matches "Blue" as a literal, case-sensitive substring rather
    # than as a regular expression; every element containing it is overwritten.
    d[grepl("Blue", d, fixed = TRUE)] <- "Red"
    d
    # [1] "Red"   "Red"   "Red"   "Red"   "ABCDE"
    
    0 讨论(0)
提交回复
热议问题