Calculate character string “days, hours, minutes, seconds” to numeric total days [duplicate]

我们两清 提交于 2019-12-01 20:55:44

Again, without packages and with a little regex:

# Sample durations. Each string holds an optional hours part, an optional
# minutes part and an optional seconds part, in that order.
Time <- c(
  "22 hours 3 minutes 22 seconds",
  "170 hours 15 minutes 20 seconds",
  "39 seconds",
  "6 hours 44 minutes 17 seconds",
  "9 hours 54 minutes 36 seconds",
  "357 hours 23 minutes 28 seconds",
  "464 hours 30 minutes 7 seconds",
  "51 seconds",
  "31 hours 39 minutes 2 seconds",
  "355 hours 29 minutes 10 seconds")

# One capture group per unit, all of them optional. The separating space sits
# outside the groups so that inputs which stop after the hours or the minutes
# (e.g. "2 hours") are still captured, and the trailing "s" is optional so
# that singular units (e.g. "1 hour") match as well.
pat <- "(?:(\\d+) hours?)? ?(?:(\\d+) minutes?)? ?(?:(\\d+) seconds?)?"
m <- regexpr(pat, Time, perl = TRUE)

# With perl = TRUE, regexpr() also reports the position of every capture group.
m_st <- attr(m, "capture.start")
m_ln <- attr(m, "capture.length")

# Cut each group out of Time, one column per group. A unit that is absent from
# a string comes back as NA (see rows 3 and 8 below).
(mm <- mapply(function(x, y) as.numeric(substr(Time, x, y)),
              data.frame(m_st), data.frame(m_st + m_ln - 1)))

(dd <- setNames(data.frame(mm), c("h", "m", "s")))
#      h  m  s
# 1   22  3 22
# 2  170 15 20
# 3   NA NA 39
# 4    6 44 17
# 5    9 54 36
# 6  357 23 28
# 7  464 30  7
# 8   NA NA 51
# 9   31 39  2
# 10 355 29 10

# Number of each unit in one day. Dividing column-wise turns every part into a
# fraction of a day; na.rm = TRUE makes absent units count as zero.
per_day <- data.frame(h = rep(24, nrow(dd)), m = 24 * 60, s = 24 * 60 * 60)
round(rowSums(dd / per_day, na.rm = TRUE), 3)
# [1]  0.919  7.094  0.000  0.281  0.413 14.891 19.354  0.001  1.319 14.812

I recommend installing the stringr package. Then do this:

# stringr supplies str_match(), which returndays() below relies on.
library(stringr)
# Print numbers with up to 7 significant digits.
options(digits=7)
#' Convert "H hours M minutes S seconds" strings to a total number of days
#'
#' Each unit is looked up independently with stringr's str_match(), so any
#' subset of the three units may be present. A unit that does not occur in a
#' string contributes zero, e.g. "39 seconds" gives 39 / 86400 days rather
#' than 0.
#'
#' @param alist Character vector of duration strings.
#' @return Numeric vector of the same length as `alist`, in days.
returndays <- function(alist) {
  # Days contributed by one unit. Strings without that unit give NA from
  # str_match(); those are replaced by 0 *before* the units are added, so a
  # single missing unit no longer wipes out the others.
  component_days <- function(unit, per_day) {
    value <- as.numeric(str_match(alist, paste0("([0-9]+) ", unit))[, 2])
    value[is.na(value)] <- 0
    value / per_day
  }

  component_days("hours", 24) +
    component_days("minutes", 1440) +
    component_days("seconds", 86400)
}

# Convert every duration string in Time to days, then print the result.
days <-returndays(Time)

days

[1]  0.9190046  7.0939815  0.0000000  0.2807523  0.4129167 14.8912963 19.3542477  0.0000000  1.3187731
[10] 14.8119213

lubridate offers the function period(), which conveniently converts hours, minutes, seconds, etc. into a period object that can easily be converted to seconds:

# Build a lubridate period from named components; the printed form follows.
period(days = 3, hours = 10, minutes = 3, seconds = 37)
## [1] "3d 10H 3M 37S"

I use this function to convert your character strings:

#' Convert one "<number> <unit> <number> <unit> ..." string to days
#'
#' The string is split into alternating values and unit names; the units are
#' used as argument names for lubridate's period(), and the resulting period
#' is converted to seconds and then to days.
#'
#' @param hms_char A single duration string such as
#'   "22 hours 3 minutes 22 seconds". Only the first element is used.
#' @return The duration in days, as a single number.
to_days <- function(hms_char) {
  # Split on any run of whitespace so stray or doubled spaces do not create
  # empty tokens that would shift values and units against each other.
  tokens <- strsplit(trimws(hms_char), "\\s+")[[1]]
  if (length(tokens) %% 2 != 0) {
    stop("expected '<number> <unit>' pairs, got: ", hms_char, call. = FALSE)
  }

  # Tokens alternate value, unit, value, unit, ...
  is_value <- rep(c(TRUE, FALSE), length.out = length(tokens))
  # Coercion warnings are silenced only because the NA check below reports
  # the problem as an error with the offending input.
  values <- suppressWarnings(as.numeric(tokens[is_value]))
  if (anyNA(values)) {
    stop("non-numeric value in duration: ", hms_char, call. = FALSE)
  }

  # period() takes the units as argument names, e.g. period(hours = 22, ...)
  nums <- as.list(values)
  names(nums) <- tokens[!is_value]
  duration <- do.call(period, nums)

  period_to_seconds(duration) / 86400
}

It works on a single character string, so you will need to use sapply to convert the complete Time:

# Apply to_days() to each element of Time. USE.NAMES = FALSE stops sapply()
# from labelling the result with the input strings.
sapply(Time, to_days, USE.NAMES = FALSE)
## [1] 9.190046e-01 7.093981e+00 4.513889e-04 2.807523e-01 4.129167e-01 1.489130e+01 1.935425e+01
## [8] 5.902778e-04 1.318773e+00 1.481192e+01

lubridate is useful here. hms automatically extracts hours, minutes, and seconds (saving you some regex), and time_length converts to days.

> library(lubridate)
> time_length(hms(Time), 'day')
estimate only: convert periods to intervals for accuracy
 [1]  0.9190046  7.0939815         NA  0.2807523  0.4129167 14.8912963 19.3542477         NA
 [9]  1.3187731 14.8119213

However hms fails to parse if there aren't three numbers, so a little pre-scrubbing can help:

> library(stringr)
> Time2 <- sapply(Time, function(x){paste(paste(rep(0, 3 - str_count(x, '[0-9]+')), collapse = ' '), x)})
> time_length(hms(Time2), 'day')
estimate only: convert periods to intervals for accuracy
 [1] 9.190046e-01 7.093981e+00 4.513889e-04 2.807523e-01 4.129167e-01 1.489130e+01 1.935425e+01
 [8] 5.902778e-04 1.318773e+00 1.481192e+01
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!