问题
I have seen a lot of questions relating to formatting times, but none in the particular imported format that I have:
# Sample durations exactly as imported: free-text strings in which any of the
# day / hour / minute / second components may be absent.
Time <- c(
  "22 hours 3 minutes 22 seconds",
  "170 hours 15 minutes 20 seconds",
  "39 seconds",
  "2 days 6 hours 44 minutes 17 seconds",
  "9 hours 54 minutes 36 seconds",
  "357 hours 23 minutes 28 seconds",
  "464 hours 30 minutes 7 seconds",
  "51 seconds",
  "31 hours 39 minutes 2 seconds",
  "355 hours 29 minutes 10 seconds"
)
Some entries contain only "seconds", others "minutes and seconds", "days, hours, minutes and seconds", "days and seconds", etc. There are also NA values that I need to keep. How can I convert this character vector into a numeric total number of days (i.e., add up the days, hours, minutes and seconds)?
For example:
Time
8.10
19.3
0.68
2.28
48.1
0.00
0.70
0.1
3.2
13.9
Thank you!
EDIT
Old question, but a simple lubridate
call does the trick now:
# Use the `Time` vector from the question; a bare `time` would refer to stats::time().
(period_to_seconds(period(Time)) / 86400) %>% round(2)
This also does the trick with no packages other than needing %>%
for readability:
# For each unit keyword, pull out the number written in front of it (0 when the
# unit is absent), convert that number to days, then add the components per row.
Time_vec <- mapply(function(tt, to_days) {
  # Require "<digits> <unit>" so the extraction below is guaranteed to match.
  has_unit <- grepl(paste0("\\d+ ", tt), Time)
  ifelse(has_unit, sub(paste0("^.*?(\\d+) ", tt, ".*$"), "\\1", Time), 0) %>%
    as.numeric() / to_days
},
c("day", "hour", "minute", "second"),
c(1, 24, 1440, 86400),
# Keep a list so a length-1 (or empty) `Time` still yields one column per unit.
SIMPLIFY = FALSE
) %>%
  do.call(what = cbind) %>%
  rowSums() %>%
  round(2)
# grepl() reports FALSE for NA, which would turn missing inputs into 0 days.
Time_vec[is.na(Time)] <- NA
In my actual data, only one value differed from the lubridate solution: 0.96 vs 0.97.
回答1:
again, without packages and a little regex
Time <- c(
  "22 hours 3 minutes 22 seconds",
  "170 hours 15 minutes 20 seconds",
  "39 seconds",
  "6 hours 44 minutes 17 seconds",
  "9 hours 54 minutes 36 seconds",
  "357 hours 23 minutes 28 seconds",
  "464 hours 30 minutes 7 seconds",
  "51 seconds",
  "31 hours 39 minutes 2 seconds",
  "355 hours 29 minutes 10 seconds"
)

# One optional capture group per unit (singular or plural). The ^...$ anchors
# matter: every group is optional, so an unanchored pattern would "match" the
# empty string in front of unsupported text and silently report 0.
pat <- paste0(
  "^(?:(\\d+) days? ?)?(?:(\\d+) hours? ?)?",
  "(?:(\\d+) minutes? ?)?(?:(\\d+) seconds?)?$"
)
m <- regexpr(pat, Time, perl = TRUE)
m_st <- attr(m, "capture.start")
m_ln <- attr(m, "capture.length")

# substring() recycles Time down each column of the start/end matrices; a group
# that took no part in the match yields "" (or NA), i.e. NA after as.numeric().
(mm <- matrix(
  as.numeric(substring(Time, m_st, m_st + m_ln - 1)),
  nrow = length(Time), ncol = 4,
  dimnames = list(NULL, c("d", "h", "m", "s"))
))
(dd <- data.frame(mm))
#     d   h  m  s
# 1  NA  22  3 22
# 2  NA 170 15 20
# 3  NA  NA NA 39
# 4  NA   6 44 17
# 5  NA   9 54 36
# 6  NA 357 23 28
# 7  NA 464 30  7
# 8  NA  NA NA 51
# 9  NA  31 39  2
# 10 NA 355 29 10

# Number of each unit in one day, in the same column order as `mm`.
per_day <- c(d = 1, h = 24, m = 24 * 60, s = 24 * 60 * 60)
total <- rowSums(sweep(mm, 2, per_day, "/"), na.rm = TRUE)
# Entries with no parsable component (NA input or unrecognised text) stay NA
# instead of collapsing to 0 under na.rm = TRUE.
total[rowSums(!is.na(mm)) == 0] <- NA
round(total, 3)
# [1] 0.919 7.094 0.000 0.281 0.413 14.891 19.354 0.001 1.319 14.812
回答2:
I recommend you to install the stringr package. Then do this
library(stringr)
options(digits=7)
#' Convert free-text durations to a total number of days.
#'
#' Uses base R only.
#'
#' @param alist Character vector such as "22 hours 3 minutes 22 seconds". Each
#'   of the day / hour / minute / second components is optional and may be
#'   written in the singular or plural.
#' @return Numeric vector, the same length as `alist`, holding the summed
#'   duration in days. Absent components count as 0; `NA` inputs stay `NA`.
returndays <- function(alist) {
  alist <- as.character(alist)

  # Number written in front of `unit` for every element, 0 where it is absent.
  # The look-ahead keeps the match on the digits alone, and `[0-9]+` accepts
  # "0" as well as values with more than three digits.
  unit_value <- function(unit) {
    hit <- regexpr(paste0("[0-9]+(?= ", unit, "s?\\b)"), alist, perl = TRUE)
    found <- !is.na(hit) & hit > 0L
    value <- numeric(length(alist))
    value[found] <- as.numeric(regmatches(alist, hit))
    value
  }

  # Add the components individually: summing `h + m + s` first would make the
  # whole total NA (and then 0 under na.rm) as soon as one unit is missing.
  days <- unit_value("day") +
    unit_value("hour") / 24 +
    unit_value("minute") / 1440 +
    unit_value("second") / 86400
  days[is.na(alist)] <- NA_real_
  days
}
days <-returndays(Time)
days
[1] 0.9190046 7.0939815 0.0000000 0.2807523 0.4129167 14.8912963 19.3542477 0.0000000 1.3187731
[10] 14.8119213
回答3:
lubridate
offers the function period()
that can conveniently convert hours, minutes, seconds etc. to a Period
object, which can be easily converted to seconds:
period(days = 3, hours = 10, minutes = 3, seconds = 37)
## [1] "3d 10H 3M 37S"
I use this function to convert your character strings:
#' Convert one duration string to days via a lubridate period.
#'
#' @param hms_char A single string of alternating numbers and unit names,
#'   e.g. "22 hours 3 minutes 22 seconds". Only the first element is used.
#' @return The duration in days (numeric), or `NA_real_` when the input is
#'   missing.
to_days <- function(hms_char) {
  # a missing value cannot be split into number/unit pairs; keep it missing
  if (is.na(hms_char[1])) {
    return(NA_real_)
  }
  # split string on whitespace, tolerating padding and repeated blanks
  v <- strsplit(trimws(hms_char[1]), "[[:space:]]+")[[1]]
  if (length(v) %% 2 != 0) {
    stop("expected alternating '<number> <unit>' pairs, got: ", hms_char[1],
         call. = FALSE)
  }
  # numbers sit at the odd positions, unit names at the even ones
  is_number <- rep(c(TRUE, FALSE), length.out = length(v))
  # get numbers (converted explicitly so period() receives numeric values)
  nums <- as.list(as.numeric(v[is_number]))
  # get units and use them as argument names
  names(nums) <- v[!is_number]
  # build the period, then convert seconds to days
  duration <- do.call(period, nums)
  period_to_seconds(duration) / 86400
}
It works on a single character string, so you will need to use sapply
to convert the complete Time
:
sapply(Time, to_days, USE.NAMES = FALSE)
## [1] 9.190046e-01 7.093981e+00 4.513889e-04 2.807523e-01 4.129167e-01 1.489130e+01 1.935425e+01
## [8] 5.902778e-04 1.318773e+00 1.481192e+01
回答4:
lubridate
is useful here. hms
automatically extracts hours, minutes, and seconds (saving you some regex), and time_length
converts to days.
> library(lubridate)
> time_length(hms(Time), 'day')
estimate only: convert periods to intervals for accuracy
[1] 0.9190046 7.0939815 NA 0.2807523 0.4129167 14.8912963 19.3542477 NA
[9] 1.3187731 14.8119213
However hms
fails to parse if there aren't three numbers, so a little pre-scrubbing can help:
> library(stringr)
> Time2 <- sapply(Time, function(x){paste(paste(rep(0, 3 - str_count(x, '[0-9]+')), collapse = ' '), x)})
> time_length(hms(Time2), 'day')
estimate only: convert periods to intervals for accuracy
[1] 9.190046e-01 7.093981e+00 4.513889e-04 2.807523e-01 4.129167e-01 1.489130e+01 1.935425e+01
[8] 5.902778e-04 1.318773e+00 1.481192e+01
来源:https://stackoverflow.com/questions/35087839/calculate-character-string-days-hours-minutes-seconds-to-numeric-total-days