问题
I am trying to extract the daily minimum zenith angle in a dataset which consists of 24h values (1 zenith angle value every hour) over ~31 days for 12 months. It looks like this:
JulianDay Azimuth Zenith Date (YYMMDD HH:MM:SS)
2455928 174.14066 70.04650 2012-01-01 13:00:00
2455928 188.80626 70.30747 2012-01-01 14:00:00
2455928 203.03458 73.12297 2012-01-01 15:00:00
2455928 216.28061 78.20131 2012-01-01 16:00:00
2455928 228.35929 85.10759 2012-01-01 17:00:00
....
2456293 146.33844 77.03456 2012-12-31 11:00:00
2456293 159.80472 72.38003 2012-12-31 12:00:00
Is there a function that can extract the maximum and minimum solar zenith angle from each day (i.e., 365 outputs)?
回答1:
You can compute a summary grouped by day. Here is one way, supposing your data frame is called df:
library(data.table)
# One output row per JulianDay holding that day's largest and smallest Zenith.
setDT(df)[,
  .(maxZenith = max(Zenith), minZenith = min(Zenith)),
  by = .(JulianDay)
]
If you want to use the Date
column instead of JulianDay
, do something like:
# Same summary, grouped by the calendar date taken from the Date column.
setDT(df)[,
  .(maxZenith = max(Zenith), minZenith = min(Zenith)),
  by = .(as.Date(Date))
]
This assumes you have renamed your `Date (YYMMDD HH:MM:SS)` column to `Date`. Just FYI: even though it is allowed, having spaces in a column name is not considered good practice.
回答2:
In base R
:
# Ten-row sample: five hourly readings for each of two Julian days, with the
# date and the time of day read into separate columns.
my.data <- read.table(
  header = TRUE,
  text = '
JulianDay Azimuth Zenith Date.YYMMDD Date.HHMMSS
2455928 174.14066 70.04650 2012-01-01 13:00:00
2455928 188.80626 70.30747 2012-01-01 14:00:00
2455928 203.03458 73.12297 2012-01-01 15:00:00
2455928 216.28061 78.20131 2012-01-01 16:00:00
2455928 228.35929 85.10759 2012-01-01 17:00:00
2455929 160.00000 70.04650 2012-01-02 13:00:00
2455929 188.80626 70.30747 2012-01-02 14:00:00
2455929 203.03458 73.12297 2012-01-02 15:00:00
2455929 216.28061 78.20131 2012-01-02 16:00:00
2455929 228.35929 85.10759 2012-01-02 17:00:00
'
)

# Minimum and maximum Azimuth for each JulianDay via the formula interface.
aggregate(
  Azimuth ~ JulianDay,
  data = my.data,
  FUN = function(x) c(Min = min(x), Max = max(x))
)
One problem with aggregate is that its output is not in a form that is easy to use. It requires a bit of post-processing:
# Minimum and maximum Azimuth per JulianDay. The columns are referenced through
# my.data$ directly, so no with() wrapper is required.
my.min.max <- aggregate(
  my.data$Azimuth,
  by = list(my.data$JulianDay),
  FUN = function(x) c(MIN = min(x), MAX = max(x))
)

# Expand the aggregate() result into a plain data frame with one column per
# statistic, and give the columns the names used for the merge below.
my.min.max2 <- setNames(
  do.call(data.frame, my.min.max),
  c('JulianDay', 'my.min', 'my.max')
)

# Attach each day's minimum and maximum to every row of the original data.
my.data2 <- merge(my.data, my.min.max2, by = 'JulianDay')
my.data2
# JulianDay Azimuth Zenith Date.YYMMDD Date.HHMMSS my.min my.max
#1 2455928 174.1407 70.04650 2012-01-01 13:00:00 174.1407 228.3593
#2 2455928 188.8063 70.30747 2012-01-01 14:00:00 174.1407 228.3593
#3 2455928 203.0346 73.12297 2012-01-01 15:00:00 174.1407 228.3593
#4 2455928 216.2806 78.20131 2012-01-01 16:00:00 174.1407 228.3593
#5 2455928 228.3593 85.10759 2012-01-01 17:00:00 174.1407 228.3593
#6 2455929 160.0000 70.04650 2012-01-02 13:00:00 160.0000 228.3593
#7 2455929 188.8063 70.30747 2012-01-02 14:00:00 160.0000 228.3593
#8 2455929 203.0346 73.12297 2012-01-02 15:00:00 160.0000 228.3593
#9 2455929 216.2806 78.20131 2012-01-02 16:00:00 160.0000 228.3593
#10 2455929 228.3593 85.10759 2012-01-02 17:00:00 160.0000 228.3593
You can use by
also, but the output from by
also requires a bit of post-processing:
# Compute c(Min, Max) of Azimuth within each JulianDay group.
by.min.max <- by(
  my.data$Azimuth,
  my.data$JulianDay,
  FUN = function(x) c(Min = min(x), Max = max(x))
)
# Stack the per-day vectors into one row per day.
by.min.max <- as.data.frame(do.call("rbind", by.min.max))
# The row names carry the JulianDay values; expose them as a key column.
by.min.max <- cbind(JulianDay = rownames(by.min.max), by.min.max)

my.data2 <- merge(my.data, by.min.max, by = 'JulianDay')
my.data2
You can also use tapply
:
# Build a POSIXct timestamp from the separate date and time columns. The date
# text is year-month-day (e.g. 2012-01-02), so the format must be %Y-%m-%d;
# %Y-%d-%m would read 2012-01-02 as 1 February and yield NA whenever the day
# of the month exceeds 12.
my.data$Date_Time <- as.POSIXct(
  paste(my.data$Date.YYMMDD, my.data$Date.HHMMSS),
  format = "%Y-%m-%d %H:%M:%S"
)

# Minimum and maximum Azimuth per JulianDay: tapply() produces one named
# c(Min, Max) vector per day, and rbind stacks them into one row per day.
ty.min.max <- as.data.frame(do.call(
  "rbind",
  tapply(
    my.data$Azimuth,
    my.data$JulianDay,
    FUN = function(x) c(Min = min(x), Max = max(x))
  )
))
# The row names carry the JulianDay values; expose them as a key column.
ty.min.max <- cbind(JulianDay = rownames(ty.min.max), ty.min.max)

# Attach each day's minimum and maximum to every row of the original data.
my.data2 <- merge(my.data, ty.min.max, by = 'JulianDay')
my.data2
You can also use a combination of split
and sapply
:
# sapply() returns one column per JulianDay (rows Min and Max); transpose so
# that each day becomes a row.
sy.min.max <- sapply(
  split(my.data$Azimuth, my.data$JulianDay),
  function(x) c(Min = min(x), Max = max(x))
)
sy.min.max <- t(sy.min.max)

# Move the JulianDay values from the row names into a key column.
sy.min.max <- data.frame(
  JulianDay = rownames(sy.min.max),
  sy.min.max,
  stringsAsFactors = FALSE
)

my.data2 <- merge(my.data, sy.min.max, by = 'JulianDay')
my.data2
You can also use a combination of split
and lapply
:
# One c(Min, Max) vector of Azimuth per JulianDay, held in a named list.
ly.min.max <- lapply(
  split(my.data$Azimuth, my.data$JulianDay),
  function(x) c(Min = min(x), Max = max(x))
)
# Stack the list into one row per day, then expose the JulianDay values
# (currently the row names) as a key column.
ly.min.max <- as.data.frame(do.call("rbind", ly.min.max))
ly.min.max <- cbind(JulianDay = rownames(ly.min.max), ly.min.max)

my.data2 <- merge(my.data, ly.min.max, by = 'JulianDay')
my.data2
You can also use ave
, although I have not figured out how to use two functions in one ave
statement:
# ave() returns a vector as long as its input, with each element replaced by
# the statistic of its JulianDay group, so no merge step is needed.
my.min <- ave(my.data$Azimuth, my.data$JulianDay, FUN = min)
my.max <- ave(my.data$Azimuth, my.data$JulianDay, FUN = max)

# Append the two per-day columns to the original rows.
my.data2 <- data.frame(my.data, my.min = my.min, my.max = my.max)
my.data2
回答3:
With dplyr
library(dplyr)
# One row per JulianDay with that day's largest and smallest Zenith.
# If you need `Date` class, group on `as.Date(JulianDay)` instead.
df %>%
  group_by(JulianDay) %>%
  summarise(
    MaxZenith = max(Zenith),
    minZenith = min(Zenith)
  )
Here 'JulianDay' is the name given to the renamed (YYMMDD HH:MM:SS) column.
来源:https://stackoverflow.com/questions/37516377/daily-minimum-values-in-r