I have the following table mytable
in Hive:
id radar_id car_id datetime
1 A21 123 2017-03-08 17:31:19.0
2 A21
I missed that you're using Hive, so I started writing the query for SQL Server,
but maybe it will still help you. Try something like this:
QUERY
-- Average number of A21 -> B15 trips per day (SQL Server syntax).
-- A trip is the same car read at the start radar and then at the end radar
-- no more than 30 minutes later.
with radar_pairs as (
    select
        s.radar_id as radar_start,
        e.radar_id as radar_end,
        cast(s.[datetime] as date) as dt,
        -- rn1 = 1 marks the latest start reading per car and day, rn2 = 1 the
        -- latest end reading. Each ordering is tie-broken on the other side's
        -- timestamp so that, when several start readings pair with the same
        -- end reading (or vice versa), the numbering is deterministic and
        -- both "1"s land on the same pair.
        row_number() over (
            partition by s.radar_id, s.car_id, cast(s.[datetime] as date)
            order by s.[datetime] desc, e.[datetime] desc
        ) as rn1,
        row_number() over (
            partition by e.radar_id, e.car_id, cast(e.[datetime] as date)
            order by e.[datetime] desc, s.[datetime] desc
        ) as rn2
    from trips as s
    inner join trips as e
        on s.car_id = e.car_id
        -- Compare the timestamps directly: datediff(minute, ...) counts
        -- minute boundaries crossed, so it would also accept an end reading
        -- taken slightly before the start reading within the same minute.
        and e.[datetime] >= s.[datetime]
        and e.[datetime] <= dateadd(minute, 30, s.[datetime])
    where s.radar_id = 'A21'
      and e.radar_id = 'B15'
)
select
    radar_start,
    radar_end,
    -- decimal operands keep the division from truncating to an integer
    convert(decimal(6,3), count(*)) / convert(decimal(6,3), count(distinct dt)) as avg_tripscount_per_day
from radar_pairs
where rn1 = 1
  and rn2 = 1
group by
    radar_start,
    radar_end
OUTPUT
radar_start radar_end avg_tripscount_per_day
A21 B15 1.5000000000
SAMPLE DATA
-- Table holding the sample radar readings (SQL Server syntax).
create table trips (
    id         int,       -- row id
    radar_id   char(3),   -- radar code, e.g. 'A21'
    car_id     int,       -- car identifier
    [datetime] datetime   -- timestamp of the reading
);
-- Sample readings. The target columns are listed explicitly so the insert
-- does not depend on the table's column order.
-- Note: ids 5, 8 and 9 each appear twice in this sample data.
insert into trips (id, radar_id, car_id, [datetime]) values
(1,'A21',123,'2017-03-08 17:31:19.0'),
(2,'A21',555,'2017-03-08 17:32:00.0'),
(3,'A21',777,'2017-03-08 17:33:00.0'),
(4,'B15',123,'2017-03-08 17:35:22.0'),
(5,'B15',555,'2017-03-08 17:34:05.0'),
(5,'B15',777,'2017-03-08 20:50:12.0'),
(6,'A21',123,'2017-03-09 11:00:00.0'),
(7,'C11',123,'2017-03-09 11:10:00.0'),
(8,'A21',123,'2017-03-09 11:12:00.0'),
(9,'A21',555,'2017-03-09 11:12:10.0'),
(8,'B15',123,'2017-03-09 11:14:00.0'),
(9,'C11',555,'2017-03-09 11:20:00.0');