DRF: group by DateTimeField

女生的网名这么多〃 提交于 2020-10-26 14:24:57

需求

  • 实现queryset 针对年/月/日/小时,四种时间粒度统计数量.
  • Elasticsearch 天然支持按时间分桶聚合, 而 Django 中只能自己实现.
  • 数据库: PostgreSQL

前端效果图

场景1.统计微信访问趋势

  • queryset: 数据的queryset
  • start_date: 开始日期
  • end_date: 结束日期
  • filter_type: 时间粒度

data_statistical.py
def get_visit_trend(start_date=None, end_date=None, filter_type=None):
    """
    Return the mini-program visit trend for a date range.

    :param start_date: first day of the range (inclusive), e.g. 2019-10-1
    :param end_date: last day of the range (inclusive)
    :param filter_type: granularity code; it is translated through
        DateRangeChoice.CHOICES_MAP_STR and falls back to 'day' when the
        code is not found there
    :return: None when the resolved granularity is neither 'day' nor
        'month'; otherwise a mapping keyed by the formatted date
    """
    granularity = DateRangeChoice.CHOICES_MAP_STR.get(filter_type, 'day')
    if granularity not in ['day', 'month']:
        return None

    # A single-day range is matched exactly, anything else as a closed range.
    if start_date == end_date:
        date_filter = {'ref_date': start_date}
    else:
        date_filter = {'ref_date__gte': start_date,
                       'ref_date__lte': end_date}
    trend_qs = VisitTrend.objects.all().filter(**date_filter)

    if granularity != 'day':
        # Monthly view: every metric is summed per month.
        metric_names = ('session_cnt', 'visit_pv', 'visit_uv',
                        'visit_uv_new', 'stay_time_uv',
                        'stay_time_session', 'visit_depth')
        monthly_sums = {name: Sum(name) for name in metric_names}
        return _get_group_by_data_mul_filed(
            queryset=trend_qs, filter_type=granularity,
            time_filed='ref_date', annotate_map=monthly_sums)

    # Daily view: rows are read as stored, no aggregation is applied.
    day_columns = ['ref_date', 'session_cnt', 'visit_uv', 'visit_pv',
                   'visit_uv_new', 'stay_time_uv', 'stay_time_session',
                   'visit_depth']
    day_rows = list(trend_qs.values(*day_columns))
    return to_map_by_group_by_data_mul_filed(
        data_list=day_rows, datetime_key='ref_date',
        datetime_fmt='%Y-%m-%d')

场景2. 获取xxx的预约数量

def get_xxxmodelxxx_reservation_count(
        queryset=None,
        start_date=None, end_date=None, filter_type=None):
    """
    Count reservations per time bucket within a date range.

    :param queryset: queryset supplied by the caller; it is filtered on the
        date part of its ``create_time`` field
    :param start_date: first day of the range (inclusive), e.g. 2019-10-1
    :param end_date: last day of the range (inclusive)
    :param filter_type: granularity code, translated through
        DateRangeChoice.CHOICES_MAP_STR (falls back to 'day')
    :return: the result of _get_group_by_data for the filtered queryset
    """
    # A single-day range is matched exactly, anything else as a closed range.
    if start_date == end_date:
        date_filter = {'create_time__date': start_date}
    else:
        date_filter = {
            'create_time__date__gte': start_date,
            'create_time__date__lte': end_date,
        }
    reservations = queryset.filter(**date_filter)
    granularity = DateRangeChoice.CHOICES_MAP_STR.get(filter_type, 'day')
    return _get_group_by_data(
        queryset=reservations, filter_type=granularity,
        time_filed='create_time',
        count_key='count', count_val=Count('id'))

group_by 模块的实现

  • 基于 annotate 加 Trunc 系列时间函数 (TruncYear/TruncMonth/TruncDay/TruncHour) 实现
  • 调用方友好 (data_statistical.py数据统计模块)

group_by.py 的实现

#!/usr/bin/env python
"""
Author: hyh
Email: hyhlinux@163.com

desc:
    queryset group by year/month/day/hour
    group by 年/月/日/小时
"""
import logging
from collections import OrderedDict
from django.db.models import Count
from django.db.models.functions import (
    TruncHour, TruncMonth, TruncDay,
    TruncYear
)

logger = logging.getLogger('debug')


def group_by_month_mul_filed(
        queryset=None, time_filed='create_time', annotate_map=None):
    """
    Group a queryset by month and compute several aggregates per month.

    :param queryset: queryset already restricted to the wanted time range
    :param time_filed: name of the field that is truncated to month
    :param annotate_map: mapping of output alias -> aggregate expression,
        for example::

            dict(
                session_cnt=Sum('session_cnt'),
                visit_pv=Sum('visit_pv'),
            )

        With that mapping every result row carries the keys
        ``'month'``, ``'session_cnt'`` and ``'visit_pv'``.
    :return: queryset of dict rows ordered by ``month``
    """
    # Columns kept in the final rows: the month bucket plus every alias.
    columns = ['month'] + list(annotate_map.keys())

    per_month = queryset.annotate(month=TruncMonth(time_filed))
    per_month = per_month.values('month')
    aggregated = per_month.annotate(**annotate_map)
    return aggregated.values(*columns).order_by('month')


def group_by_year(queryset=None, time_filed='create_time',
                  count_key='count', count_val=None):
    """
    Group a queryset by year and aggregate each group.

    :param queryset: queryset already restricted to the wanted time range
    :param time_filed: name of the field that is truncated to year
    :param count_key: alias of the aggregate in every row, e.g. 'count'
    :param count_val: aggregate expression; a falsy value selects Count('id')
    :return: queryset of ``{'year': ..., count_key: ...}`` rows ordered by
        year
    """
    aggregate = count_val or Count('id')
    per_year = queryset.annotate(year=TruncYear(time_filed)).values('year')
    counted = per_year.annotate(**{count_key: aggregate})
    return counted.values('year', count_key).order_by('year')


def group_by_month(queryset=None, time_filed='create_time',
                   count_key='count', count_val=None):
    """
    Group a queryset by month and aggregate each group.

    :param queryset: queryset already restricted to the wanted time range
    :param time_filed: name of the field that is truncated to month
    :param count_key: alias of the aggregate in every row, e.g. 'count'
    :param count_val: aggregate expression; a falsy value selects Count('id')
    :return: queryset of ``{'month': ..., count_key: ...}`` rows ordered by
        month
    """
    aggregate = count_val or Count('id')
    per_month = queryset.annotate(month=TruncMonth(time_filed))
    per_month = per_month.values('month')
    counted = per_month.annotate(**{count_key: aggregate})
    return counted.values('month', count_key).order_by('month')


def group_by_day(queryset=None, time_filed='create_time',
                 count_key='count', count_val=None):
    """
    Group a queryset by day and aggregate each group.

    Rows are bucketed per day, reduced to the day and the aggregate, and
    sorted by day.

    :param queryset: queryset already restricted to the wanted time range
    :param time_filed: name of the field that is truncated to day
    :param count_key: alias of the aggregate in every row, e.g. 'count'
    :param count_val: aggregate expression; a falsy value selects Count('id')
    :return: queryset of ``{'day': ..., count_key: ...}`` rows ordered by day
    """
    aggregate = count_val or Count('id')
    per_day = queryset.annotate(day=TruncDay(time_filed)).values('day')
    counted = per_day.annotate(**{count_key: aggregate})
    return counted.values('day', count_key).order_by('day')


def group_by_hour(queryset=None, time_filed='create_time',
                  count_key='count', count_val=None):
    """
    Group a queryset by hour and aggregate each group.

    :param queryset: queryset already restricted to the wanted time range
    :param time_filed: name of the field that is truncated to hour
    :param count_key: alias of the aggregate in every row, e.g. 'count'
    :param count_val: aggregate expression; a falsy value selects Count('id')
    :return: queryset of ``{'hour': ..., count_key: ...}`` rows ordered by
        hour
    """
    aggregate = count_val or Count('id')
    per_hour = queryset.annotate(hour=TruncHour(time_filed)).values('hour')
    counted = per_hour.annotate(**{count_key: aggregate})
    return counted.values('hour', count_key).order_by('hour')


def to_map_by_group_by_data(data_list=None, count_key='count',
                            datetime_key='month', datetime_fmt='%Y-%m'):
    """
    Turn grouped rows into an ordered ``{formatted date: count}`` mapping.

    :param data_list: iterable of dict rows, such as the result of
        group_by_month; ``None`` (the default) is treated as "no rows"
    :param count_key: key holding the aggregated value in every row; a row
        without that key contributes ``None``
    :param datetime_key: key holding the date/datetime bucket in every row
    :param datetime_fmt: ``strftime`` format used to build the map keys
    :return: OrderedDict mapping the formatted date to the aggregated value,
        in the order the rows were supplied
    :raises TypeError: if a row is not a dict
    """
    data_map = OrderedDict()
    # The signature advertises None as the default, so it must not crash.
    if data_list is None:
        return data_map
    for item_map in data_list:
        if not isinstance(item_map, dict):
            raise TypeError('item_map({}) is not dict'.format(type(item_map)))
        datetime_value = item_map.get(datetime_key)
        datetime_str = datetime_value.strftime(datetime_fmt)
        data_map[datetime_str] = item_map.get(count_key)
    return data_map


def to_map_by_group_by_data_mul_filed(
        data_list=None, datetime_key='month', datetime_fmt='%Y-%m'):
    """
    Turn grouped multi-field rows into an ordered ``{date: fields}`` mapping.

    :param data_list: iterable of dict rows, such as the result of
        group_by_month_mul_filed; ``None`` (the default) is treated as
        "no rows"
    :param datetime_key: key holding the date/datetime bucket in every row
    :param datetime_fmt: ``strftime`` format used to build the map keys
    :return: OrderedDict mapping the formatted date to a dict holding the
        remaining fields of the row (everything except ``datetime_key``),
        in the order the rows were supplied
    :raises TypeError: if a row is not a dict
    :raises KeyError: if a row has no ``datetime_key`` entry
    """
    data_map = OrderedDict()
    # The signature advertises None as the default, so it must not crash.
    if data_list is None:
        return data_map
    for item_map in data_list:
        if not isinstance(item_map, dict):
            raise TypeError('item_map({}) is not dict'.format(type(item_map)))
        # Pop from a shallow copy so the caller's row keeps its date key.
        fields = item_map.copy()
        datetime_value = fields.pop(datetime_key)
        data_map[datetime_value.strftime(datetime_fmt)] = fields
    return data_map


# granularity -> (grouping function, key of the bucket column in each row,
#                 strftime format used for the keys of the resulting map)
GROUP_BY_FUNC_MAP = {
    'hour': (group_by_hour, 'hour', '%Y-%m-%d %H:%M:%S'),
    'day': (group_by_day, 'day', '%Y-%m-%d'),
    'month': (group_by_month, 'month', '%Y-%m'),
    'year': (group_by_year, 'year', '%Y'),
}

# Same layout as GROUP_BY_FUNC_MAP, for the multi-aggregate variant.
GROUP_BY_FUNC_MAP_MUL_FILED = {
    # Day is currently the finest granularity of visit_trend, so daily rows
    # are read directly and need no grouping entry here.
    'month': (group_by_month_mul_filed, 'month', '%Y-%m'),
}


def _get_group_by_data(filter_type='', queryset=None, time_filed='create_time',
                       count_key='count', count_val=None):
    """
    Group ``queryset`` by a time granularity and return a date -> count map.

    :param filter_type: a key of GROUP_BY_FUNC_MAP; a falsy value makes the
        function return None
    :param queryset: queryset handed to the grouping function
    :param time_filed: name of the field used for bucketing
    :param count_key: alias of the aggregate in every row
    :param count_val: aggregate expression passed on to the grouping function
    :return: None for a falsy filter_type, otherwise the mapping built by
        to_map_by_group_by_data
    :raises KeyError: if filter_type is truthy but not in GROUP_BY_FUNC_MAP
    """
    if not filter_type:
        return None
    if filter_type not in GROUP_BY_FUNC_MAP:
        message = 'filter_type:{} must in {}'.format(
            filter_type, GROUP_BY_FUNC_MAP.keys())
        raise KeyError(message)

    grouper, bucket_key, bucket_fmt = GROUP_BY_FUNC_MAP[filter_type]
    rows = grouper(queryset=queryset, time_filed=time_filed,
                   count_key=count_key, count_val=count_val)
    return to_map_by_group_by_data(
        rows, count_key=count_key,
        datetime_key=bucket_key, datetime_fmt=bucket_fmt)


def _get_group_by_data_mul_filed(
        filter_type='month', queryset=None, time_filed='create_time',
        annotate_map=None):
    """
    Group ``queryset`` by a time granularity with several aggregates.

    :param filter_type: a key of GROUP_BY_FUNC_MAP_MUL_FILED; a falsy value
        makes the function return None
    :param queryset: queryset handed to the grouping function
    :param time_filed: name of the field used for bucketing
    :param annotate_map: mapping of alias -> aggregate expression passed on
        to the grouping function
    :return: None for a falsy filter_type, otherwise the mapping built by
        to_map_by_group_by_data_mul_filed
    :raises KeyError: if filter_type is truthy but not in
        GROUP_BY_FUNC_MAP_MUL_FILED
    """
    if not filter_type:
        return None
    if filter_type not in GROUP_BY_FUNC_MAP_MUL_FILED:
        message = 'filter_type:{} must in {}'.format(
            filter_type, GROUP_BY_FUNC_MAP_MUL_FILED.keys())
        raise KeyError(message)

    entry = GROUP_BY_FUNC_MAP_MUL_FILED[filter_type]
    grouper, bucket_key, bucket_fmt = entry
    rows = grouper(queryset=queryset, time_filed=time_filed,
                   annotate_map=annotate_map)
    return to_map_by_group_by_data_mul_filed(
        rows, datetime_key=bucket_key, datetime_fmt=bucket_fmt)

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!