Designing an access/web statistics counter module for appengine

情书的邮戳  2021-01-15 03:04

I need an access statistics module for App Engine that tracks a few request handlers and collects the statistics into Bigtable (the datastore). I have not found any ready-made solution on GitHub.

3 Answers
  •  忘掉有多难  2021-01-15 03:35

    Here is the code for the task-queue approach with an hourly timeframe. Interestingly, it works without transactions or other mutex magic.

    If memcache supported priorities (protecting these counter entries from eviction), the accuracy of this solution would improve: a count is lost whenever memcache evicts the key before the hourly task runs.

    import datetime
    import logging

    from google.appengine.api import memcache
    from google.appengine.api import taskqueue
    from google.appengine.ext import ndb

    # Task URL, e.g. '/h/statistics/collect/' + counter_key + '?groupId=' + groupId + '&countableId=' + countableId
    TASK_URL = '/h/statistics/collect/'
    MEMCACHE_PREFIX = "StatisticsDB_"


    class StatisticsDB(ndb.Model):
        """
        Memcached counting, saved to the DB once per hour.
        """
        # key.id() = 2016-01-31-17_groupId_countableId
        countableId = ndb.StringProperty(required=True)  # unique name of the counter within its group
        groupId = ndb.StringProperty()  # counter group (allows a single DB query for a group of counters)
        count = ndb.IntegerProperty(default=0)  # count per timeframe

        @classmethod
        def increment(cls, groupId, countableId):  # throws InvalidTaskNameError
            """
            Increment a counter. countableId is the unique id of the countable.
            Throws InvalidTaskNameError if the ids do not match [a-zA-Z0-9-_]{1,500}.
            """
            # Calculate counter_key, used both in the memcache key and later as the DB key id.
            # The counting timeframe is 1h, determined by %H; MUST MATCH the ETA calculation in _add_task().
            counter_key = datetime.datetime.utcnow().strftime("%Y-%m-%d-%H") + "_" + groupId + "_" + countableId
            client = memcache.Client()

            n = client.incr(MEMCACHE_PREFIX + counter_key)
            if n is None:
                # First increment in this timeframe: schedule the task that will persist the count,
                # then create the memcache entry and count this request.
                cls._add_task(counter_key, groupId, countableId)
                client.incr(MEMCACHE_PREFIX + counter_key, initial_value=0)

        @classmethod
        def _add_task(cls, counter_key, groupId, countableId):
            taskurl = TASK_URL + counter_key + "?groupId=" + groupId + "&countableId=" + countableId
            now = datetime.datetime.utcnow()
            # The counting timeframe is 1h, determined by counter_key; MUST MATCH the key calculation above.
            # The ETA lands in minute 1 of the next hour (at most ~1h later), spread over that minute by
            # the seconds component and throttled by the queue parameters.
            eta = now + datetime.timedelta(minutes=(61 - now.minute))
            task = taskqueue.Task(url=taskurl, method='GET', name=MEMCACHE_PREFIX + counter_key, eta=eta)
            queue = taskqueue.Queue(name='StatisticsDB')
            try:
                queue.add(task)
            except taskqueue.TaskAlreadyExistsError:
                # May also occur if 2 increments are done simultaneously.
                logging.warning("StatisticsDB TaskAlreadyExistsError lost memcache for %s", counter_key)
            except taskqueue.TombstonedTaskError:
                # The task name is still tombstoned, i.e. a task with this name was already run,
                # e.g. because somebody triggered it manually before its ETA.
                logging.warning("StatisticsDB TombstonedTaskError task %s was already run prematurely", counter_key)

        @classmethod
        def save2db_task_handler(cls, counter_key, countableId, groupId):
            """
            Save the counter from memcache to the DB. Idempotent method.
            At the time this executes, no more increments to this counter occur.
            """
            dbkey = ndb.Key(StatisticsDB, counter_key)

            n = memcache.get(MEMCACHE_PREFIX + counter_key)
            if n is None:
                logging.warning("StatisticsDB lost count for %s", counter_key)
                return

            stats = StatisticsDB(key=dbkey, count=n, countableId=countableId, groupId=groupId)
            stats.put()
            memcache.delete(MEMCACHE_PREFIX + counter_key)  # delete only after the put succeeded
            logging.info("StatisticsDB saved %s n = %i", counter_key, n)
    
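    The groupId property exists so that all counters of one group can be fetched with a single datastore query. As a sketch of that read side (the helper name, the return shape, and the date-prefix filter are assumptions for illustration, not from the answer):

    def counts_for_group(groupId, day_prefix):
        """Return {countableId: {timeframe: count}} for one group, e.g. day_prefix='2016-01-31'."""
        result = {}
        for stats in StatisticsDB.query(StatisticsDB.groupId == groupId):
            timeframe = stats.key.id().split('_')[0]  # key.id() = YYYY-MM-DD-HH_groupId_countableId
            if timeframe.startswith(day_prefix):
                result.setdefault(stats.countableId, {})[timeframe] = stats.count
        return result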
