I need an access statistics module for App Engine that tracks a few request handlers and collects statistics to Bigtable. I have not found any ready-made solution on GitHub an
Here is the code implementing the task-queue approach with an hourly timeframe. Interestingly, it works without transactions or other mutex magic. (For readability, the Python indentation of the methods is wrong.)
If memcache supported priorities, the accuracy of this solution would increase.
# Base path of the save task. The full task URL is built as
# TASK_URL + counter_key + "?groupId=" + groupId + "&countableId=" + countableId
TASK_URL = "/h/statistics/collect/"
# Prefix shared by the memcache keys and the task names of all counters.
MEMCACHE_PREFIX = 'StatisticsDB_'
class StatisticsDB(ndb.Model):
    """
    Memcached counting saved each hour to DB.

    increment() bumps a per-hour memcache counter and, when that counter is
    first created, enqueues a task that becomes due after the hour has ended.
    That task is expected to end up in save2db_task_handler() (the request
    routing is not part of this class), which stores the count as one entity
    per counter and hour.
    """
    # key.id() = 2016-01-31-17_groupId_countableId
    countableId = ndb.StringProperty(required=True)  # unique name of counter within group
    groupId = ndb.StringProperty()  # counter group (allows single DB query for group of counters)
    count = ndb.IntegerProperty(default=0)  # count per timeframe

    @classmethod
    def increment(cls, groupId, countableId):  # throws InvalidTaskNameError
        """
        Increment a counter. countableId is the unique id of the countable
        throws InvalidTaskNameError if ids do not match: [a-zA-Z0-9-_]{1,500}

        groupId: id of the counter group, becomes part of the counter key.
        countableId: id of the counter within the group.
        """
        # Read the clock exactly once: the hour in the key and the delay of the
        # save task must be derived from the same instant.
        # The counting timeframe is 1h, determined by %H.
        now = datetime.datetime.utcnow()
        counter_key = now.strftime("%Y-%m-%d-%H") + "_" + groupId + "_" + countableId
        memcache_key = MEMCACHE_PREFIX + counter_key
        client = memcache.Client()
        n = client.incr(memcache_key)
        if n is None:
            # No counter could be incremented, so this is treated as the first
            # hit of the timeframe: schedule the save task, then create the
            # counter starting from 0 and count this hit.
            cls._add_task(counter_key, groupId, countableId, now)
            client.incr(memcache_key, initial_value=0)

    @classmethod
    def _add_task(cls, counter_key, groupId, countableId, now=None):
        """
        Enqueue the named task that saves counter_key after its hour ended.

        counter_key: "<YYYY-mm-dd-HH>_<groupId>_<countableId>", also used
            (with MEMCACHE_PREFIX) as the task name, so each counter gets at
            most one task per hour.
        groupId, countableId: passed on to the task as query parameters.
        now: the UTC timestamp counter_key was derived from; defaults to the
            current UTC time when omitted.

        TaskAlreadyExistsError and TombstonedTaskError are logged as warnings
        and swallowed; other errors propagate to the caller.
        """
        if now is None:
            now = datetime.datetime.utcnow()
        taskurl = TASK_URL + counter_key + "?groupId=" + groupId + "&countableId=" + countableId
        # The counting timeframe is 1h, determined by counter_key. The task
        # becomes due one minute after that hour ends (at most 61 minutes from
        # `now`). A relative countdown is used instead of a naive absolute eta
        # so the schedule does not depend on the server's local time zone.
        countdown = (61 - now.minute) * 60
        task = taskqueue.Task(url=taskurl, method='GET', name=MEMCACHE_PREFIX + counter_key,
                              countdown=countdown)
        queue = taskqueue.Queue(name='StatisticsDB')
        try:
            queue.add(task)
        except taskqueue.TaskAlreadyExistsError:  # may also occur if 2 increments are done simultaneously
            logging.warning("StatisticsDB TaskAlreadyExistsError lost memcache for %s", counter_key)
        except taskqueue.TombstonedTaskError:  # task name is locked for ...
            logging.warning("StatisticsDB TombstonedTaskError some bad guy ran this task premature manually %s", counter_key)

    @classmethod
    def save2db_task_handler(cls, counter_key, countableId, groupId):
        """
        Save counter from memcache to DB. Idempotent method.
        At the time this executes no more increments to this counter occur.

        counter_key: id of the entity to write and, with MEMCACHE_PREFIX,
            the memcache key to read.
        countableId, groupId: stored on the entity. Note the order: this
            method takes countableId BEFORE groupId, unlike increment().

        If the memcache value is gone, a warning is logged and nothing is
        written.
        """
        dbkey = ndb.Key(StatisticsDB, counter_key)
        n = memcache.get(MEMCACHE_PREFIX + counter_key)
        if n is None:
            logging.warning("StatisticsDB lost count for %s", counter_key)
            return
        stats = StatisticsDB(key=dbkey, count=n, countableId=countableId, groupId=groupId)
        stats.put()
        memcache.delete(MEMCACHE_PREFIX + counter_key)  # delete if put succeeded
        logging.info("StatisticsDB saved %s n = %i", counter_key, n)