I am running into a huge performance bottleneck when using Azure Table Storage. My desire is to use tables as a sort of cache, so a long process may result in anywhere from hundreds to several thousand rows that need to be written.
basic concept - use parallelism to speed this up.
step 1 - give your thread pool enough threads to pull this off - ThreadPool.SetMinThreads(1024, 256);
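Something like this at startup, before any inserts run; note the connection limit line is my own addition rather than part of the original steps - the .NET default of two concurrent connections per host will strangle parallel REST calls:
//run once at startup, before kicking off the parallel inserts
ThreadPool.SetMinThreads(1024, 256);
//assumption on my part (not from the original tuning): raise the per-host
//connection limit, or the HTTP layer becomes the bottleneck
ServicePointManager.DefaultConnectionLimit = 1024;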
step 2 - use partitions. I use GUIDs as IDs and take the last two characters to split items into 256 unique partition keys (I actually group those into N subsets - 48 partitions in my case)
step 3 - insert using tasks; I use object pooling for the table references
public List<T> InsertOrUpdate(List<T> items)
{
    var subLists = SplitIntoPartitionedSublists(items);
    var tasks = new List<Task>();
    foreach (var subList in subLists)
    {
        List<T> list = subList;
        var task = Task.Factory.StartNew(() =>
        {
            var batchOp = new TableBatchOperation();
            var tableRef = GetTableRef();
            foreach (var item in list)
            {
                batchOp.Add(TableOperation.InsertOrReplace(item));
            }
            tableRef.ExecuteBatch(batchOp);
            ReleaseTableRef(tableRef);
        });
        tasks.Add(task);
    }
    Task.WaitAll(tasks.ToArray());
    return items;
}
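Usage looks roughly like this - SampleEntity and store are hypothetical, standing in for whatever ITableEntity type with a Guid Id you use for T:
//hypothetical entity type for illustration
public class SampleEntity : TableEntity
{
    public Guid Id { get; set; }
    public string Payload { get; set; }
}

var items = Enumerable.Range(0, 1000)
    .Select(i =>
    {
        var id = Guid.NewGuid();
        return new SampleEntity { Id = id, RowKey = id.ToString() };
    })
    .ToList();
store.InsertOrUpdate(items); //fans batches out across partitions in parallel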
private IEnumerable<List<T>> SplitIntoPartitionedSublists(IEnumerable<T> items)
{
    var itemsByPartition = new Dictionary<string, List<T>>();
    //split items into partitions
    foreach (var item in items)
    {
        var partition = GetPartition(item);
        if (itemsByPartition.ContainsKey(partition) == false)
        {
            itemsByPartition[partition] = new List<T>();
        }
        item.PartitionKey = partition;
        item.ETag = "*";
        itemsByPartition[partition].Add(item);
    }
    //split each partition into batch-sized subsets
    var subLists = new List<List<T>>();
    foreach (var partition in itemsByPartition.Keys)
    {
        var partitionItems = itemsByPartition[partition];
        for (int i = 0; i < partitionItems.Count; i += MaxBatch)
        {
            subLists.Add(partitionItems.Skip(i).Take(MaxBatch).ToList());
        }
    }
    return subLists;
}
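MaxBatch isn't defined in the snippet; batch operations against table storage are capped at 100 operations, and every operation in a batch must share a partition key (which is why the split above happens per partition), so the obvious definition is:
//Azure limit: at most 100 operations per batch, all with the same PartitionKey
private const int MaxBatch = 100;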
private void BuildPartitionIdentifiers(int partitionCount)
{
    var chars = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }.ToList();
    var keys = new List<string>();
    for (int i = 0; i < chars.Count; i++)
    {
        var keyA = chars[i];
        for (int j = 0; j < chars.Count; j++)
        {
            var keyB = chars[j];
            keys.Add(string.Concat(keyA, keyB));
        }
    }
    var keySetMaxSize = Math.Max(1, (int)Math.Floor((double)keys.Count / ((double)partitionCount)));
    var keySets = new List<List<string>>();
    if (partitionCount > keys.Count)
    {
        partitionCount = keys.Count;
    }
    //build the key sets
    var index = 0;
    while (index < keys.Count)
    {
        var keySet = keys.Skip(index).Take(keySetMaxSize).ToList();
        keySets.Add(keySet);
        index += keySetMaxSize;
    }
    //build the lookup and partition name for each key set
    _partitions = new List<string>();
    for (int i = 0; i < keySets.Count; i++)
    {
        var partitionName = String.Concat("subSet_", i);
        foreach (var key in keySets[i])
        {
            _partitionByKey[key] = partitionName;
        }
        _partitions.Add(partitionName);
    }
}
private string GetPartition(T item)
{
    //last two hex characters of the GUID (a guid string is 36 chars, indices 34-35)
    var partKey = item.Id.ToString().Substring(34, 2);
    return _partitionByKey[partKey];
}
private string GetPartition(Guid id)
{
    var partKey = id.ToString().Substring(34, 2);
    return _partitionByKey[partKey];
}
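As a worked example (hypothetical GUID; _partitionByKey is the Dictionary<string, string> lookup built above), the last two characters pick one of the 256 keys, which maps to a subset name:
var id = Guid.Parse("3f2504e0-4f89-11d3-9a0c-0305e82c33ab");
//Substring(34, 2) == "ab" -> _partitionByKey["ab"] -> e.g. "subSet_34" with 48 partitions
var partition = GetPartition(id);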
private CloudTable GetTableRef()
{
    CloudTable tableRef = null;
    //try to pop a table ref off the stack
    var foundTableRefInStack = _tableRefs.TryPop(out tableRef);
    if (foundTableRefInStack == false)
    {
        //no table ref available, must create a new one
        var client = _account.CreateCloudTableClient();
        //ExponentialRetry lives in Microsoft.WindowsAzure.Storage.RetryPolicies
        client.RetryPolicy = new ExponentialRetry(TimeSpan.FromSeconds(1), 4);
        tableRef = client.GetTableReference(_sTableName);
    }
    //ensure the table is created
    if (_bTableCreated != true)
    {
        tableRef.CreateIfNotExists();
        _bTableCreated = true;
    }
    return tableRef;
}
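ReleaseTableRef (used in step 3) isn't shown above; the counterpart is trivial - a minimal sketch, assuming _tableRefs is a ConcurrentStack<CloudTable> (the TryPop above implies as much):
private readonly ConcurrentStack<CloudTable> _tableRefs = new ConcurrentStack<CloudTable>();

private void ReleaseTableRef(CloudTable tableRef)
{
    //return the table reference to the pool for reuse by the next task
    _tableRefs.Push(tableRef);
}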
result - 19-22k operations per second, which is right at the storage account maximum
Hit me up if you're interested in the full source.
Need more? Use multiple storage accounts!
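A sketch of that idea - the account array and helper are hypothetical; the point is a stable partition-to-account assignment so each account stays under its own throughput cap:
//hypothetical: spread partitions across several storage accounts
private CloudStorageAccount[] _accounts; //parsed from multiple connection strings

private CloudStorageAccount GetAccountFor(string partition)
{
    //stable mapping: the same partition always lands on the same account
    var index = (partition.GetHashCode() & 0x7fffffff) % _accounts.Length;
    return _accounts[index];
}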
This is the result of months of trial and error, testing, and beating my head against a desk. I really hope it helps.