Radix Sort on an Array of Strings?

后端 未结 2 1949
闹比i
闹比i 2021-01-03 14:13

I've been researching around, and while I've figured out the general idea of using Radix Sort to alphabetize an array of strings, I know I'm going in the wrong direction.

2条回答
  •  庸人自扰
    2021-01-03 14:45

    Very similar to iavr's answer, but sorting in place (benchmarked against iavr's solution with g++ -O3, it takes about 2020ms compared to iavr's 1780ms), while offering a regular interface and reusable code. The problem with iavr's implementation is that its logic only works with containers of strings, and is not easily extensible to other types. Obviously his specialized version is more efficient, but it might be worth sacrificing some performance for regularity. You can find the rest of the code at radix sort implementation

    General Radix sort:

    // Element type an iterator refers to. Goes through std::iterator_traits
    // so that raw pointers are supported as well as class-type iterators.
    template <typename Iter>
    using Iter_value = typename std::iterator_traits<Iter>::value_type;
    
    // intermediate struct to get partial template specialization
    template
    struct rdx_impl {
        static void rdx_sort(Iter begin, Iter end, int bits) { 
            // bits is # bits to consider up to if a max val is known ahead of time
            // most efficent (theoretically) when digits are base n, having lg(n) bits
            constexpr size_t digit_bits {8};        // # bits in digit, 8 works well for 32 and 64 bit vals
    
                size_t d {0};                   // current digit #
                for (long long mask = (1 << digit_bits) - 1;
                    d * digit_bits < bits;) {// ex. 0x000000ff for setting lower 8 bits on 32 bit num
                    cnt_sort(begin, end, range, Digit_cmp(mask, digit_bits*d));
                    ++d;
                }
            }
        };
    
    // specialization of rdx_sort for strings
    // Comparator ordering two sequences by length (a.size() < b.size()).
    // Works for any type with a size() member; const-callable so it can be
    // used through const references and by standard algorithms.
    struct Shorter {
        template <typename Seq>
        bool operator()(const Seq& a, const Seq& b) const { return a.size() < b.size(); }
    };
    template     
    struct rdx_impl {    // enough to hold ASCII char range
        static void rdx_sort(Iter begin, Iter end, int) {
            // ignore additional int argument
            int len_max = std::max_element(begin, end, Shorter())->size();
            for (int d = len_max - 1; d >= 0; --d)
                cnt_sort(begin, end, 128, Digit_cmp(d));
        }
    };
    
    // generic call interface for all iterators 
    template    // use intermediate struct for partial specialization
    void rdx_sort(Iter begin, Iter end, int bits) {
        rdx_impl>::rdx_sort(begin, end, bits);
    }
    

    Counting sort to sort on each digit (in place):

    // Stable counting sort of [begin, end), keyed by op(element).
    //   range: number of distinct keys; op must return a value in [0, range)
    //          for every element, otherwise counts is indexed out of bounds.
    //   op:    callable mapping an element to its key (digit).
    // Sorts through a temporary buffer and copies the result back, so the
    // element type must be default-constructible and copy-assignable.
    template <typename Iter, typename Op>
    void cnt_sort(Iter begin, Iter end, size_t range, Op op) {
        // iterator_traits rather than Iter::value_type so raw pointers work too
        using T = typename std::iterator_traits<Iter>::value_type;
        if (begin == end) return;               // nothing to sort; also avoids decrementing begin below
        std::vector<size_t> counts(range);      // init to 0
        for (auto i = begin; i != end; ++i)     // count # elems with key == k
            ++counts[op(*i)];
        for (size_t i = 1; i < range; ++i)
            counts[i] += counts[i-1];           // turn into # elems with key <= k
        std::vector<T> res(end - begin);
        // Walk backwards so that equal keys keep their relative order (stability),
        // which is what makes multi-pass radix sort correct.
        for (auto j = end; j != begin;) {
            --j;
            res[--counts[op(*j)]] = *j;
        }
        // ~18% of time is spent on copying
        std::copy(res.begin(), res.end(), begin);
    }
    

    Extract value of digit:

    // Functor extracting a "digit" (particular bits) of a key for radix sort.
    // Specialize Digit_cmp for non-integral types to provide radix sort with digits.
    template <typename T>
    class Digit_cmp {
        const long long mask; // 0..63 bitfield to test against
        const size_t to_shift;
    public:
        // m:  mask applied after shifting, ex. 0xff for an 8-bit digit
        // ts: number of bits to shift right before masking; must be smaller
        //     than the bit width of the (promoted) key type
        Digit_cmp(long long m, size_t ts) : mask{m}, to_shift{ts} {}
        // By default assumes an integral key: shift the wanted digit down to
        // the lowest bits, then mask everything else off.
        size_t operator()(T n) const {
            return (n >> to_shift) & mask;
        }
    };
    // Specialization for strings: the digit is the character at a fixed index.
    template <>
    class Digit_cmp<std::string> {
        const size_t digit;   // character index this functor extracts
    public:
        Digit_cmp(size_t d) : digit{d} {}
        // Returns the byte at index digit as a value in [0, 255], or 0 when the
        // string is too short, so shorter strings sort before longer ones with
        // the same prefix.
        size_t operator()(const std::string& str) const {
            // Go through unsigned char: a plain (possibly signed) char >= 0x80
            // would otherwise convert to a huge size_t.
            return str.size() > digit
                ? static_cast<size_t>(static_cast<unsigned char>(str[digit]))
                : size_t{0};
        }
    };
    

提交回复
热议问题