Using boost multi index like relational DB

不羁的心 提交于 2019-12-18 07:16:17

问题


Here is the situation that I am trying to simulate:

  COL1                 Col2     Col3
CBT.151.5.T.FEED       S1       t1
CBT.151.5.T.FEED       s2       t2
CBT.151.5.T.FEED       s3       t3
CBT.151.5.T.FEED       s4       t4
CBT.151.5.T.FEED       s5       t1

CBT.151.8.T.FEED       s7       t1
CBT.151.5.Q.FEED       s8       t3

COL1 - is the ID, for a given ID there can be several symbols.
COL2 - symbols, they are unique
COL3 - update time of a symbol; two different symbols might update at the same time, hence update times are not unique.

My aim is to get the tickers which are most active, let's say the symbols that have been updated in the last 60 seconds. For this purpose I have used Boost.MultiIndex.

The Header file:

#ifndef __TICKER_INFO_MANAGER_IMPL__
#define __TICKER_INFO_MANAGER_IMPL__

#include <string>

#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <TickerInfoManagerConstants.h>
#include <TickerInfo.h>

namespace bmi = boost::multi_index;
namespace bip = boost::interprocess;

// Empty tag types: each one names an index of ticker_update_info_set via bmi::tag<...>.
struct id_index{};                // tags the index keyed on m_id
struct symbol_index{};            // tags the index keyed on m_symbol
struct last_update_time_index{};  // tags the index keyed on m_last_update_time

// Heterogeneous "less than" comparator: the two operands may have different
// types, as long as `lhs < rhs` is a valid expression for them.
struct Less {
  template<class Lhs, class Rhs>
    bool operator()(Lhs const& lhs, Rhs const& rhs) const {
      const bool lhs_is_smaller = lhs < rhs;
      return lhs_is_smaller;
    }
};


// Container of ticker records allocated from a managed shared-memory segment,
// exposing three ordered views of the same records:
//   id_index               - by m_id; non-unique, because one ID carries several symbols
//   symbol_index           - by m_symbol; unique, each symbol is stored once
//   last_update_time_index - by m_last_update_time; non-unique, symbols may update together
// Every index compares keys with Less, whose templated operator() also accepts
// lookup keys of a type different from the stored key.
typedef bmi::multi_index_container<
tickerUpdateInfoT,
  bmi::indexed_by<
  // Must NOT be unique: a unique ID index rejects the second symbol of any ID.
  bmi::ordered_non_unique
  <bmi::tag<id_index>,  BOOST_MULTI_INDEX_MEMBER( tickerUpdateInfo, shm_string, m_id), Less>,
  bmi::ordered_unique<
  bmi::tag<symbol_index>,BOOST_MULTI_INDEX_MEMBER(tickerUpdateInfo, shm_string, m_symbol), Less>,
  bmi::ordered_non_unique
  <bmi::tag<last_update_time_index>, BOOST_MULTI_INDEX_MEMBER(tickerUpdateInfo, int, m_last_update_time), Less> >,
  bip::managed_shared_memory::allocator<tickerUpdateInfo>::type
  > ticker_update_info_set;

  // Owns a managed shared-memory segment and a pointer to the
  // ticker_update_info_set that lives inside it, and offers insertion,
  // activity queries and a debug dump on top of that container.
  class tickerInfoMangerImplementation {

    public:
      // name: identifier kept in m_name.
      tickerInfoMangerImplementation( const sharedMemoryNameT & name );

      // Inserts a record, or refreshes the update time of an already known symbol.
      bool put_records( const tickerUpdateInfoT & record );

      // Counts the records of ID `key` that were updated within the last
      // `seconds`. The signature matches the out-of-class definition.
      int get_active_ticker_count( const thresholdT seconds, std::string key );

      // Writes every stored record to std::cout.
      void print_contents();

      // Gives callers access to the segment, e.g. to obtain allocators for new records.
      bip::managed_shared_memory& get_managed_memory_segment() {
        return m_managed_memory_segment;
      }

    private:
      // Helper of get_active_ticker_count(): converts a look-back window into a time value.
      int calculate_historical_time_using_threshold( const thresholdT seconds );

      // Declaration order matters: the segment must be constructed before the
      // container pointer is looked up in it.
      const sharedMemoryNameT    m_name;
      bip::managed_shared_memory m_managed_memory_segment;
      ticker_update_info_set     *p_ticker_info_set;
  };
#endif

The cpp file

#include <TickerInfoMangerImplementation.h>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <iostream>
#include "basic_time.h"

using namespace boost::interprocess;

// Opens (or creates) the shared-memory segment, then finds (or constructs) the
// record container stored in it under the name "SetOfTickerUpdateInformation".
// NOTE(review): `name` is only copied into m_name; the segment itself is always
// opened as "test" with a size of 65536 - confirm that this is intended.
tickerInfoMangerImplementation::tickerInfoMangerImplementation( const sharedMemoryNameT & name )
  : m_name(name),
    m_managed_memory_segment( open_or_create, "test", 65536 )
{
  // Allocator bound to the segment; handed to the container as its second ctor argument.
  typedef bip::managed_shared_memory::allocator<tickerUpdateInfoT>::type record_allocator_type;
  const record_allocator_type record_allocator =
    m_managed_memory_segment.get_allocator<tickerUpdateInfoT>();

  p_ticker_info_set = m_managed_memory_segment.find_or_construct<ticker_update_info_set>
    ("SetOfTickerUpdateInformation")
    ( ticker_update_info_set::ctor_args_list(), record_allocator );
}

bool tickerInfoMangerImplementation::put_records( const tickerUpdateInfoT & record ) {

  std::pair<ticker_update_info_set::iterator, bool> result_pair = p_ticker_info_set->insert( record );
  if( result_pair.second ) {
    return result_pair.second;
  }

  typedef ticker_update_info_set::index<symbol_index>::type ticker_update_info_set_by_symbol;

  ticker_update_info_set_by_symbol & sym_index = (*p_ticker_info_set).get<symbol_index>();
  ticker_update_info_set_by_symbol::iterator it = sym_index.find( record.m_symbol );
  tickerUpdateInfoT ticker_info = *it;
  ticker_info.m_last_update_time = record.m_last_update_time;
  return sym_index.replace( it, ticker_info );
}

int tickerInfoMangerImplementation::calculate_historical_time_using_threshold( const thresholdT seconds ) {

  basic_time::Secs_t seconds( threshold );
  basic_time tick_time;
  tick_time -= seconds;
  return ( tick_time.fullTime() );
}

int tickerInfoMangerImplementation::get_active_ticker_count( const thresholdT seconds, std::string key ) {

  typedef ticker_update_info_set::index<id_index>::type ticker_update_info_set_by_id;
  ticker_update_info_set_by_id & id_index = (*p_ticker_info_set).get<id_index>();
  int tick_time = calculate_historical_time_using_threshold( seconds );
  //Here I would like to find the key
  //Based on that key I would like to fetch all the symbols which have updated after a certain time(using lower bound)
  std::copy( it, time_index.end(), std::ostream_iterator<tickerUpdateInfoT>(std::cout) );
}


void tickerInfoMangerImplementation::print_contents() {
  const ticker_update_info_set::nth_index<1>::type& name_index = (*p_ticker_info_set).get<1>();
  std::copy( name_index.begin(), name_index.end(), std::ostream_iterator<tickerUpdateInfoT>(std::cout) );
}

// Streams one record as "<id> <symbol> <last_update_time> " followed by a newline.
std::ostream& operator<<(std::ostream& os, const tickerUpdateInfoT & obj) {
  return os << obj.m_id << " "
            << obj.m_symbol << " "
            << obj.m_last_update_time << " " << "\n";
}

Structure of a record that I would insert into boost multi index

#ifndef __TICKER_INFO__
#define __TICKER_INFO__

#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/containers/string.hpp>

// Character allocator type provided by managed_shared_memory.
typedef boost::interprocess::managed_shared_memory::allocator<char>::type               char_allocator;
// String type whose character storage is obtained through char_allocator.
typedef boost::interprocess::basic_string<char, std::char_traits<char>, char_allocator> shm_string;

//Data to insert in shared memory
typedef struct tickerUpdateInfo {

  shm_string  m_id;
  shm_string  m_symbol;
  int         m_last_update_time;

  tickerUpdateInfo( const char * id,
      const char *symbol,
      int last_update_time,
      const char_allocator &a)
    : m_id( id, a), m_symbol( symbol, a), m_last_update_time( last_update_time) {
    }

  tickerUpdateInfo& operator=(const tickerUpdateInfo& other) {
   if (this != &other) {
       m_last_update_time = other.m_last_update_time;
      }
    return *this;
  }
} tickerUpdateInfoT;

#endif

Now in the function get_active_ticker_count() I want to specify the key like CBT.151.5.T.FEED and it should return:

   S1       t1
   s2       t2
   s3       t3
   s4       t4
   s5       t1

Let's assume t1 > t2 > t3 > t4. I would then like to find the records whose times are greater than t3, and also the count of such symbols. How do I proceed? I have been able to insert records, but I am stuck on the retrieval part. Please help!


回答1:


I've simplified your (ridiculously complicated¹) model to:

// Sample update times, chosen so that t1 > t2 > t3 > t4.
enum TimePoints {
    t4 = 20,
    t3 = 70,
    t2 = 80,
    t1 = 100,
};

using TimeT  = unsigned int;
using Symbol = std::string;
using IdType = std::string;

// Simplified record: plain std::string fields instead of shared-memory strings.
struct tickerUpdateInfo {
    IdType m_id;
    Symbol m_symbol;
    TimeT  m_last_update_time;

    // Prints the record as "T[<id>,<TAB><symbol>,<TAB><time>]".
    friend std::ostream& operator<<(std::ostream& out, tickerUpdateInfo const& rec) {
        out << "T[" << rec.m_id;
        out << ",\t" << rec.m_symbol;
        out << ",\t" << rec.m_last_update_time;
        out << "]";
        return out;
    }
};

// The sample rows from the question.
static const tickerUpdateInfo data[] = {
    { "CBT.151.5.T.FEED", "S1", t1 },
    { "CBT.151.5.T.FEED", "s2", t2 },
    { "CBT.151.5.T.FEED", "s3", t3 },
    { "CBT.151.5.T.FEED", "s4", t4 },
    { "CBT.151.5.T.FEED", "s5", t1 },
    { "CBT.151.8.T.FEED", "s7", t1 },
    { "CBT.151.5.Q.FEED", "s8", t3 },
};

There. We can work with that. You want an index that's primarily time based, yet you can refine for symbol/id later:

// Tag naming the single index below; used only as a compile-time label.
struct most_active_index;

// Composite sort key: update time first, then symbol, then id.
using most_active_key = bmi::composite_key<tickerUpdateInfo,
    BOOST_MULTI_INDEX_MEMBER(tickerUpdateInfo, TimeT,  m_last_update_time),
    BOOST_MULTI_INDEX_MEMBER(tickerUpdateInfo, Symbol, m_symbol),
    BOOST_MULTI_INDEX_MEMBER(tickerUpdateInfo, IdType, m_id)>;

// Records kept in one ordered, non-unique index sorted by most_active_key.
using ticker_update_info_set = bmi::multi_index_container<
    tickerUpdateInfo,
    bmi::indexed_by<
        bmi::ordered_non_unique<bmi::tag<most_active_index>, most_active_key>
    >
>;

For our implementation, we don't even need to use the secondary key components, we can just write

// Counts, per symbol, the records whose update time is strictly greater than
// `since` (use lower_bound instead of upper_bound to include `since` itself).
std::map<Symbol, size_t> activity_histo(ticker_update_info_set const& tuis, TimeT since)
{
    auto const& by_activity = tuis.get<most_active_index>();

    std::map<Symbol, size_t> counts;
    auto const last = by_activity.end();
    for (auto it = by_activity.upper_bound(since); it != last; ++it)
        ++counts[it->m_symbol];

    return counts;
}

See it Live On Coliru.

Now if volumes get large, you could be tempted to optimize a bit using the secondary index component:

// Same result as counting record by record, but advances through the index one
// (time, symbol) group at a time.
//
// tuis:  the record set to scan.
// since: only records with an update time strictly greater than this are
//        counted (use lower_bound for greater-than-inclusive).
// Returns a map from symbol to the number of matching records.
std::map<Symbol, size_t> activity_histo_ex(ticker_update_info_set const& tuis, TimeT since)
{
    std::map<Symbol, size_t> histo;
    auto const& index = tuis.get<most_active_index>();

    // Take the end iterator from the same index that is being walked;
    // tuis.end() belongs to index #0 and only matches while the tagged index
    // happens to be the first one.
    for (auto lb = index.upper_bound(since), end = index.end(); lb != end;)
    {
        // First element past the current (time, symbol) group.
        auto ub = index.upper_bound(boost::make_tuple(lb->m_last_update_time, lb->m_symbol));
        histo[lb->m_symbol] += static_cast<size_t>(std::distance(lb, ub));

        lb = ub;
    }

    return histo;
}

I'm not sure this would become the quicker approach (your profiler would know). See it Live On Coliru too.

Rethink the design?

TBH this whole multi index thing is likely to slow you down due to suboptimal insertion times and lack of locality-of-reference when iterating records.

I'd suggest looking at

  • a single flat_multimap ordered by update-time
  • or even a (fixed-size) linear ring buffer ordered by time. This would make a lot of sense since you are most likely receiving the events in increasing time order anyway, so you can just keep appending at the end (and wrap around when the history window is full). This removes, all at once, any need for reallocation (given that you choose an appropriate maximum capacity for the ring buffer) and gives you optimal cache-prefetch performance when traversing the list for stats.

The second approach should really get some merit once you implement the ringbuffer using Boost Lockfree's spsc_queue offering. Why? Because you can host it in shared memory:

Shared-memory IPC synchronization (lock-free)


¹ The complexity would have been warranted if your code had been self-contained. Sadly, it was not (at all). I had to prune it in order to get something to work. This was, obviously, after removing all line numbers :)



来源:https://stackoverflow.com/questions/26474577/using-boost-multi-index-like-relational-db

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!