Polymorphism and derived classes in CUDA / CUDA Thrust

后端 未结 2 2010
北恋
北恋 2020-12-03 13:02

This is my first question on Stack Overflow, and it's quite a long question. The tl;dr version is: How do I work with a thrust::device_vector

2条回答
  •  温柔的废话
    2020-12-03 13:46

    I completely agree with @talonmies' answer. (e.g. I don't know that thrust has been extensively tested with polymorphism.) Furthermore, I have not fully parsed your code. I post this answer to add additional info, in particular that I believe some level of polymorphism can be made to work with thrust.

    A key observation I would make is that it is not allowed to pass as an argument to a __global__ function an object of a class with virtual functions. This means that polymorphic objects created on the host cannot be passed to the device (via thrust, or in ordinary CUDA C++). (One basis for this limitation is the requirement for virtual function tables in the objects, which will necessarily be different between host and device, coupled with the fact that it is illegal to directly take the address of a device function in host code).

    However, polymorphism can work in device code, including thrust device functions.

    The following example demonstrates this idea, restricting ourselves to objects created on the device although we can certainly initialize them with host data. I have created two classes, Triangle and Rectangle, derived from a base class Polygon which includes a virtual function area. Triangle and Rectangle inherit the function set_values from the base class but replace the virtual area function.

    We can then manipulate objects of those classes polymorphically as demonstrated here:

    #include <iostream>
    #include <iterator>
    #include <thrust/copy.h>
    #include <thrust/device_vector.h>
    #include <thrust/for_each.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/sequence.h>
    #include <thrust/tuple.h>
    #define N 4
    
    
    // Base class of the shape hierarchy. It stores a width and a height and
    // exposes a virtual area() that derived shapes replace. Every member
    // function is callable from both host and device code.
    class Polygon {
      protected:
        int width, height;
      public:
      // Start with zero dimensions so that area() in a derived class never
      // reads indeterminate values when set_values() has not been called yet.
      __host__ __device__  Polygon () : width(0), height(0) {}
      // Stores a as the width and b as the height.
      __host__ __device__  void set_values (int a, int b)
          { width=a; height=b; }
      // Area of the shape; the base class has no geometry and returns 0.
      __host__ __device__  virtual int area ()
          { return 0; }
    };
    
    // Axis-aligned rectangle: reuses Polygon's width/height storage and
    // replaces the virtual area() with width times height.
    class Rectangle: public Polygon {
      public:
      // Returns width * height.
      __host__ __device__  virtual int area ()
      {
        const int w = width;
        const int h = height;
        return w * h;
      }
    };
    
    // Triangle described by a base (width) and a height: reuses Polygon's
    // storage and replaces the virtual area() with half of width times height.
    class Triangle: public Polygon {
      public:
      // Returns width * height / 2 using integer arithmetic, so an odd
      // product is truncated toward zero.
      __host__ __device__  virtual int area ()
      {
        const int product = width * height;
        return product / 2;
      }
    };
    
    
    // Functor that initialises one shape from a (shape, width, height) tuple.
    // Intended for thrust::for_each over a zip iterator: element 0 is the
    // shape object, elements 1 and 2 are the values forwarded to set_values().
    struct init_f {
      // arg is taken by const reference; its elements are expected to be
      // references into the zipped ranges, so the shape itself stays writable.
      template <typename Tuple>
      __host__ __device__ void operator()(const Tuple &arg) {
        (thrust::get<0>(arg)).set_values(thrust::get<1>(arg), thrust::get<2>(arg));}
    };
    
    // Functor that chooses which concrete object a pointer slot refers to.
    // Tuple layout: (selector, pointer slot, first candidate, second candidate).
    // A selector of 0 stores the address of element 2 in the slot; any other
    // value stores the address of element 3.
    struct setup_f {
      // The address is taken where the functor runs, so when the functor is
      // executed on the device the stored pointer is a device address.
      template <typename Tuple>
      __host__ __device__ void operator()(const Tuple &arg) {
        if (thrust::get<0>(arg) == 0)
          thrust::get<1>(arg) = &(thrust::get<2>(arg));
        else
          thrust::get<1>(arg) = &(thrust::get<3>(arg));}
    };
    
    // Functor that evaluates area() through a pointer and stores the result.
    // Tuple layout: (pointer to object, output slot). The call goes through
    // the pointer with ->, so a virtual area() is dispatched on the dynamic
    // type of the pointed-to object.
    struct area_f {
      template <typename Tuple>
      __host__ __device__ void operator()(const Tuple &arg) {
        thrust::get<1>(arg) = (thrust::get<0>(arg))->area();}
    };
    
    
    int main () {
    
      thrust::device_vector  widths(N);
      thrust::device_vector heights(N);
      thrust::sequence( widths.begin(),  widths.end(), 2);
      thrust::sequence(heights.begin(), heights.end(), 3);
      thrust::device_vector rects(N);
      thrust::device_vector  trgls(N);
      thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(rects.begin(), widths.begin(), heights.begin())), thrust::make_zip_iterator(thrust::make_tuple(rects.end(), widths.end(), heights.end())), init_f());
      thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(trgls.begin(), widths.begin(), heights.begin())), thrust::make_zip_iterator(thrust::make_tuple(trgls.end(), widths.end(), heights.end())), init_f());
      thrust::device_vector polys(N);
      thrust::device_vector selector(N);
      for (int i = 0; i areas(N);
      thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(polys.begin(), areas.begin())), thrust::make_zip_iterator(thrust::make_tuple(polys.end(), areas.end())), area_f());
      thrust::copy(areas.begin(), areas.end(), std::ostream_iterator(std::cout, "\n"));
      return 0;
    }
    

    I suggest compiling the above code for a cc2.0 or newer architecture. I tested with CUDA 6 on RHEL 5.5.

    (The polymorphic example idea, and some of the code, was taken from here.)

提交回复
热议问题