Difference between revisions of "CoE Cluster november 2011/CUDA"

From Teknologisk videncenter
Jump to: navigation, search
(Created page with "// Read image from file, remove blue channel and write onto another file #include <iostream> #include <string> #include <sstream> #include <cv.h> #include <highgui.h> using name...")
 
 
(2 intermediate revisions by the same user not shown)
Line 1: Line 1:
// Read image from file, remove blue channel and write onto another file
+
We shall use the following two files (one in C++ and one in CUDA C) as the basis for further development of our parallel programming skills - using Nvidia's CUDA technology for parallel processing onboard Graphics Processing Units (GPUs).  The programs currently remove all blue tones from a selected still image.  We will develop the idea to explore performance and algorithmic issues when using CUDA and OpenCV (see literature list) in detection of skin tones in still and moving images:
 +
 +
<pre>// Read image from file, remove blue channel and write onto another file
 +
 
 +
include &lt;iostream&gt;
 +
 
 +
include &lt;string&gt;
 +
 
 +
include &lt;sstream&gt;
 +
 
 +
include &lt;cv.h&gt;
  
#include <iostream>
+
include &lt;highgui.h&gt;
#include <string>
 
#include <sstream>
 
#include <cv.h>
 
#include <highgui.h>
 
using namespace cv;
 
using namespace std;
 
#include "PixelMat.hpp"
 
  
void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step);
+
using namespace cv; using namespace std;
  
int main(int argc, char* argv[])
+
include "PixelMat.hpp"
{
 
  int blocks = 0;
 
  int threads = 0;
 
  
  if (argc > 4)
+
void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step);
    {
 
      istringstream buf1(argv[1]);
 
      buf1 >> blocks;
 
      istringstream buf2(argv[2]);
 
      buf2 >> threads;
 
      PixelMat img = (PixelMat) imread(argv[3],-1);
 
      int rows = img.rows;
 
      int cols = img.cols;
 
      int step = img.step;
 
  
      fprintf(stderr, "%d\n", step);
+
int main(int argc, char* argv[]) {
  
      startCUDA(blocks, threads, img.data, rows, cols, step);
+
int blocks = 0;
 +
int threads = 0;
  
      imwrite(argv[4], img);
+
if (argc &gt; 4)
    }
+
  {
  else
+
    istringstream buf1(argv[1]);
    cout << "Usage: " << argv[0] << " <blocks> <threads> <inputfile> <outputfile>" << endl;
+
    buf1 &gt;&gt; blocks;
 +
    istringstream buf2(argv[2]);
 +
    buf2 &gt;&gt; threads;
 +
    PixelMat img = (PixelMat) imread(argv[3],-1);
 +
    int rows = img.rows;
 +
    int cols = img.cols;
 +
    int step = img.step;
  
  return 0;
+
    fprintf(stderr, "%d\n", step);
}
 
  
 +
    startCUDA(blocks, threads, img.data, rows, cols, step);
  
 +
    imwrite(argv[4], img);
 +
  }
 +
else
 +
  cout &lt;&lt; "Usage: " &lt;&lt; argv[0] &lt;&lt; " &lt;blocks&gt; &lt;threads&gt; &lt;inputfile&gt; &lt;outputfile&gt;" &lt;&lt; endl;
  
// removeblue.cu
+
return 0;
  
 +
} </pre>
 +
<br>
 +
<pre> // removeblue.cu
 
// Read image from file, remove blue channel and write onto another file
 
// Read image from file, remove blue channel and write onto another file
  
  
//#include <stdio.h>
+
//#include &lt;stdio.h&gt;
//#include <stdlib.h>
+
//#include &lt;stdlib.h&gt;
//#include <unistd.h>
+
//#include &lt;unistd.h&gt;
  
 
__device__ void RGB(int x, int y, unsigned char* m, int step, int r, int g, int b)
 
__device__ void RGB(int x, int y, unsigned char* m, int step, int r, int g, int b)
Line 63: Line 68:
 
   unsigned char *p;
 
   unsigned char *p;
 
   p = ((unsigned char *) (m + step*x)+3*y);
 
   p = ((unsigned char *) (m + step*x)+3*y);
   *p = (unsigned char) (rgb&0xff);
+
   *p = (unsigned char) (rgb&amp;0xff);
   *(p+1) = (unsigned char) ((rgb>>8)&0xff);
+
   *(p+1) = (unsigned char) ((rgb&gt;&gt;8)&amp;0xff);
   *(p+2) = (unsigned char) ((rgb>>16)&0xff);
+
   *(p+2) = (unsigned char) ((rgb&gt;&gt;16)&amp;0xff);
 
}
 
}
 
__device__ int RGB(int x, int y, unsigned char* m, int step)
 
__device__ int RGB(int x, int y, unsigned char* m, int step)
Line 72: Line 77:
 
   unsigned int b;
 
   unsigned int b;
 
   p = ((unsigned char *) (m + step*x)+3*y);
 
   p = ((unsigned char *) (m + step*x)+3*y);
   b = *p+((*(p+1))<<8)+((*(p+2))<<16);
+
   b = *p+((*(p+1))&lt;&lt;8)+((*(p+2))&lt;&lt;16);
 +
  return b;
 +
}
 +
 
 +
__global__ void Count(unsigned char *img, int rows, int cols, int step)
 +
{
 +
  float rowfac = ((float) rows)/gridDim.x;
 +
  int rowstart = blockIdx.x*rowfac;
 +
  int rowend = (blockIdx.x+1)*rowfac;
 +
  float colfac = ((float) cols)/blockDim.x;
 +
  int colstart = threadIdx.x*colfac;
 +
  int colend = (threadIdx.x+1)*colfac;
 +
  for (int i=rowstart; i&lt;rowend; i++)
 +
    {
 +
      for (int j=colstart; j&lt;colend; j++)
 +
        RGB(i, j, img, step, RGB(i, j, img, step)&amp;0x0000ff);
 +
    }
 +
}
 +
 
 +
void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step)
 +
{
 +
  if (img!=NULL)
 +
  {
 +
      dim3 dimBlock(threads, threads, 1);
 +
          dim3 dimGrid(blocks, 1, 1);
 +
 
 +
      unsigned char *CUDAimg;
 +
      cudaMalloc((void**) &amp;CUDAimg, rows*cols*3);
 +
      cudaMemcpy(CUDAimg, img, rows*cols*3, cudaMemcpyHostToDevice);
 +
      Count&lt;&lt;&lt;dimGrid, dimBlock&gt;&gt;&gt;(CUDAimg, rows, cols, step);
 +
      cudaMemcpy(img, CUDAimg, rows*cols*3, cudaMemcpyDeviceToHost);
 +
      cudaFree(CUDAimg);
 +
  }
 +
}
 +
 
 +
</pre>

Latest revision as of 11:32, 9 December 2011

We shall use the following two files (one in C++ and one in CUDA C) as the basis for further development of our parallel programming skills - using Nvidia's CUDA technology for parallel processing onboard Graphics Processing Units (GPUs). The programs currently remove all blue tones from a selected still image. We will develop the idea to explore performance and algorithmic issues when using CUDA and OpenCV (see literature list) in detection of skin tones in still and moving images:

// Read image from file, remove blue channel and write onto another file

#include <iostream>

#include <string>

#include <sstream>

#include <cv.h>

#include <highgui.h>

using namespace cv; using namespace std;

#include "PixelMat.hpp"

void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step);

int main(int argc, char* argv[]) { 

 int blocks = 0;
int threads = 0;

 if (argc > 4)
   {
     istringstream buf1(argv[1]);
     buf1 >> blocks;
     istringstream buf2(argv[2]);
     buf2 >> threads;
     PixelMat img = (PixelMat) imread(argv[3],-1);
     int rows = img.rows;
     int cols = img.cols;
     int step = img.step;

     fprintf(stderr, "%d\n", step);

     startCUDA(blocks, threads, img.data, rows, cols, step);

     imwrite(argv[4], img);
   }
else
   cout << "Usage: " << argv[0] << " <blocks> <threads> <inputfile> <outputfile>" << endl;

 return 0;

} 


 // removeblue.cu
// Read image from file, remove blue channel and write onto another file


#include <stdio.h>
//#include <stdlib.h>
//#include <unistd.h>

// Stores one pixel from separate colour components.
//
// The pixel lives at byte offset step*x + 3*y from m, so x selects the row
// (step bytes each) and y the pixel within that row (3 bytes each).
// The three bytes are written in the order b, g, r; each component is
// truncated to its low 8 bits by the conversion to unsigned char.
__device__ void RGB(int x, int y, unsigned char* m, int step, int r, int g, int b)
{
  unsigned char *pixel = m + step*x + 3*y;
  pixel[0] = static_cast<unsigned char>(b);
  pixel[1] = static_cast<unsigned char>(g);
  pixel[2] = static_cast<unsigned char>(r);
}
// Stores one pixel from a packed 24-bit value.
//
// The pixel lives at byte offset step*x + 3*y from m. Bits 0-7 of rgb go to
// the first byte of the pixel, bits 8-15 to the second and bits 16-23 to the
// third; any higher bits of rgb are ignored.
__device__ void RGB(int x, int y, unsigned char* m, int step, int rgb)
{
  unsigned char *pixel = m + step*x + 3*y;
  for (int k = 0; k < 3; ++k)
    pixel[k] = static_cast<unsigned char>((rgb >> (8*k)) & 0xff);
}
// Loads one pixel as a packed 24-bit value.
//
// The pixel lives at byte offset step*x + 3*y from m. The first byte of the
// pixel ends up in bits 0-7 of the result, the second in bits 8-15 and the
// third in bits 16-23; bits 24 and above are zero.
__device__ int RGB(int x, int y, unsigned char* m, int step)
{
  const unsigned char *pixel = m + step*x + 3*y;
  // The three shifted bytes occupy disjoint bit ranges, so OR-ing them
  // yields the same value as adding them.
  unsigned int packed = pixel[0];
  packed |= static_cast<unsigned int>(pixel[1]) << 8;
  packed |= static_cast<unsigned int>(pixel[2]) << 16;
  return packed;
}

// Kernel: clears the blue component of every pixel of a 3-bytes-per-pixel
// image, in place.
//
// Launch layout: only the x dimensions are used. Block b of the grid handles
// rows [b*rows/gridDim.x, (b+1)*rows/gridDim.x) and thread t of each block
// handles columns [t*cols/blockDim.x, (t+1)*cols/blockDim.x). Any grid/block
// size >= 1 therefore covers the whole image exactly once. No shared memory
// is used.
//
// img  - image buffer; row i starts at byte offset i*step
// rows - number of image rows
// cols - number of pixels per row
// step - bytes per row (must be at least 3*cols)
__global__ void Count(unsigned char *img, int rows, int cols, int step)
{
  // Threads/blocks that differ only in y or z would repeat exactly the same
  // work as their x-only twin, so let them exit immediately.
  if (threadIdx.y != 0 || threadIdx.z != 0 || blockIdx.y != 0 || blockIdx.z != 0)
    return;

  // Integer partitioning: unlike the float-based split it cannot round a
  // boundary down and skip a row/column, and consecutive ranges always meet.
  const long long nblocks  = gridDim.x;
  const long long nthreads = blockDim.x;
  const int rowstart = (int) (((long long) blockIdx.x * rows) / nblocks);
  const int rowend   = (int) ((((long long) blockIdx.x + 1) * rows) / nblocks);
  const int colstart = (int) (((long long) threadIdx.x * cols) / nthreads);
  const int colend   = (int) ((((long long) threadIdx.x + 1) * cols) / nthreads);

  for (int i=rowstart; i<rowend; i++)
    {
      for (int j=colstart; j<colend; j++)
        {
          // The packed value holds the pixel's first byte in bits 0-7, and
          // the (r,g,b) overload of RGB() stores b in that first byte.
          // Masking with 0xffff00 zeroes blue and keeps the other two
          // components (the previous mask 0x0000ff did the opposite).
          RGB(i, j, img, step, RGB(i, j, img, step)&0xffff00);
        }
    }
}

// Host wrapper: copies the image to the GPU, runs the Count kernel on it and
// copies the result back into img.
//
// blocks  - number of thread blocks (grid x dimension), must be > 0
// threads - threads per block (block x dimension), must be > 0
// img     - host pixel buffer, 3 bytes per pixel, modified in place;
//           a NULL pointer makes the call a silent no-op
// rows    - number of image rows
// cols    - number of pixels per row
// step    - bytes per row; must be at least 3*cols
//
// Invalid arguments and CUDA failures are reported on stderr; in that case
// img is left as it was before the failing step.
void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step)
{
   if (img == NULL)
      return;

   // A zero-sized launch is rejected by CUDA, and a row shorter than 3*cols
   // bytes would make the kernel write past the end of each row.
   if (blocks <= 0 || threads <= 0 || rows <= 0 || cols <= 0 ||
       (long long) step < 3LL * cols)
   {
      fprintf(stderr, "startCUDA: invalid launch configuration or image geometry\n");
      return;
   }

   // The kernel addresses row i at byte offset i*step, so the device buffer
   // has to span rows*step bytes (not rows*cols*3).
   const size_t bytes = (size_t) rows * (size_t) step;

   // The kernel only uses threadIdx.x; a threads x threads block would run
   // threads^2 threads and exceed the per-block limit once threads > 32.
   dim3 dimBlock(threads, 1, 1);
   dim3 dimGrid(blocks, 1, 1);

   unsigned char *CUDAimg = NULL;
   cudaError_t err = cudaMalloc((void**) &CUDAimg, bytes);
   if (err == cudaSuccess)
      err = cudaMemcpy(CUDAimg, img, bytes, cudaMemcpyHostToDevice);
   if (err == cudaSuccess)
   {
      Count<<<dimGrid, dimBlock>>>(CUDAimg, rows, cols, step);
      // A launch does not return a status; configuration errors show up here.
      err = cudaGetLastError();
   }
   if (err == cudaSuccess)
   {
      // Blocking copy: waits for the kernel and surfaces execution errors.
      err = cudaMemcpy(img, CUDAimg, bytes, cudaMemcpyDeviceToHost);
   }
   if (err != cudaSuccess)
      fprintf(stderr, "startCUDA: %s\n", cudaGetErrorString(err));

   // cudaFree(NULL) is a no-op, so this is safe even if cudaMalloc failed.
   cudaFree(CUDAimg);
}