Difference between revisions of "CoE Cluster november 2011/CUDA"
From Teknologisk videncenter
Line 44: | Line 44: | ||
return 0; | return 0; | ||
− | } </pre> | + | } </pre> |
<br> | <br> | ||
− | <pre | + | <pre> // removeblue.cu |
− | + | // Read image from file, remove blue channel and write onto another file | |
− | |||
− | unsigned char *CUDAimg; | + | //#include <stdio.h> |
+ | //#include <stdlib.h> | ||
+ | //#include <unistd.h> | ||
+ | |||
+ | __device__ void RGB(int x, int y, unsigned char* m, int step, int r, int g, int b) | ||
+ | { | ||
+ | unsigned char *p; | ||
+ | p = ((unsigned char *) (m + step*x))+3*y; | ||
+ | *p = (unsigned char) b; | ||
+ | *(p+1) = (unsigned char) g; | ||
+ | *(p+2) = (unsigned char) r; | ||
+ | } | ||
+ | __device__ void RGB(int x, int y, unsigned char* m, int step, int rgb) | ||
+ | { | ||
+ | unsigned char *p; | ||
+ | p = ((unsigned char *) (m + step*x)+3*y); | ||
+ | *p = (unsigned char) (rgb&0xff); | ||
+ | *(p+1) = (unsigned char) ((rgb>>8)&0xff); | ||
+ | *(p+2) = (unsigned char) ((rgb>>16)&0xff); | ||
+ | } | ||
+ | __device__ int RGB(int x, int y, unsigned char* m, int step) | ||
+ | { | ||
+ | unsigned char *p; | ||
+ | unsigned int b; | ||
+ | p = ((unsigned char *) (m + step*x)+3*y); | ||
+ | b = *p+((*(p+1))<<8)+((*(p+2))<<16); | ||
+ | return b; | ||
+ | } | ||
+ | |||
+ | __global__ void Count(unsigned char *img, int rows, int cols, int step) | ||
+ | { | ||
+ | float rowfac = ((float) rows)/gridDim.x; | ||
+ | int rowstart = blockIdx.x*rowfac; | ||
+ | int rowend = (blockIdx.x+1)*rowfac; | ||
+ | float colfac = ((float) cols)/blockDim.x; | ||
+ | int colstart = threadIdx.x*colfac; | ||
+ | int colend = (threadIdx.x+1)*colfac; | ||
+ | for (int i=rowstart; i<rowend; i++) | ||
+ | { | ||
+ | for (int j=colstart; j<colend; j++) | ||
+ | RGB(i, j, img, step, RGB(i, j, img, step)&0x0000ff); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step) | ||
+ | { | ||
+ | if (img!=NULL) | ||
+ | { | ||
+ | dim3 dimBlock(threads, threads, 1); | ||
+ | dim3 dimGrid(blocks, 1, 1); | ||
+ | |||
+ | unsigned char *CUDAimg; | ||
+ | cudaMalloc((void**) &CUDAimg, rows*cols*3); | ||
+ | cudaMemcpy(CUDAimg, img, rows*cols*3, cudaMemcpyHostToDevice); | ||
+ | Count<<<dimGrid, dimBlock>>>(CUDAimg, rows, cols, step); | ||
+ | cudaMemcpy(img, CUDAimg, rows*cols*3, cudaMemcpyDeviceToHost); | ||
+ | cudaFree(CUDAimg); | ||
+ | } | ||
+ | } | ||
+ | |||
</pre> | </pre> |
Revision as of 10:26, 9 December 2011
// Read image from file, remove blue channel and write onto another file include <iostream> include <string> include <sstream> include <cv.h> include <highgui.h> using namespace cv; using namespace std; include "PixelMat.hpp" void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step); int main(int argc, char* argv[]) { int blocks = 0; int threads = 0; if (argc > 4) { istringstream buf1(argv[1]); buf1 >> blocks; istringstream buf2(argv[2]); buf2 >> threads; PixelMat img = (PixelMat) imread(argv[3],-1); int rows = img.rows; int cols = img.cols; int step = img.step; fprintf(stderr, "%d\n", step); startCUDA(blocks, threads, img.data, rows, cols, step); imwrite(argv[4], img); } else cout << "Usage: " << argv[0] << " <blocks> <threads> <inputfile> <outputfile>" << endl; return 0; }
// removeblue.cu
// Read image from file, remove blue channel and write onto another file
//
// Pixel layout used throughout this file: 3 bytes per pixel stored as
// blue, green, red; consecutive image rows are `step` bytes apart.
#include <stdio.h>
//#include <stdlib.h>
//#include <unistd.h>

// Store the colour (r, g, b) into the pixel at row x, column y.
//   m    - first byte of the image buffer
//   step - distance in bytes between the starts of two consecutive rows
__device__ void RGB(int x, int y, unsigned char* m, int step, int r, int g, int b)
{
    unsigned char *p = m + (size_t) step * x + 3 * y;
    p[0] = (unsigned char) b;
    p[1] = (unsigned char) g;
    p[2] = (unsigned char) r;
}

// Store a packed colour into the pixel at row x, column y.
// Packing: bits 0-7 blue, bits 8-15 green, bits 16-23 red.
__device__ void RGB(int x, int y, unsigned char* m, int step, int rgb)
{
    unsigned char *p = m + (size_t) step * x + 3 * y;
    p[0] = (unsigned char) (rgb & 0xff);
    p[1] = (unsigned char) ((rgb >> 8) & 0xff);
    p[2] = (unsigned char) ((rgb >> 16) & 0xff);
}

// Load the pixel at row x, column y and return it packed as
// blue | green << 8 | red << 16.
__device__ int RGB(int x, int y, unsigned char* m, int step)
{
    const unsigned char *p = m + (size_t) step * x + 3 * y;
    unsigned int packed = p[0] + (p[1] << 8) + (p[2] << 16);
    return packed;
}

// Kernel: clear the blue byte of every pixel of a rows x cols image.
//
// Expected launch: 1D grid and 1D block. The rows are divided among the
// blocks (blockIdx.x / gridDim.x) and the columns among the threads of a
// block (threadIdx.x / blockDim.x). Any grid or block size is valid; a
// block or thread whose share is empty simply does nothing.
// No shared memory is used.
__global__ void Count(unsigned char *img, int rows, int cols, int step)
{
    // Integer arithmetic gives boundaries that tile [0, rows) and [0, cols)
    // exactly. The former float computation could round the final boundary
    // below rows/cols and leave the last row or column unprocessed.
    const int rowstart = (int) (((long long) blockIdx.x * rows) / gridDim.x);
    const int rowend   = (int) (((long long) (blockIdx.x + 1) * rows) / gridDim.x);
    const int colstart = (int) (((long long) threadIdx.x * cols) / blockDim.x);
    const int colend   = (int) (((long long) (threadIdx.x + 1) * cols) / blockDim.x);

    for (int i = rowstart; i < rowend; i++)
    {
        for (int j = colstart; j < colend; j++)
        {
            // Blue is the low byte of the packed value, so masking with
            // 0xffff00 drops blue and keeps green and red. (The previous
            // mask 0x0000ff did the opposite: it kept only blue.)
            RGB(i, j, img, step, RGB(i, j, img, step) & 0xffff00);
        }
    }
}

// Print a diagnostic on stderr when a CUDA runtime call failed.
// Returns true when `err` is cudaSuccess.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "removeblue: %s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Remove the blue channel of a host image in place, using the GPU.
//   blocks  - number of CUDA blocks (rows are divided among them)
//   threads - number of threads per block (columns are divided among them)
//   img     - host image buffer, 3 bytes per pixel; a NULL pointer is a no-op
//   rows    - image height in pixels
//   cols    - image width in pixels
//   step    - bytes between the starts of consecutive rows (>= 3 * cols)
// On any failure a message is written to stderr and `img` is left unchanged.
void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step)
{
    if (img == NULL)
        return;

    if (blocks <= 0 || threads <= 0 || rows <= 0 || cols <= 0 ||
        (long long) step < 3LL * cols)
    {
        fprintf(stderr,
                "removeblue: invalid arguments (blocks=%d threads=%d rows=%d cols=%d step=%d)\n",
                blocks, threads, rows, cols, step);
        return;
    }

    // The kernel only looks at threadIdx.x, so the block must be 1D. The
    // former (threads, threads, 1) block started threads*threads threads
    // doing duplicate work and could not be launched once threads exceeded 32.
    dim3 dimBlock(threads, 1, 1);
    dim3 dimGrid(blocks, 1, 1);

    // The kernel addresses rows with `step`, so the device buffer must hold
    // rows * step bytes, not rows * cols * 3.
    const size_t pitch = (size_t) step;
    const size_t rowBytes = (size_t) cols * 3;
    unsigned char *CUDAimg = NULL;
    if (!cudaOk(cudaMalloc((void**) &CUDAimg, pitch * (size_t) rows), "cudaMalloc"))
        return;

    // 2D copies transfer only the pixel bytes of each row, so any row
    // padding on the host side is never read or written.
    bool ok = cudaOk(cudaMemcpy2D(CUDAimg, pitch, img, pitch, rowBytes, rows,
                                  cudaMemcpyHostToDevice),
                     "cudaMemcpy2D (host to device)");
    if (ok)
    {
        Count<<<dimGrid, dimBlock>>>(CUDAimg, rows, cols, step);
        // Launch-configuration errors are only visible through cudaGetLastError().
        ok = cudaOk(cudaGetLastError(), "Count kernel launch");
    }
    if (ok)
        ok = cudaOk(cudaDeviceSynchronize(), "Count kernel execution");
    if (ok)
        cudaOk(cudaMemcpy2D(img, pitch, CUDAimg, pitch, rowBytes, rows,
                            cudaMemcpyDeviceToHost),
               "cudaMemcpy2D (device to host)");

    cudaOk(cudaFree(CUDAimg), "cudaFree");
}