/* * tclAppInit.c -- * * Provides a default version of the main program and Tcl_AppInit * function for Tcl applications (without Tk). * * Copyright (c) 1993 The Regents of the University of California. * Copyright (c) 1994-1997 Sun Microsystems, Inc. * Copyright (c) 1998-1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * * RCS: @(#) $Id: tclAppInit.c,v 1.18 2008/04/27 22:21:33 dkf Exp $ */ #include "tcl.h" #ifdef TCL_TEST #include "tclInt.h" extern Tcl_PackageInitProc Procbodytest_Init; extern Tcl_PackageInitProc Procbodytest_SafeInit; extern Tcl_PackageInitProc TclObjTest_Init; extern Tcl_PackageInitProc Tcltest_Init; #endif /* TCL_TEST */ #ifdef TCL_XT_TEST extern void XtToolkitInitialize(void); extern int Tclxttest_Init(Tcl_Interp *interp); #endif /* *---------------------------------------------------------------------- * * main -- * * This is the main program for the application. * * Results: * None: Tcl_Main never returns here, so this function never returns * either. * * Side effects: * Whatever the application does. * *---------------------------------------------------------------------- */ int mainttt(int,char **); static int Hello_Cmd(ClientData cdata, Tcl_Interp *interp, int objc, Tcl_Obj * CONST objv[]) { char *argv[] = { "t" , "-bench"}; mainttt(2, argv); Tcl_SetObjResult(interp, Tcl_NewStringObj("Hello, World!", -1)); return TCL_OK; } /* * Hello_Init -- Called when Tcl loads your extension. int DLLEXPORT */ int Hello_Init(Tcl_Interp *interp) { if (Tcl_InitStubs(interp, TCL_VERSION, 0) == NULL) { return TCL_ERROR; } /* changed this to check for an error - GPS */ if (Tcl_PkgProvide(interp, "Hello", "1.0") == TCL_ERROR) { return TCL_ERROR; } Tcl_CreateObjCommand(interp, "hello", Hello_Cmd, NULL, NULL); return TCL_OK; } int main( int argc, /* Number of command-line arguments. */ char **argv) /* Values of command-line arguments. */ { /* * The following #if block allows you to change the AppInit function by * using a #define of TCL_LOCAL_APPINIT instead of rewriting this entire * file. The #if checks for that #define and uses Tcl_AppInit if it does * not exist. */ #ifndef TCL_LOCAL_APPINIT #define TCL_LOCAL_APPINIT Tcl_AppInit #endif extern int TCL_LOCAL_APPINIT(Tcl_Interp *interp); /* * The following #if block allows you to change how Tcl finds the startup * script, prime the library or encoding paths, fiddle with the argv, * etc., without needing to rewrite Tcl_Main() */ #ifdef TCL_LOCAL_MAIN_HOOK extern int TCL_LOCAL_MAIN_HOOK(int *argc, char ***argv); #endif #ifdef TCL_XT_TEST XtToolkitInitialize(); #endif #ifdef TCL_LOCAL_MAIN_HOOK TCL_LOCAL_MAIN_HOOK(&argc, &argv); #endif Tcl_Main(argc, argv, TCL_LOCAL_APPINIT); return 0; /* Needed only to prevent compiler warning. */ } /* *---------------------------------------------------------------------- * * Tcl_AppInit -- * * This function performs application-specific initialization. Most * applications, especially those that incorporate additional packages, * will have their own version of this function. * * Results: * Returns a standard Tcl completion code, and leaves an error message in * the interp's result if an error occurs. * * Side effects: * Depends on the startup script. * *---------------------------------------------------------------------- */ int Tcl_AppInit( Tcl_Interp *interp) /* Interpreter for application. */ { if (Tcl_Init(interp) == TCL_ERROR) { return TCL_ERROR; } #ifdef TCL_TEST #ifdef TCL_XT_TEST if (Tclxttest_Init(interp) == TCL_ERROR) { return TCL_ERROR; } #endif if (Tcltest_Init(interp) == TCL_ERROR) { return TCL_ERROR; } Tcl_StaticPackage(interp, "Tcltest", Tcltest_Init, (Tcl_PackageInitProc *) NULL); if (TclObjTest_Init(interp) == TCL_ERROR) { return TCL_ERROR; } if (Procbodytest_Init(interp) == TCL_ERROR) { return TCL_ERROR; } Tcl_StaticPackage(interp, "procbodytest", Procbodytest_Init, Procbodytest_SafeInit); #endif /* TCL_TEST */ /* * Call the init functions for included packages. Each call should look * like this: * * if (Mod_Init(interp) == TCL_ERROR) { * return TCL_ERROR; * } * * where "Mod" is the name of the module. (Dynamically-loadable packages * should have the same entry-point name.) */ /* * Call Tcl_CreateCommand for application-specific commands, if they * weren't already created by the init functions called above. */ /* * Specify a user-specific startup file to invoke if the application is * run interactively. Typically the startup file is "~/.apprc" where "app" * is the name of the application. If this line is deleted then no user- * specific startup file will be run under any conditions. */ #ifdef DJGPP Tcl_SetVar(interp, "tcl_rcFileName", "~/tclsh.rc", TCL_GLOBAL_ONLY); #else Tcl_SetVar(interp, "tcl_rcFileName", "~/.tclshrc", TCL_GLOBAL_ONLY); #endif Hello_Init(interp); /* Test */ return TCL_OK; } /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ /* * Copyright 1993-2007 NVIDIA Corporation. All rights reserved. * * NOTICE TO USER: * * This source code is subject to NVIDIA ownership rights under U.S. and * international Copyright laws. * * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOURCE CODE. * * U.S. Government End Users. This source code is a "commercial item" as * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of * "commercial computer software" and "commercial computer software * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) * and is provided to the U.S. Government only as a commercial end item. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the * source code with only those rights set forth herein. */ /* Recursive Gaussian filter sgreen 8/1/08 This code sample implements a Gaussian blur using Deriche's recursive method: http://citeseer.ist.psu.edu/deriche93recursively.html This is similar to the box filter sample in the SDK, but it uses the previous outputs of the filter as well as the previous inputs. This is also known as an IIR (infinite impulse response) filter, since its response to an input impulse can last forever. The main advantage of this method is that the execution time is independent of the filter width. The GPU processes columns of the image in parallel. To avoid uncoalesced reads for the row pass we transpose the image and then transpose it back again afterwards. The implementation is based on code from the CImg library: http://cimg.sourceforge.net/ Thanks to David Tschumperlé and all the CImg contributors! */ #include #include #include #include #include #if defined (__APPLE__) || defined(MACOSX) #include #else #include #endif #include #include #include #define USE_SIMPLE_FILTER 0 char *image_filename = "lena.ppm"; float sigma = 10.0f; int order = 0; int nthreads = 64; // number of threads per block unsigned int width, height; uint* h_img = NULL; uint* d_img = NULL; uint* d_temp = NULL; GLuint pbo = 0; // OpenGL pixel buffer object GLuint texid = 0; // texture unsigned int timer = 0; int fpsCount = 0; // FPS count for averaging int fpsLimit = 1; // FPS limit for sampling int glutwin =-1; //Round a / b to nearest higher integer value int iDivUp(int a, int b){ return (a % b != 0) ? (a / b + 1) : (a / b); } /* Transpose a 2D array (see SDK transpose example) */ void transpose(uint *d_src, uint *d_dest, uint width, int height) { dim3 grid(iDivUp(width, BLOCK_DIM), iDivUp(height, BLOCK_DIM), 1); dim3 threads(BLOCK_DIM, BLOCK_DIM, 1); d_transpose<<< grid, threads >>>(d_dest, d_src, width, height); cutilCheckMsg("Kernel execution failed"); } /* Perform Gaussian filter on a 2D image using CUDA Parameters: d_src - pointer to input image in device memory d_dest - pointer to destination image in device memory d_temp - pointer to temporary storage in device memory width - image width height - image height sigma - sigma of Gaussian order - filter order (0, 1 or 2) */ // 8-bit RGBA version void gaussianFilterRGBA(uint *d_src, uint *d_dest, uint *d_temp, int width, int height, float sigma, int order) { const float nsigma = sigma < 0.1f ? 0.1f : sigma, alpha = 1.695f / nsigma, ema = (float)std::exp(-alpha), ema2 = (float)std::exp(-2*alpha), b1 = -2*ema, b2 = ema2; float a0 = 0, a1 = 0, a2 = 0, a3 = 0, coefp = 0, coefn = 0; switch (order) { case 0: { const float k = (1-ema)*(1-ema)/(1+2*alpha*ema-ema2); a0 = k; a1 = k*(alpha-1)*ema; a2 = k*(alpha+1)*ema; a3 = -k*ema2; } break; case 1: { const float k = (1-ema)*(1-ema)/ema; a0 = k*ema; a1 = a3 = 0; a2 = -a0; } break; case 2: { const float ea = (float)std::exp(-alpha), k = -(ema2-1)/(2*alpha*ema), kn = (-2*(-1+3*ea-3*ea*ea+ea*ea*ea)/(3*ea+1+3*ea*ea+ea*ea*ea)); a0 = kn; a1 = -kn*(1+k*alpha)*ema; a2 = kn*(1-k*alpha)*ema; a3 = -kn*ema2; } break; default: fprintf(stderr, "gaussianFilter: invalid order parameter!\n"); return; } coefp = (a0+a1)/(1+b1+b2); coefn = (a2+a3)/(1+b1+b2); // process columns #if USE_SIMPLE_FILTER d_simpleRecursive_rgba<<< iDivUp(width, nthreads), nthreads >>>(d_src, d_temp, width, height, ema); #else d_recursiveGaussian_rgba<<< iDivUp(width, nthreads), nthreads >>>(d_src, d_temp, width, height, a0, a1, a2, a3, b1, b2, coefp, coefn); #endif cutilCheckMsg("Kernel execution failed"); transpose(d_temp, d_dest, width, height); cutilCheckMsg("transpose: Kernel execution failed"); // process rows #if USE_SIMPLE_FILTER d_simpleRecursive_rgba<<< iDivUp(height, nthreads), nthreads >>>(d_dest, d_temp, height, width, ema); #else d_recursiveGaussian_rgba<<< iDivUp(height, nthreads), nthreads >>>(d_dest, d_temp, height, width, a0, a1, a2, a3, b1, b2, coefp, coefn); #endif cutilCheckMsg("Kernel execution failed"); transpose(d_temp, d_dest, height, width); } // display results using OpenGL void display() { cutilCheckError(cutStartTimer(timer)); // execute filter, writing results to pbo uint *d_result; cutilSafeCall(cudaGLMapBufferObject((void**)&d_result, pbo)); gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order); cutilSafeCall(cudaGLUnmapBufferObject(pbo)); // load texture from pbo glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, texid); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); // display results glClear(GL_COLOR_BUFFER_BIT); glEnable(GL_TEXTURE_2D); glDisable(GL_DEPTH_TEST); glBegin(GL_QUADS); glVertex2f(0, 0); glTexCoord2f(0, 0); glVertex2f(0, 1); glTexCoord2f(1, 0); glVertex2f(1, 1); glTexCoord2f(1, 1); glVertex2f(1, 0); glTexCoord2f(0, 1); glEnd(); glDisable(GL_TEXTURE_2D); glutSwapBuffers(); cutilCheckError(cutStopTimer(timer)); fpsCount++; if (fpsCount == fpsLimit) { char fps[256]; float ifps = 1.f / (cutGetAverageTimerValue(timer) / 1000.f); sprintf(fps, "CUDA Recursive Gaussian filter: %3.1f fps", ifps); glutSetWindowTitle(fps); fpsCount = 0; fpsLimit = (int)max(ifps, 1.f); cutilCheckError(cutResetTimer(timer)); } } void idle() { glutPostRedisplay(); } void cleanup(); void keyboard(unsigned char key, int x, int y) { switch(key) { case 27: // exit(0); // cleanup(); glutDestroyWindow(glutwin); // glutLeaveMainLoop(); return; break; case '=': sigma+=0.1f; break; case '-': sigma-=0.1f; if (sigma < 0.0) sigma = 0.0f; break; case '+': sigma+=1.0f; break; case '_': sigma-=1.0f; if (sigma < 0.0) sigma = 0.0f; break; case '0': order = 0; break; case '1': order = 1; sigma = 0.5f; break; case '2': order = 2; sigma = 0.5f; break; default: break; } printf("sigma = %f\n", sigma); glutPostRedisplay(); } void reshape(int x, int y) { glViewport(0, 0, x, y); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0); } void initCuda() { unsigned int size = width * height * sizeof(uint); // allocate device memory cutilSafeCall( cudaMalloc( (void**) &d_img, size)); cutilSafeCall( cudaMalloc( (void**) &d_temp, size)); cutilSafeCall( cudaMemcpy( d_img, h_img, size, cudaMemcpyHostToDevice)); cutilCheckError( cutCreateTimer( &timer)); } void cleanup() { cutilCheckError( cutDeleteTimer( timer)); free(h_img); cutilSafeCall(cudaFree(d_img)); cutilSafeCall(cudaFree(d_temp)); if (pbo) { cutilSafeCall(cudaGLUnregisterBufferObject(pbo)); glDeleteBuffersARB(1, &pbo); } if (texid) { glDeleteTextures(1, &texid); } } void initOpenGL() { // create pixel buffer object to store final image glGenBuffersARB(1, &pbo); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, width*height*sizeof(GLubyte)*4, h_img, GL_STREAM_DRAW_ARB); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); cutilSafeCall(cudaGLRegisterBufferObject(pbo)); // create texture for display glGenTextures(1, &texid); glBindTexture(GL_TEXTURE_2D, texid); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); } void benchmark(int iterations) { // allocate memory for result uint *d_result; unsigned int size = width * height * sizeof(uint); cutilSafeCall( cudaMalloc( (void**) &d_result, size)); // warm-up gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order); cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutStartTimer( timer)); // execute the kernel for(int i=0; i