[hamradio-commits] [gnss-sdr] 08/18: Removing cudahelpers library and usage by a copyright issue. It does not affect functionality.
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Fri Sep 18 10:27:52 UTC 2015
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit a84b4baef0d8577c908744d9ff2220f3317801a6
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date: Thu Sep 10 17:46:38 2015 +0200
Removing cudahelpers library and usage by a copyright issue. It does not
affect functionality.
---
src/algorithms/libs/cudahelpers/exception.h | 151 --
src/algorithms/libs/cudahelpers/helper_cuda.h | 1255 -----------------
.../libs/cudahelpers/helper_cuda_drvapi.h | 517 -------
src/algorithms/libs/cudahelpers/helper_cuda_gl.h | 165 ---
src/algorithms/libs/cudahelpers/helper_functions.h | 42 -
src/algorithms/libs/cudahelpers/helper_image.h | 1110 ---------------
src/algorithms/libs/cudahelpers/helper_math.h | 1453 --------------------
src/algorithms/libs/cudahelpers/helper_string.h | 516 -------
src/algorithms/libs/cudahelpers/helper_timer.h | 499 -------
.../tracking/gnuradio_blocks/CMakeLists.txt | 4 +-
.../gps_l1_ca_dll_pll_tracking_gpu_cc.cc | 47 +-
src/algorithms/tracking/libs/CMakeLists.txt | 8 +-
.../tracking/libs/cuda_multicorrelator.cu | 104 +-
13 files changed, 75 insertions(+), 5796 deletions(-)
diff --git a/src/algorithms/libs/cudahelpers/exception.h b/src/algorithms/libs/cudahelpers/exception.h
deleted file mode 100644
index adda4bc..0000000
--- a/src/algorithms/libs/cudahelpers/exception.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
-* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
-*
-* Please refer to the NVIDIA end user license agreement (EULA) associated
-* with this source code for terms and conditions that govern your use of
-* this software. Any use, reproduction, disclosure, or distribution of
-* this software and related documentation outside the terms of the EULA
-* is strictly prohibited.
-*
-*/
-
-/* CUda UTility Library */
-#ifndef _EXCEPTION_H_
-#define _EXCEPTION_H_
-
-// includes, system
-#include <exception>
-#include <stdexcept>
-#include <iostream>
-#include <stdlib.h>
-
-//! Exception wrapper.
-//! @param Std_Exception Exception out of namespace std for easy typing.
-template<class Std_Exception>
-class Exception : public Std_Exception
-{
- public:
-
- //! @brief Static construction interface
- //! @return Alwayss throws ( Located_Exception<Exception>)
- //! @param file file in which the Exception occurs
- //! @param line line in which the Exception occurs
- //! @param detailed details on the code fragment causing the Exception
- static void throw_it(const char *file,
- const int line,
- const char *detailed = "-");
-
- //! Static construction interface
- //! @return Alwayss throws ( Located_Exception<Exception>)
- //! @param file file in which the Exception occurs
- //! @param line line in which the Exception occurs
- //! @param detailed details on the code fragment causing the Exception
- static void throw_it(const char *file,
- const int line,
- const std::string &detailed);
-
- //! Destructor
- virtual ~Exception() throw();
-
- private:
-
- //! Constructor, default (private)
- Exception();
-
- //! Constructor, standard
- //! @param str string returned by what()
- Exception(const std::string &str);
-
-};
-
-////////////////////////////////////////////////////////////////////////////////
-//! Exception handler function for arbitrary exceptions
-//! @param ex exception to handle
-////////////////////////////////////////////////////////////////////////////////
-template<class Exception_Typ>
-inline void
-handleException(const Exception_Typ &ex)
-{
- std::cerr << ex.what() << std::endl;
-
- exit(EXIT_FAILURE);
-}
-
-//! Convenience macros
-
-//! Exception caused by dynamic program behavior, e.g. file does not exist
-#define RUNTIME_EXCEPTION( msg) \
- Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
-
-//! Logic exception in program, e.g. an assert failed
-#define LOGIC_EXCEPTION( msg) \
- Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
-
-//! Out of range exception
-#define RANGE_EXCEPTION( msg) \
- Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
-
-////////////////////////////////////////////////////////////////////////////////
-//! Implementation
-
-// includes, system
-#include <sstream>
-
-////////////////////////////////////////////////////////////////////////////////
-//! Static construction interface.
-//! @param Exception causing code fragment (file and line) and detailed infos.
-////////////////////////////////////////////////////////////////////////////////
-/*static*/ template<class Std_Exception>
-void
-Exception<Std_Exception>::
-throw_it(const char *file, const int line, const char *detailed)
-{
- std::stringstream s;
-
- // Quiet heavy-weight but exceptions are not for
- // performance / release versions
- s << "Exception in file '" << file << "' in line " << line << "\n"
- << "Detailed description: " << detailed << "\n";
-
- throw Exception(s.str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Static construction interface.
-//! @param Exception causing code fragment (file and line) and detailed infos.
-////////////////////////////////////////////////////////////////////////////////
-/*static*/ template<class Std_Exception>
-void
-Exception<Std_Exception>::
-throw_it(const char *file, const int line, const std::string &msg)
-{
- throw_it(file, line, msg.c_str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Constructor, default (private).
-////////////////////////////////////////////////////////////////////////////////
-template<class Std_Exception>
-Exception<Std_Exception>::Exception() :
- Std_Exception("Unknown Exception.\n")
-{ }
-
-////////////////////////////////////////////////////////////////////////////////
-//! Constructor, standard (private).
-//! String returned by what().
-////////////////////////////////////////////////////////////////////////////////
-template<class Std_Exception>
-Exception<Std_Exception>::Exception(const std::string &s) :
- Std_Exception(s)
-{ }
-
-////////////////////////////////////////////////////////////////////////////////
-//! Destructor
-////////////////////////////////////////////////////////////////////////////////
-template<class Std_Exception>
-Exception<Std_Exception>::~Exception() throw() { }
-
-// functions, exported
-
-#endif // #ifndef _EXCEPTION_H_
-
diff --git a/src/algorithms/libs/cudahelpers/helper_cuda.h b/src/algorithms/libs/cudahelpers/helper_cuda.h
deleted file mode 100644
index 1d3e920..0000000
--- a/src/algorithms/libs/cudahelpers/helper_cuda.h
+++ /dev/null
@@ -1,1255 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-////////////////////////////////////////////////////////////////////////////////
-// These are CUDA Helper functions for initialization and error checking
-
-#ifndef HELPER_CUDA_H
-#define HELPER_CUDA_H
-
-#pragma once
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <helper_string.h>
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// Note, it is required that your SDK sample to include the proper header files, please
-// refer the CUDA examples for examples of the needed CUDA headers, which may change depending
-// on which CUDA functions are used.
-
-// CUDA Runtime error messages
-#ifdef __DRIVER_TYPES_H__
-static const char *_cudaGetErrorEnum(cudaError_t error)
-{
- switch (error)
- {
- case cudaSuccess:
- return "cudaSuccess";
-
- case cudaErrorMissingConfiguration:
- return "cudaErrorMissingConfiguration";
-
- case cudaErrorMemoryAllocation:
- return "cudaErrorMemoryAllocation";
-
- case cudaErrorInitializationError:
- return "cudaErrorInitializationError";
-
- case cudaErrorLaunchFailure:
- return "cudaErrorLaunchFailure";
-
- case cudaErrorPriorLaunchFailure:
- return "cudaErrorPriorLaunchFailure";
-
- case cudaErrorLaunchTimeout:
- return "cudaErrorLaunchTimeout";
-
- case cudaErrorLaunchOutOfResources:
- return "cudaErrorLaunchOutOfResources";
-
- case cudaErrorInvalidDeviceFunction:
- return "cudaErrorInvalidDeviceFunction";
-
- case cudaErrorInvalidConfiguration:
- return "cudaErrorInvalidConfiguration";
-
- case cudaErrorInvalidDevice:
- return "cudaErrorInvalidDevice";
-
- case cudaErrorInvalidValue:
- return "cudaErrorInvalidValue";
-
- case cudaErrorInvalidPitchValue:
- return "cudaErrorInvalidPitchValue";
-
- case cudaErrorInvalidSymbol:
- return "cudaErrorInvalidSymbol";
-
- case cudaErrorMapBufferObjectFailed:
- return "cudaErrorMapBufferObjectFailed";
-
- case cudaErrorUnmapBufferObjectFailed:
- return "cudaErrorUnmapBufferObjectFailed";
-
- case cudaErrorInvalidHostPointer:
- return "cudaErrorInvalidHostPointer";
-
- case cudaErrorInvalidDevicePointer:
- return "cudaErrorInvalidDevicePointer";
-
- case cudaErrorInvalidTexture:
- return "cudaErrorInvalidTexture";
-
- case cudaErrorInvalidTextureBinding:
- return "cudaErrorInvalidTextureBinding";
-
- case cudaErrorInvalidChannelDescriptor:
- return "cudaErrorInvalidChannelDescriptor";
-
- case cudaErrorInvalidMemcpyDirection:
- return "cudaErrorInvalidMemcpyDirection";
-
- case cudaErrorAddressOfConstant:
- return "cudaErrorAddressOfConstant";
-
- case cudaErrorTextureFetchFailed:
- return "cudaErrorTextureFetchFailed";
-
- case cudaErrorTextureNotBound:
- return "cudaErrorTextureNotBound";
-
- case cudaErrorSynchronizationError:
- return "cudaErrorSynchronizationError";
-
- case cudaErrorInvalidFilterSetting:
- return "cudaErrorInvalidFilterSetting";
-
- case cudaErrorInvalidNormSetting:
- return "cudaErrorInvalidNormSetting";
-
- case cudaErrorMixedDeviceExecution:
- return "cudaErrorMixedDeviceExecution";
-
- case cudaErrorCudartUnloading:
- return "cudaErrorCudartUnloading";
-
- case cudaErrorUnknown:
- return "cudaErrorUnknown";
-
- case cudaErrorNotYetImplemented:
- return "cudaErrorNotYetImplemented";
-
- case cudaErrorMemoryValueTooLarge:
- return "cudaErrorMemoryValueTooLarge";
-
- case cudaErrorInvalidResourceHandle:
- return "cudaErrorInvalidResourceHandle";
-
- case cudaErrorNotReady:
- return "cudaErrorNotReady";
-
- case cudaErrorInsufficientDriver:
- return "cudaErrorInsufficientDriver";
-
- case cudaErrorSetOnActiveProcess:
- return "cudaErrorSetOnActiveProcess";
-
- case cudaErrorInvalidSurface:
- return "cudaErrorInvalidSurface";
-
- case cudaErrorNoDevice:
- return "cudaErrorNoDevice";
-
- case cudaErrorECCUncorrectable:
- return "cudaErrorECCUncorrectable";
-
- case cudaErrorSharedObjectSymbolNotFound:
- return "cudaErrorSharedObjectSymbolNotFound";
-
- case cudaErrorSharedObjectInitFailed:
- return "cudaErrorSharedObjectInitFailed";
-
- case cudaErrorUnsupportedLimit:
- return "cudaErrorUnsupportedLimit";
-
- case cudaErrorDuplicateVariableName:
- return "cudaErrorDuplicateVariableName";
-
- case cudaErrorDuplicateTextureName:
- return "cudaErrorDuplicateTextureName";
-
- case cudaErrorDuplicateSurfaceName:
- return "cudaErrorDuplicateSurfaceName";
-
- case cudaErrorDevicesUnavailable:
- return "cudaErrorDevicesUnavailable";
-
- case cudaErrorInvalidKernelImage:
- return "cudaErrorInvalidKernelImage";
-
- case cudaErrorNoKernelImageForDevice:
- return "cudaErrorNoKernelImageForDevice";
-
- case cudaErrorIncompatibleDriverContext:
- return "cudaErrorIncompatibleDriverContext";
-
- case cudaErrorPeerAccessAlreadyEnabled:
- return "cudaErrorPeerAccessAlreadyEnabled";
-
- case cudaErrorPeerAccessNotEnabled:
- return "cudaErrorPeerAccessNotEnabled";
-
- case cudaErrorDeviceAlreadyInUse:
- return "cudaErrorDeviceAlreadyInUse";
-
- case cudaErrorProfilerDisabled:
- return "cudaErrorProfilerDisabled";
-
- case cudaErrorProfilerNotInitialized:
- return "cudaErrorProfilerNotInitialized";
-
- case cudaErrorProfilerAlreadyStarted:
- return "cudaErrorProfilerAlreadyStarted";
-
- case cudaErrorProfilerAlreadyStopped:
- return "cudaErrorProfilerAlreadyStopped";
-
- /* Since CUDA 4.0*/
- case cudaErrorAssert:
- return "cudaErrorAssert";
-
- case cudaErrorTooManyPeers:
- return "cudaErrorTooManyPeers";
-
- case cudaErrorHostMemoryAlreadyRegistered:
- return "cudaErrorHostMemoryAlreadyRegistered";
-
- case cudaErrorHostMemoryNotRegistered:
- return "cudaErrorHostMemoryNotRegistered";
-
- /* Since CUDA 5.0 */
- case cudaErrorOperatingSystem:
- return "cudaErrorOperatingSystem";
-
- case cudaErrorPeerAccessUnsupported:
- return "cudaErrorPeerAccessUnsupported";
-
- case cudaErrorLaunchMaxDepthExceeded:
- return "cudaErrorLaunchMaxDepthExceeded";
-
- case cudaErrorLaunchFileScopedTex:
- return "cudaErrorLaunchFileScopedTex";
-
- case cudaErrorLaunchFileScopedSurf:
- return "cudaErrorLaunchFileScopedSurf";
-
- case cudaErrorSyncDepthExceeded:
- return "cudaErrorSyncDepthExceeded";
-
- case cudaErrorLaunchPendingCountExceeded:
- return "cudaErrorLaunchPendingCountExceeded";
-
- case cudaErrorNotPermitted:
- return "cudaErrorNotPermitted";
-
- case cudaErrorNotSupported:
- return "cudaErrorNotSupported";
-
- /* Since CUDA 6.0 */
- case cudaErrorHardwareStackError:
- return "cudaErrorHardwareStackError";
-
- case cudaErrorIllegalInstruction:
- return "cudaErrorIllegalInstruction";
-
- case cudaErrorMisalignedAddress:
- return "cudaErrorMisalignedAddress";
-
- case cudaErrorInvalidAddressSpace:
- return "cudaErrorInvalidAddressSpace";
-
- case cudaErrorInvalidPc:
- return "cudaErrorInvalidPc";
-
- case cudaErrorIllegalAddress:
- return "cudaErrorIllegalAddress";
-
- /* Since CUDA 6.5*/
- case cudaErrorInvalidPtx:
- return "cudaErrorInvalidPtx";
-
- case cudaErrorInvalidGraphicsContext:
- return "cudaErrorInvalidGraphicsContext";
-
- case cudaErrorStartupFailure:
- return "cudaErrorStartupFailure";
-
- case cudaErrorApiFailureBase:
- return "cudaErrorApiFailureBase";
- }
-
- return "<unknown>";
-}
-#endif
-
-#ifdef __cuda_cuda_h__
-// CUDA Driver API errors
-static const char *_cudaGetErrorEnum(CUresult error)
-{
- switch (error)
- {
- case CUDA_SUCCESS:
- return "CUDA_SUCCESS";
-
- case CUDA_ERROR_INVALID_VALUE:
- return "CUDA_ERROR_INVALID_VALUE";
-
- case CUDA_ERROR_OUT_OF_MEMORY:
- return "CUDA_ERROR_OUT_OF_MEMORY";
-
- case CUDA_ERROR_NOT_INITIALIZED:
- return "CUDA_ERROR_NOT_INITIALIZED";
-
- case CUDA_ERROR_DEINITIALIZED:
- return "CUDA_ERROR_DEINITIALIZED";
-
- case CUDA_ERROR_PROFILER_DISABLED:
- return "CUDA_ERROR_PROFILER_DISABLED";
-
- case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
- return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
-
- case CUDA_ERROR_PROFILER_ALREADY_STARTED:
- return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
-
- case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
- return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
-
- case CUDA_ERROR_NO_DEVICE:
- return "CUDA_ERROR_NO_DEVICE";
-
- case CUDA_ERROR_INVALID_DEVICE:
- return "CUDA_ERROR_INVALID_DEVICE";
-
- case CUDA_ERROR_INVALID_IMAGE:
- return "CUDA_ERROR_INVALID_IMAGE";
-
- case CUDA_ERROR_INVALID_CONTEXT:
- return "CUDA_ERROR_INVALID_CONTEXT";
-
- case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
- return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
-
- case CUDA_ERROR_MAP_FAILED:
- return "CUDA_ERROR_MAP_FAILED";
-
- case CUDA_ERROR_UNMAP_FAILED:
- return "CUDA_ERROR_UNMAP_FAILED";
-
- case CUDA_ERROR_ARRAY_IS_MAPPED:
- return "CUDA_ERROR_ARRAY_IS_MAPPED";
-
- case CUDA_ERROR_ALREADY_MAPPED:
- return "CUDA_ERROR_ALREADY_MAPPED";
-
- case CUDA_ERROR_NO_BINARY_FOR_GPU:
- return "CUDA_ERROR_NO_BINARY_FOR_GPU";
-
- case CUDA_ERROR_ALREADY_ACQUIRED:
- return "CUDA_ERROR_ALREADY_ACQUIRED";
-
- case CUDA_ERROR_NOT_MAPPED:
- return "CUDA_ERROR_NOT_MAPPED";
-
- case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
- return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
-
- case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
- return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
-
- case CUDA_ERROR_ECC_UNCORRECTABLE:
- return "CUDA_ERROR_ECC_UNCORRECTABLE";
-
- case CUDA_ERROR_UNSUPPORTED_LIMIT:
- return "CUDA_ERROR_UNSUPPORTED_LIMIT";
-
- case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
- return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
-
- case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
- return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
-
- case CUDA_ERROR_INVALID_PTX:
- return "CUDA_ERROR_INVALID_PTX";
-
- case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
- return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
-
- case CUDA_ERROR_INVALID_SOURCE:
- return "CUDA_ERROR_INVALID_SOURCE";
-
- case CUDA_ERROR_FILE_NOT_FOUND:
- return "CUDA_ERROR_FILE_NOT_FOUND";
-
- case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
- return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
-
- case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
- return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
-
- case CUDA_ERROR_OPERATING_SYSTEM:
- return "CUDA_ERROR_OPERATING_SYSTEM";
-
- case CUDA_ERROR_INVALID_HANDLE:
- return "CUDA_ERROR_INVALID_HANDLE";
-
- case CUDA_ERROR_NOT_FOUND:
- return "CUDA_ERROR_NOT_FOUND";
-
- case CUDA_ERROR_NOT_READY:
- return "CUDA_ERROR_NOT_READY";
-
- case CUDA_ERROR_ILLEGAL_ADDRESS:
- return "CUDA_ERROR_ILLEGAL_ADDRESS";
-
- case CUDA_ERROR_LAUNCH_FAILED:
- return "CUDA_ERROR_LAUNCH_FAILED";
-
- case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
- return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
-
- case CUDA_ERROR_LAUNCH_TIMEOUT:
- return "CUDA_ERROR_LAUNCH_TIMEOUT";
-
- case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
- return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
-
- case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
- return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
-
- case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
- return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
-
- case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
- return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
-
- case CUDA_ERROR_CONTEXT_IS_DESTROYED:
- return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
-
- case CUDA_ERROR_ASSERT:
- return "CUDA_ERROR_ASSERT";
-
- case CUDA_ERROR_TOO_MANY_PEERS:
- return "CUDA_ERROR_TOO_MANY_PEERS";
-
- case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
- return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
-
- case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
- return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
-
- case CUDA_ERROR_HARDWARE_STACK_ERROR:
- return "CUDA_ERROR_HARDWARE_STACK_ERROR";
-
- case CUDA_ERROR_ILLEGAL_INSTRUCTION:
- return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
-
- case CUDA_ERROR_MISALIGNED_ADDRESS:
- return "CUDA_ERROR_MISALIGNED_ADDRESS";
-
- case CUDA_ERROR_INVALID_ADDRESS_SPACE:
- return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
-
- case CUDA_ERROR_INVALID_PC:
- return "CUDA_ERROR_INVALID_PC";
-
- case CUDA_ERROR_NOT_PERMITTED:
- return "CUDA_ERROR_NOT_PERMITTED";
-
- case CUDA_ERROR_NOT_SUPPORTED:
- return "CUDA_ERROR_NOT_SUPPORTED";
-
- case CUDA_ERROR_UNKNOWN:
- return "CUDA_ERROR_UNKNOWN";
- }
-
- return "<unknown>";
-}
-#endif
-
-#ifdef CUBLAS_API_H_
-// cuBLAS API errors
-static const char *_cudaGetErrorEnum(cublasStatus_t error)
-{
- switch (error)
- {
- case CUBLAS_STATUS_SUCCESS:
- return "CUBLAS_STATUS_SUCCESS";
-
- case CUBLAS_STATUS_NOT_INITIALIZED:
- return "CUBLAS_STATUS_NOT_INITIALIZED";
-
- case CUBLAS_STATUS_ALLOC_FAILED:
- return "CUBLAS_STATUS_ALLOC_FAILED";
-
- case CUBLAS_STATUS_INVALID_VALUE:
- return "CUBLAS_STATUS_INVALID_VALUE";
-
- case CUBLAS_STATUS_ARCH_MISMATCH:
- return "CUBLAS_STATUS_ARCH_MISMATCH";
-
- case CUBLAS_STATUS_MAPPING_ERROR:
- return "CUBLAS_STATUS_MAPPING_ERROR";
-
- case CUBLAS_STATUS_EXECUTION_FAILED:
- return "CUBLAS_STATUS_EXECUTION_FAILED";
-
- case CUBLAS_STATUS_INTERNAL_ERROR:
- return "CUBLAS_STATUS_INTERNAL_ERROR";
- }
-
- return "<unknown>";
-}
-#endif
-
-#ifdef _CUFFT_H_
-// cuFFT API errors
-static const char *_cudaGetErrorEnum(cufftResult error)
-{
- switch (error)
- {
- case CUFFT_SUCCESS:
- return "CUFFT_SUCCESS";
-
- case CUFFT_INVALID_PLAN:
- return "CUFFT_INVALID_PLAN";
-
- case CUFFT_ALLOC_FAILED:
- return "CUFFT_ALLOC_FAILED";
-
- case CUFFT_INVALID_TYPE:
- return "CUFFT_INVALID_TYPE";
-
- case CUFFT_INVALID_VALUE:
- return "CUFFT_INVALID_VALUE";
-
- case CUFFT_INTERNAL_ERROR:
- return "CUFFT_INTERNAL_ERROR";
-
- case CUFFT_EXEC_FAILED:
- return "CUFFT_EXEC_FAILED";
-
- case CUFFT_SETUP_FAILED:
- return "CUFFT_SETUP_FAILED";
-
- case CUFFT_INVALID_SIZE:
- return "CUFFT_INVALID_SIZE";
-
- case CUFFT_UNALIGNED_DATA:
- return "CUFFT_UNALIGNED_DATA";
-
- case CUFFT_INCOMPLETE_PARAMETER_LIST:
- return "CUFFT_INCOMPLETE_PARAMETER_LIST";
-
- case CUFFT_INVALID_DEVICE:
- return "CUFFT_INVALID_DEVICE";
-
- case CUFFT_PARSE_ERROR:
- return "CUFFT_PARSE_ERROR";
-
- case CUFFT_NO_WORKSPACE:
- return "CUFFT_NO_WORKSPACE";
-
- case CUFFT_NOT_IMPLEMENTED:
- return "CUFFT_NOT_IMPLEMENTED";
-
- case CUFFT_LICENSE_ERROR:
- return "CUFFT_LICENSE_ERROR";
- }
-
- return "<unknown>";
-}
-#endif
-
-
-#ifdef CUSPARSEAPI
-// cuSPARSE API errors
-static const char *_cudaGetErrorEnum(cusparseStatus_t error)
-{
- switch (error)
- {
- case CUSPARSE_STATUS_SUCCESS:
- return "CUSPARSE_STATUS_SUCCESS";
-
- case CUSPARSE_STATUS_NOT_INITIALIZED:
- return "CUSPARSE_STATUS_NOT_INITIALIZED";
-
- case CUSPARSE_STATUS_ALLOC_FAILED:
- return "CUSPARSE_STATUS_ALLOC_FAILED";
-
- case CUSPARSE_STATUS_INVALID_VALUE:
- return "CUSPARSE_STATUS_INVALID_VALUE";
-
- case CUSPARSE_STATUS_ARCH_MISMATCH:
- return "CUSPARSE_STATUS_ARCH_MISMATCH";
-
- case CUSPARSE_STATUS_MAPPING_ERROR:
- return "CUSPARSE_STATUS_MAPPING_ERROR";
-
- case CUSPARSE_STATUS_EXECUTION_FAILED:
- return "CUSPARSE_STATUS_EXECUTION_FAILED";
-
- case CUSPARSE_STATUS_INTERNAL_ERROR:
- return "CUSPARSE_STATUS_INTERNAL_ERROR";
-
- case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
- return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
- }
-
- return "<unknown>";
-}
-#endif
-
-#ifdef CUSOLVER_COMMON_H_
-//cuSOLVER API errors
-static const char *_cudaGetErrorEnum(cusolverStatus_t error)
-{
- switch(error)
- {
- case CUSOLVER_STATUS_SUCCESS:
- return "CUSOLVER_STATUS_SUCCESS";
- case CUSOLVER_STATUS_NOT_INITIALIZED:
- return "CUSOLVER_STATUS_NOT_INITIALIZED";
- case CUSOLVER_STATUS_ALLOC_FAILED:
- return "CUSOLVER_STATUS_ALLOC_FAILED";
- case CUSOLVER_STATUS_INVALID_VALUE:
- return "CUSOLVER_STATUS_INVALID_VALUE";
- case CUSOLVER_STATUS_ARCH_MISMATCH:
- return "CUSOLVER_STATUS_ARCH_MISMATCH";
- case CUSOLVER_STATUS_MAPPING_ERROR:
- return "CUSOLVER_STATUS_MAPPING_ERROR";
- case CUSOLVER_STATUS_EXECUTION_FAILED:
- return "CUSOLVER_STATUS_EXECUTION_FAILED";
- case CUSOLVER_STATUS_INTERNAL_ERROR:
- return "CUSOLVER_STATUS_INTERNAL_ERROR";
- case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
- return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
- case CUSOLVER_STATUS_NOT_SUPPORTED :
- return "CUSOLVER_STATUS_NOT_SUPPORTED ";
- case CUSOLVER_STATUS_ZERO_PIVOT:
- return "CUSOLVER_STATUS_ZERO_PIVOT";
- case CUSOLVER_STATUS_INVALID_LICENSE:
- return "CUSOLVER_STATUS_INVALID_LICENSE";
- }
-
- return "<unknown>";
-
-}
-#endif
-
-#ifdef CURAND_H_
-// cuRAND API errors
-static const char *_cudaGetErrorEnum(curandStatus_t error)
-{
- switch (error)
- {
- case CURAND_STATUS_SUCCESS:
- return "CURAND_STATUS_SUCCESS";
-
- case CURAND_STATUS_VERSION_MISMATCH:
- return "CURAND_STATUS_VERSION_MISMATCH";
-
- case CURAND_STATUS_NOT_INITIALIZED:
- return "CURAND_STATUS_NOT_INITIALIZED";
-
- case CURAND_STATUS_ALLOCATION_FAILED:
- return "CURAND_STATUS_ALLOCATION_FAILED";
-
- case CURAND_STATUS_TYPE_ERROR:
- return "CURAND_STATUS_TYPE_ERROR";
-
- case CURAND_STATUS_OUT_OF_RANGE:
- return "CURAND_STATUS_OUT_OF_RANGE";
-
- case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
- return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
-
- case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
- return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
-
- case CURAND_STATUS_LAUNCH_FAILURE:
- return "CURAND_STATUS_LAUNCH_FAILURE";
-
- case CURAND_STATUS_PREEXISTING_FAILURE:
- return "CURAND_STATUS_PREEXISTING_FAILURE";
-
- case CURAND_STATUS_INITIALIZATION_FAILED:
- return "CURAND_STATUS_INITIALIZATION_FAILED";
-
- case CURAND_STATUS_ARCH_MISMATCH:
- return "CURAND_STATUS_ARCH_MISMATCH";
-
- case CURAND_STATUS_INTERNAL_ERROR:
- return "CURAND_STATUS_INTERNAL_ERROR";
- }
-
- return "<unknown>";
-}
-#endif
-
-#ifdef NV_NPPIDEFS_H
-// NPP API errors
-static const char *_cudaGetErrorEnum(NppStatus error)
-{
- switch (error)
- {
- case NPP_NOT_SUPPORTED_MODE_ERROR:
- return "NPP_NOT_SUPPORTED_MODE_ERROR";
-
- case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
- return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
-
- case NPP_RESIZE_NO_OPERATION_ERROR:
- return "NPP_RESIZE_NO_OPERATION_ERROR";
-
- case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
- return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
-
- case NPP_BAD_ARG_ERROR:
- return "NPP_BAD_ARGUMENT_ERROR";
-
- case NPP_COEFF_ERROR:
- return "NPP_COEFFICIENT_ERROR";
-
- case NPP_RECT_ERROR:
- return "NPP_RECTANGLE_ERROR";
-
- case NPP_QUAD_ERROR:
- return "NPP_QUADRANGLE_ERROR";
-
- case NPP_MEM_ALLOC_ERR:
- return "NPP_MEMORY_ALLOCATION_ERROR";
-
- case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
- return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
-
- case NPP_INVALID_INPUT:
- return "NPP_INVALID_INPUT";
-
- case NPP_POINTER_ERROR:
- return "NPP_POINTER_ERROR";
-
- case NPP_WARNING:
- return "NPP_WARNING";
-
- case NPP_ODD_ROI_WARNING:
- return "NPP_ODD_ROI_WARNING";
-#else
-
- // These are for CUDA 5.5 or higher
- case NPP_BAD_ARGUMENT_ERROR:
- return "NPP_BAD_ARGUMENT_ERROR";
-
- case NPP_COEFFICIENT_ERROR:
- return "NPP_COEFFICIENT_ERROR";
-
- case NPP_RECTANGLE_ERROR:
- return "NPP_RECTANGLE_ERROR";
-
- case NPP_QUADRANGLE_ERROR:
- return "NPP_QUADRANGLE_ERROR";
-
- case NPP_MEMORY_ALLOCATION_ERR:
- return "NPP_MEMORY_ALLOCATION_ERROR";
-
- case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
- return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
-
- case NPP_INVALID_HOST_POINTER_ERROR:
- return "NPP_INVALID_HOST_POINTER_ERROR";
-
- case NPP_INVALID_DEVICE_POINTER_ERROR:
- return "NPP_INVALID_DEVICE_POINTER_ERROR";
-#endif
-
- case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
- return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
-
- case NPP_TEXTURE_BIND_ERROR:
- return "NPP_TEXTURE_BIND_ERROR";
-
- case NPP_WRONG_INTERSECTION_ROI_ERROR:
- return "NPP_WRONG_INTERSECTION_ROI_ERROR";
-
- case NPP_NOT_EVEN_STEP_ERROR:
- return "NPP_NOT_EVEN_STEP_ERROR";
-
- case NPP_INTERPOLATION_ERROR:
- return "NPP_INTERPOLATION_ERROR";
-
- case NPP_RESIZE_FACTOR_ERROR:
- return "NPP_RESIZE_FACTOR_ERROR";
-
- case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
- return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
-
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
-
- case NPP_MEMFREE_ERR:
- return "NPP_MEMFREE_ERR";
-
- case NPP_MEMSET_ERR:
- return "NPP_MEMSET_ERR";
-
- case NPP_MEMCPY_ERR:
- return "NPP_MEMCPY_ERROR";
-
- case NPP_MIRROR_FLIP_ERR:
- return "NPP_MIRROR_FLIP_ERR";
-#else
-
- case NPP_MEMFREE_ERROR:
- return "NPP_MEMFREE_ERROR";
-
- case NPP_MEMSET_ERROR:
- return "NPP_MEMSET_ERROR";
-
- case NPP_MEMCPY_ERROR:
- return "NPP_MEMCPY_ERROR";
-
- case NPP_MIRROR_FLIP_ERROR:
- return "NPP_MIRROR_FLIP_ERROR";
-#endif
-
- case NPP_ALIGNMENT_ERROR:
- return "NPP_ALIGNMENT_ERROR";
-
- case NPP_STEP_ERROR:
- return "NPP_STEP_ERROR";
-
- case NPP_SIZE_ERROR:
- return "NPP_SIZE_ERROR";
-
- case NPP_NULL_POINTER_ERROR:
- return "NPP_NULL_POINTER_ERROR";
-
- case NPP_CUDA_KERNEL_EXECUTION_ERROR:
- return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
-
- case NPP_NOT_IMPLEMENTED_ERROR:
- return "NPP_NOT_IMPLEMENTED_ERROR";
-
- case NPP_ERROR:
- return "NPP_ERROR";
-
- case NPP_SUCCESS:
- return "NPP_SUCCESS";
-
- case NPP_WRONG_INTERSECTION_QUAD_WARNING:
- return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
-
- case NPP_MISALIGNED_DST_ROI_WARNING:
- return "NPP_MISALIGNED_DST_ROI_WARNING";
-
- case NPP_AFFINE_QUAD_INCORRECT_WARNING:
- return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
-
- case NPP_DOUBLE_SIZE_WARNING:
- return "NPP_DOUBLE_SIZE_WARNING";
-
- case NPP_WRONG_INTERSECTION_ROI_WARNING:
- return "NPP_WRONG_INTERSECTION_ROI_WARNING";
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
- /* These are 6.0 or higher */
- case NPP_LUT_PALETTE_BITSIZE_ERROR:
- return "NPP_LUT_PALETTE_BITSIZE_ERROR";
-
- case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
- return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
-
- case NPP_QUALITY_INDEX_ERROR:
- return "NPP_QUALITY_INDEX_ERROR";
-
- case NPP_CHANNEL_ORDER_ERROR:
- return "NPP_CHANNEL_ORDER_ERROR";
-
- case NPP_ZERO_MASK_VALUE_ERROR:
- return "NPP_ZERO_MASK_VALUE_ERROR";
-
- case NPP_NUMBER_OF_CHANNELS_ERROR:
- return "NPP_NUMBER_OF_CHANNELS_ERROR";
-
- case NPP_COI_ERROR:
- return "NPP_COI_ERROR";
-
- case NPP_DIVISOR_ERROR:
- return "NPP_DIVISOR_ERROR";
-
- case NPP_CHANNEL_ERROR:
- return "NPP_CHANNEL_ERROR";
-
- case NPP_STRIDE_ERROR:
- return "NPP_STRIDE_ERROR";
-
- case NPP_ANCHOR_ERROR:
- return "NPP_ANCHOR_ERROR";
-
- case NPP_MASK_SIZE_ERROR:
- return "NPP_MASK_SIZE_ERROR";
-
- case NPP_MOMENT_00_ZERO_ERROR:
- return "NPP_MOMENT_00_ZERO_ERROR";
-
- case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
- return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
-
- case NPP_THRESHOLD_ERROR:
- return "NPP_THRESHOLD_ERROR";
-
- case NPP_CONTEXT_MATCH_ERROR:
- return "NPP_CONTEXT_MATCH_ERROR";
-
- case NPP_FFT_FLAG_ERROR:
- return "NPP_FFT_FLAG_ERROR";
-
- case NPP_FFT_ORDER_ERROR:
- return "NPP_FFT_ORDER_ERROR";
-
- case NPP_SCALE_RANGE_ERROR:
- return "NPP_SCALE_RANGE_ERROR";
-
- case NPP_DATA_TYPE_ERROR:
- return "NPP_DATA_TYPE_ERROR";
-
- case NPP_OUT_OFF_RANGE_ERROR:
- return "NPP_OUT_OFF_RANGE_ERROR";
-
- case NPP_DIVIDE_BY_ZERO_ERROR:
- return "NPP_DIVIDE_BY_ZERO_ERROR";
-
- case NPP_RANGE_ERROR:
- return "NPP_RANGE_ERROR";
-
- case NPP_NO_MEMORY_ERROR:
- return "NPP_NO_MEMORY_ERROR";
-
- case NPP_ERROR_RESERVED:
- return "NPP_ERROR_RESERVED";
-
- case NPP_NO_OPERATION_WARNING:
- return "NPP_NO_OPERATION_WARNING";
-
- case NPP_DIVIDE_BY_ZERO_WARNING:
- return "NPP_DIVIDE_BY_ZERO_WARNING";
-#endif
-
- }
-
- return "<unknown>";
-}
-#endif
-
-#ifdef __DRIVER_TYPES_H__
-#ifndef DEVICE_RESET
-#define DEVICE_RESET cudaDeviceReset();
-#endif
-#else
-#ifndef DEVICE_RESET
-#define DEVICE_RESET
-#endif
-#endif
-
-template< typename T >
-void check(T result, char const *const func, const char *const file, int const line)
-{
- if (result)
- {
- fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
- file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
- DEVICE_RESET
- // Make sure we call CUDA Device Reset before exiting
- exit(EXIT_FAILURE);
- }
-}
-
-#ifdef __DRIVER_TYPES_H__
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
-
-// This will output the proper error string when calling cudaGetLastError
-#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)
-
-inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
-{
- cudaError_t err = cudaGetLastError();
-
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
- file, line, errorMessage, (int)err, cudaGetErrorString(err));
- DEVICE_RESET
- exit(EXIT_FAILURE);
- }
-}
-#endif
-
-#ifndef MAX
-#define MAX(a,b) (a > b ? a : b)
-#endif
-
-// Float To Int conversion
-inline int ftoi(float value)
-{
- return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
-}
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2Cores(int major, int minor)
-{
- // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
- typedef struct
- {
- int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
- int Cores;
- } sSMtoCores;
-
- sSMtoCores nGpuArchCoresPerSM[] =
- {
- { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
- { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
- { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
- { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
- { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
- { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
- { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
- { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
- { -1, -1 }
- };
-
- int index = 0;
-
- while (nGpuArchCoresPerSM[index].SM != -1)
- {
- if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
- {
- return nGpuArchCoresPerSM[index].Cores;
- }
-
- index++;
- }
-
- // If we don't find the values, we default use the previous one to run properly
- printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
- return nGpuArchCoresPerSM[index-1].Cores;
-}
-// end of GPU Architecture definitions
-
-#ifdef __CUDA_RUNTIME_H__
-// General GPU Device CUDA Initialization
-inline int gpuDeviceInit(int devID)
-{
- int device_count;
- checkCudaErrors(cudaGetDeviceCount(&device_count));
-
- if (device_count == 0)
- {
- fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
- exit(EXIT_FAILURE);
- }
-
- if (devID < 0)
- {
- devID = 0;
- }
-
- if (devID > device_count-1)
- {
- fprintf(stderr, "\n");
- fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
- fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
- fprintf(stderr, "\n");
- return -devID;
- }
-
- cudaDeviceProp deviceProp;
- checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
-
- if (deviceProp.computeMode == cudaComputeModeProhibited)
- {
- fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
- return -1;
- }
-
- if (deviceProp.major < 1)
- {
- fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
- exit(EXIT_FAILURE);
- }
-
- checkCudaErrors(cudaSetDevice(devID));
- printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
-
- return devID;
-}
-
-// This function returns the best GPU (with maximum GFLOPS)
-inline int gpuGetMaxGflopsDeviceId()
-{
- int current_device = 0, sm_per_multiproc = 0;
- int max_perf_device = 0;
- int device_count = 0, best_SM_arch = 0;
- int devices_prohibited = 0;
-
- unsigned long long max_compute_perf = 0;
- cudaDeviceProp deviceProp;
- cudaGetDeviceCount(&device_count);
-
- checkCudaErrors(cudaGetDeviceCount(&device_count));
-
- if (device_count == 0)
- {
- fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
- exit(EXIT_FAILURE);
- }
-
- // Find the best major SM Architecture GPU device
- while (current_device < device_count)
- {
- cudaGetDeviceProperties(&deviceProp, current_device);
-
- // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
- if (deviceProp.computeMode != cudaComputeModeProhibited)
- {
- if (deviceProp.major > 0 && deviceProp.major < 9999)
- {
- best_SM_arch = MAX(best_SM_arch, deviceProp.major);
- }
- }
- else
- {
- devices_prohibited++;
- }
-
- current_device++;
- }
-
- if (devices_prohibited == device_count)
- {
- fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
- exit(EXIT_FAILURE);
- }
-
- // Find the best CUDA capable GPU device
- current_device = 0;
-
- while (current_device < device_count)
- {
- cudaGetDeviceProperties(&deviceProp, current_device);
-
- // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
- if (deviceProp.computeMode != cudaComputeModeProhibited)
- {
- if (deviceProp.major == 9999 && deviceProp.minor == 9999)
- {
- sm_per_multiproc = 1;
- }
- else
- {
- sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
- }
-
- unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
-
- if (compute_perf > max_compute_perf)
- {
- // If we find GPU with SM major > 2, search only these
- if (best_SM_arch > 2)
- {
- // If our device==dest_SM_arch, choose this, or else pass
- if (deviceProp.major == best_SM_arch)
- {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- else
- {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- }
-
- ++current_device;
- }
-
- return max_perf_device;
-}
-
-
-// Initialization code to find the best CUDA Device
-inline int findCudaDevice(int argc, const char **argv)
-{
- cudaDeviceProp deviceProp;
- int devID = 0;
-
- // If the command-line has a device number specified, use it
- if (checkCmdLineFlag(argc, argv, "device"))
- {
- devID = getCmdLineArgumentInt(argc, argv, "device=");
-
- if (devID < 0)
- {
- printf("Invalid command line parameter\n ");
- exit(EXIT_FAILURE);
- }
- else
- {
- devID = gpuDeviceInit(devID);
-
- if (devID < 0)
- {
- printf("exiting...\n");
- exit(EXIT_FAILURE);
- }
- }
- }
- else
- {
- // Otherwise pick the device with highest Gflops/s
- devID = gpuGetMaxGflopsDeviceId();
- checkCudaErrors(cudaSetDevice(devID));
- checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
- printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
- }
-
- return devID;
-}
-
-// General check for CUDA GPU SM Capabilities
-inline bool checkCudaCapabilities(int major_version, int minor_version)
-{
- cudaDeviceProp deviceProp;
- deviceProp.major = 0;
- deviceProp.minor = 0;
- int dev;
-
- checkCudaErrors(cudaGetDevice(&dev));
- checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
-
- if ((deviceProp.major > major_version) ||
- (deviceProp.major == major_version && deviceProp.minor >= minor_version))
- {
- printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
- return true;
- }
- else
- {
- printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
- return false;
- }
-}
-#endif
-
-// end of CUDA Helper Functions
-
-
-#endif
diff --git a/src/algorithms/libs/cudahelpers/helper_cuda_drvapi.h b/src/algorithms/libs/cudahelpers/helper_cuda_drvapi.h
deleted file mode 100644
index 8112ec9..0000000
--- a/src/algorithms/libs/cudahelpers/helper_cuda_drvapi.h
+++ /dev/null
@@ -1,517 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// Helper functions for CUDA Driver API error handling (make sure that CUDA_H is included in your projects)
-#ifndef HELPER_CUDA_DRVAPI_H
-#define HELPER_CUDA_DRVAPI_H
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <helper_string.h>
-#include <drvapi_error_string.h>
-
-#ifndef MAX
-#define MAX(a,b) (a > b ? a : b)
-#endif
-
-#ifndef HELPER_CUDA_H
-inline int ftoi(float value)
-{
- return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
-}
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-// These are CUDA Helper functions
-
-// add a level of protection to the CUDA SDK samples, let's force samples to explicitly include CUDA.H
-#ifdef __cuda_cuda_h__
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#ifndef checkCudaErrors
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-// These are the inline versions for all of the SDK helper functions
-inline void __checkCudaErrors(CUresult err, const char *file, const int line)
-{
- if (CUDA_SUCCESS != err)
- {
- fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, line %i.\n",
- err, getCudaDrvErrorString(err), file, line);
- exit(EXIT_FAILURE);
- }
-}
-#endif
-
-#ifdef getLastCudaDrvErrorMsg
-#undef getLastCudaDrvErrorMsg
-#endif
-
-#define getLastCudaDrvErrorMsg(msg) __getLastCudaDrvErrorMsg (msg, __FILE__, __LINE__)
-
-inline void __getLastCudaDrvErrorMsg(const char *msg, const char *file, const int line)
-{
- CUresult err = cuCtxSynchronize();
-
- if (CUDA_SUCCESS != err)
- {
- fprintf(stderr, "getLastCudaDrvErrorMsg -> %s", msg);
- fprintf(stderr, "getLastCudaDrvErrorMsg -> cuCtxSynchronize API error = %04d \"%s\" in file <%s>, line %i.\n",
- err, getCudaDrvErrorString(err), file, line);
- exit(EXIT_FAILURE);
- }
-}
-
-// This function wraps the CUDA Driver API into a template function
-template <class T>
-inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
-{
- CUresult error_result = cuDeviceGetAttribute(attribute, device_attribute, device);
-
- if (error_result != CUDA_SUCCESS)
- {
- printf("cuDeviceGetAttribute returned %d\n-> %s\n", (int)error_result, getCudaDrvErrorString(error_result));
- exit(EXIT_SUCCESS);
- }
-}
-#endif
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2CoresDRV(int major, int minor)
-{
- // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
- typedef struct
- {
- int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
- int Cores;
- } sSMtoCores;
-
- sSMtoCores nGpuArchCoresPerSM[] =
- {
- { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
- { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
- { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
- { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
- { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
- { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
- { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
- { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
- { -1, -1 }
- };
-
- int index = 0;
-
- while (nGpuArchCoresPerSM[index].SM != -1)
- {
- if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
- {
- return nGpuArchCoresPerSM[index].Cores;
- }
-
- index++;
- }
-
- // If we don't find the values, we default use the previous one to run properly
- printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
- return nGpuArchCoresPerSM[index-1].Cores;
-}
-// end of GPU Architecture definitions
-
-#ifdef __cuda_cuda_h__
-// General GPU Device CUDA Initialization
-inline int gpuDeviceInitDRV(int ARGC, const char **ARGV)
-{
- int cuDevice = 0;
- int deviceCount = 0;
- CUresult err = cuInit(0);
-
- if (CUDA_SUCCESS == err)
- {
- checkCudaErrors(cuDeviceGetCount(&deviceCount));
- }
-
- if (deviceCount == 0)
- {
- fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
- exit(EXIT_FAILURE);
- }
-
- int dev = 0;
- dev = getCmdLineArgumentInt(ARGC, (const char **) ARGV, "device=");
-
- if (dev < 0)
- {
- dev = 0;
- }
-
- if (dev > deviceCount-1)
- {
- fprintf(stderr, "\n");
- fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
- fprintf(stderr, ">> cudaDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
- fprintf(stderr, "\n");
- return -dev;
- }
-
- checkCudaErrors(cuDeviceGet(&cuDevice, dev));
- char name[100];
- cuDeviceGetName(name, 100, cuDevice);
-
- int computeMode;
- getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
-
- if (computeMode == CU_COMPUTEMODE_PROHIBITED)
- {
- fprintf(stderr, "Error: device is running in <CU_COMPUTEMODE_PROHIBITED>, no threads can use this CUDA Device.\n");
- return -1;
- }
-
- if (checkCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == false)
- {
- printf("gpuDeviceInitDRV() Using CUDA Device [%d]: %s\n", dev, name);
- }
-
- return dev;
-}
-
-// This function returns the best GPU based on performance
-inline int gpuGetMaxGflopsDeviceIdDRV()
-{
- CUdevice current_device = 0;
- CUdevice max_perf_device = 0;
- int device_count = 0;
- int sm_per_multiproc = 0;
- unsigned long long max_compute_perf = 0;
- int best_SM_arch = 0;
- int major = 0;
- int minor = 0;
- int multiProcessorCount;
- int clockRate;
- int devices_prohibited = 0;
-
- cuInit(0);
- checkCudaErrors(cuDeviceGetCount(&device_count));
-
- if (device_count == 0)
- {
- fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: no devices supporting CUDA\n");
- exit(EXIT_FAILURE);
- }
-
- // Find the best major SM Architecture GPU device
- while (current_device < device_count)
- {
- checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
- if (major > 0 && major < 9999)
- {
- best_SM_arch = MAX(best_SM_arch, major);
- }
-
- current_device++;
- }
-
- // Find the best CUDA capable GPU device
- current_device = 0;
-
- while (current_device < device_count)
- {
- checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- current_device));
- checkCudaErrors(cuDeviceGetAttribute(&clockRate,
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
- current_device));
- checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
- int computeMode;
- getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
-
- if (computeMode != CU_COMPUTEMODE_PROHIBITED)
- {
- if (major == 9999 && minor == 9999)
- {
- sm_per_multiproc = 1;
- }
- else
- {
- sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
- }
-
- unsigned long long compute_perf = (unsigned long long) (multiProcessorCount * sm_per_multiproc * clockRate);
-
- if (compute_perf > max_compute_perf)
- {
- // If we find GPU with SM major > 2, search only these
- if (best_SM_arch > 2)
- {
- // If our device==dest_SM_arch, choose this, or else pass
- if (major == best_SM_arch)
- {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- else
- {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- }
- else
- {
- devices_prohibited++;
- }
-
- ++current_device;
- }
-
- if (devices_prohibited == device_count)
- {
- fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: all devices have compute mode prohibited.\n");
- exit(EXIT_FAILURE);
- }
-
- return max_perf_device;
-}
-
-// This function returns the best Graphics GPU based on performance
-inline int gpuGetMaxGflopsGLDeviceIdDRV()
-{
- CUdevice current_device = 0, max_perf_device = 0;
- int device_count = 0, sm_per_multiproc = 0;
- int max_compute_perf = 0, best_SM_arch = 0;
- int major = 0, minor = 0, multiProcessorCount, clockRate;
- int bTCC = 0;
- int devices_prohibited = 0;
- char deviceName[256];
-
- cuInit(0);
- checkCudaErrors(cuDeviceGetCount(&device_count));
-
- if (device_count == 0)
- {
- fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: no devices supporting CUDA\n");
- exit(EXIT_FAILURE);
- }
-
- // Find the best major SM Architecture GPU device that are graphics devices
- while (current_device < device_count)
- {
- checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device));
- checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
-#if CUDA_VERSION >= 3020
- checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
-#else
-
- // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
- if (deviceName[0] == 'T')
- {
- bTCC = 1;
- }
-
-#endif
-
- int computeMode;
- getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
-
- if (computeMode != CU_COMPUTEMODE_PROHIBITED)
- {
- if (!bTCC)
- {
- if (major > 0 && major < 9999)
- {
- best_SM_arch = MAX(best_SM_arch, major);
- }
- }
- }
- else
- {
- devices_prohibited++;
- }
-
- current_device++;
- }
-
- if (devices_prohibited == device_count)
- {
- fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: all devices have compute mode prohibited.\n");
- exit(EXIT_FAILURE);
- }
-
- // Find the best CUDA capable GPU device
- current_device = 0;
-
- while (current_device < device_count)
- {
- checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- current_device));
- checkCudaErrors(cuDeviceGetAttribute(&clockRate,
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
- current_device));
- checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
-#if CUDA_VERSION >= 3020
- checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
-#else
-
- // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
- if (deviceName[0] == 'T')
- {
- bTCC = 1;
- }
-
-#endif
-
- int computeMode;
- getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
-
- if (computeMode != CU_COMPUTEMODE_PROHIBITED)
- {
- if (major == 9999 && minor == 9999)
- {
- sm_per_multiproc = 1;
- }
- else
- {
- sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
- }
-
- // If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contendor
- if (!bTCC) // Is this GPU running the TCC driver? If so we pass on this
- {
- int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
-
- if (compute_perf > max_compute_perf)
- {
- // If we find GPU with SM major > 2, search only these
- if (best_SM_arch > 2)
- {
- // If our device = dest_SM_arch, then we pick this one
- if (major == best_SM_arch)
- {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- else
- {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- }
- }
-
- ++current_device;
- }
-
- return max_perf_device;
-}
-
-// General initialization call to pick the best CUDA Device
-inline CUdevice findCudaDeviceDRV(int argc, const char **argv)
-{
- CUdevice cuDevice;
- int devID = 0;
-
- // If the command-line has a device number specified, use it
- if (checkCmdLineFlag(argc, (const char **)argv, "device"))
- {
- devID = gpuDeviceInitDRV(argc, argv);
-
- if (devID < 0)
- {
- printf("exiting...\n");
- exit(EXIT_SUCCESS);
- }
- }
- else
- {
- // Otherwise pick the device with highest Gflops/s
- char name[100];
- devID = gpuGetMaxGflopsDeviceIdDRV();
- checkCudaErrors(cuDeviceGet(&cuDevice, devID));
- cuDeviceGetName(name, 100, cuDevice);
- printf("> Using CUDA Device [%d]: %s\n", devID, name);
- }
-
- cuDeviceGet(&cuDevice, devID);
-
- return cuDevice;
-}
-
-// This function will pick the best CUDA device available with OpenGL interop
-inline CUdevice findCudaGLDeviceDRV(int argc, const char **argv)
-{
- CUdevice cuDevice;
- int devID = 0;
-
- // If the command-line has a device number specified, use it
- if (checkCmdLineFlag(argc, (const char **)argv, "device"))
- {
- devID = gpuDeviceInitDRV(argc, (const char **)argv);
-
- if (devID < 0)
- {
- printf("no CUDA capable devices found, exiting...\n");
- exit(EXIT_SUCCESS);
- }
- }
- else
- {
- char name[100];
- // Otherwise pick the device with highest Gflops/s
- devID = gpuGetMaxGflopsGLDeviceIdDRV();
- checkCudaErrors(cuDeviceGet(&cuDevice, devID));
- cuDeviceGetName(name, 100, cuDevice);
- printf("> Using CUDA/GL Device [%d]: %s\n", devID, name);
- }
-
- return devID;
-}
-
-// General check for CUDA GPU SM Capabilities
-inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int devID)
-{
- CUdevice cuDevice;
- char name[256];
- int major = 0, minor = 0;
-
- checkCudaErrors(cuDeviceGet(&cuDevice, devID));
- checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
- checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID));
-
- if ((major > major_version) ||
- (major == major_version && minor >= minor_version))
- {
- printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", devID, name, major, minor);
- return true;
- }
- else
- {
- printf("No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
- return false;
- }
-}
-#endif
-
-// end of CUDA Helper Functions
-
-#endif
diff --git a/src/algorithms/libs/cudahelpers/helper_cuda_gl.h b/src/algorithms/libs/cudahelpers/helper_cuda_gl.h
deleted file mode 100644
index 3d2d943..0000000
--- a/src/algorithms/libs/cudahelpers/helper_cuda_gl.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-#ifndef HELPER_CUDA_GL_H
-#define HELPER_CUDA_GL_H
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-// includes, graphics
-#if defined (__APPLE__) || defined(MACOSX)
-#include <OpenGL/gl.h>
-#include <OpenGL/glu.h>
-#else
-#include <GL/gl.h>
-#include <GL/glu.h>
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-#ifdef __DRIVER_TYPES_H__
-#ifndef DEVICE_RESET
-#define DEVICE_RESET cudaDeviceReset()
-#endif
-#else
-#ifndef DEVICE_RESET
-#define DEVICE_RESET
-#endif
-#endif
-
-#ifdef __CUDA_GL_INTEROP_H__
-////////////////////////////////////////////////////////////////////////////////
-// These are CUDA OpenGL Helper functions
-
-inline int gpuGLDeviceInit(int ARGC, const char **ARGV)
-{
- int deviceCount;
- checkCudaErrors(cudaGetDeviceCount(&deviceCount));
-
- if (deviceCount == 0)
- {
- fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
- exit(EXIT_FAILURE);
- }
-
- int dev = 0;
- dev = getCmdLineArgumentInt(ARGC, ARGV, "device=");
-
- if (dev < 0)
- {
- dev = 0;
- }
-
- if (dev > deviceCount-1)
- {
- fprintf(stderr, "\n");
- fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
- fprintf(stderr, ">> gpuGLDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
- fprintf(stderr, "\n");
- return -dev;
- }
-
- cudaDeviceProp deviceProp;
- checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
-
- if (deviceProp.computeMode == cudaComputeModeProhibited)
- {
- fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
- return -1;
- }
-
- if (deviceProp.major < 1)
- {
- fprintf(stderr, "Error: device does not support CUDA.\n");
- exit(EXIT_FAILURE);
- }
-
- if (checkCmdLineFlag(ARGC, ARGV, "quiet") == false)
- {
- fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name);
- }
-
- checkCudaErrors(cudaGLSetGLDevice(dev));
- return dev;
-}
-
-// This function will pick the best CUDA device available with OpenGL interop
-inline int findCudaGLDevice(int argc, const char **argv)
-{
- int devID = 0;
-
- // If the command-line has a device number specified, use it
- if (checkCmdLineFlag(argc, (const char **)argv, "device"))
- {
- devID = gpuGLDeviceInit(argc, (const char **)argv);
-
- if (devID < 0)
- {
- printf("no CUDA capable devices found, exiting...\n");
- DEVICE_RESET
- exit(EXIT_SUCCESS);
- }
- }
- else
- {
- // Otherwise pick the device with highest Gflops/s
- devID = gpuGetMaxGflopsDeviceId();
- cudaGLSetGLDevice(devID);
- }
-
- return devID;
-}
-
-////////////////////////////////////////////////////////////////////////////
-//! Check for OpenGL error
-//! @return bool if no GL error has been encountered, otherwise 0
-//! @param file __FILE__ macro
-//! @param line __LINE__ macro
-//! @note The GL error is listed on stderr
-//! @note This function should be used via the CHECK_ERROR_GL() macro
-////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkCheckErrorGL(const char *file, const int line)
-{
- bool ret_val = true;
-
- // check for error
- GLenum gl_error = glGetError();
-
- if (gl_error != GL_NO_ERROR)
- {
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
- char tmpStr[512];
- // NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line
- // when the user double clicks on the error line in the Output pane. Like any compile error.
- sprintf_s(tmpStr, 255, "\n%s(%i) : GL Error : %s\n\n", file, line, gluErrorString(gl_error));
- fprintf(stderr, "%s", tmpStr);
-#endif
- fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
- fprintf(stderr, "%s\n", gluErrorString(gl_error));
- ret_val = false;
- }
-
- return ret_val;
-}
-
-#define SDK_CHECK_ERROR_GL() \
- if( false == sdkCheckErrorGL( __FILE__, __LINE__)) { \
- DEVICE_RESET \
- exit(EXIT_FAILURE); \
- }
-#endif
-
-#endif
diff --git a/src/algorithms/libs/cudahelpers/helper_functions.h b/src/algorithms/libs/cudahelpers/helper_functions.h
deleted file mode 100644
index 11538ba..0000000
--- a/src/algorithms/libs/cudahelpers/helper_functions.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// These are helper functions for the SDK samples (string parsing, timers, image helpers, etc)
-#ifndef HELPER_FUNCTIONS_H
-#define HELPER_FUNCTIONS_H
-
-#ifdef WIN32
-#pragma warning(disable:4996)
-#endif
-
-// includes, project
-#include <stdio.h>
-#include <stdlib.h>
-#include <string>
-#include <assert.h>
-#include <exception.h>
-#include <math.h>
-
-#include <fstream>
-#include <vector>
-#include <iostream>
-#include <algorithm>
-
-// includes, timer, string parsing, image helpers
-#include <helper_timer.h> // helper functions for timers
-#include <helper_string.h> // helper functions for string parsing
-#include <helper_image.h> // helper functions for image compare, dump, data comparisons
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-#endif // HELPER_FUNCTIONS_H
diff --git a/src/algorithms/libs/cudahelpers/helper_image.h b/src/algorithms/libs/cudahelpers/helper_image.h
deleted file mode 100644
index 4e8b25c..0000000
--- a/src/algorithms/libs/cudahelpers/helper_image.h
+++ /dev/null
@@ -1,1110 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// These are helper functions for the SDK samples (image,bitmap)
-#ifndef HELPER_IMAGE_H
-#define HELPER_IMAGE_H
-
-#include <string>
-#include <fstream>
-#include <vector>
-#include <iostream>
-#include <algorithm>
-
-#include <assert.h>
-#include <exception.h>
-#include <math.h>
-
-#ifndef MIN
-#define MIN(a,b) ((a < b) ? a : b)
-#endif
-#ifndef MAX
-#define MAX(a,b) ((a > b) ? a : b)
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-#include <helper_string.h>
-
-// namespace unnamed (internal)
-namespace
-{
- //! size of PGM file header
- const unsigned int PGMHeaderSize = 0x40;
-
- // types
-
- //! Data converter from unsigned char / unsigned byte to type T
- template<class T>
- struct ConverterFromUByte;
-
- //! Data converter from unsigned char / unsigned byte
- template<>
- struct ConverterFromUByte<unsigned char>
- {
- //! Conversion operator
- //! @return converted value
- //! @param val value to convert
- float operator()(const unsigned char &val)
- {
- return static_cast<unsigned char>(val);
- }
- };
-
- //! Data converter from unsigned char / unsigned byte to float
- template<>
- struct ConverterFromUByte<float>
- {
- //! Conversion operator
- //! @return converted value
- //! @param val value to convert
- float operator()(const unsigned char &val)
- {
- return static_cast<float>(val) / 255.0f;
- }
- };
-
- //! Data converter from unsigned char / unsigned byte to type T
- template<class T>
- struct ConverterToUByte;
-
- //! Data converter from unsigned char / unsigned byte to unsigned int
- template<>
- struct ConverterToUByte<unsigned char>
- {
- //! Conversion operator (essentially a passthru
- //! @return converted value
- //! @param val value to convert
- unsigned char operator()(const unsigned char &val)
- {
- return val;
- }
- };
-
- //! Data converter from unsigned char / unsigned byte to unsigned int
- template<>
- struct ConverterToUByte<float>
- {
- //! Conversion operator
- //! @return converted value
- //! @param val value to convert
- unsigned char operator()(const float &val)
- {
- return static_cast<unsigned char>(val * 255.0f);
- }
- };
-}
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result != 0)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf_s
-#endif
-#else
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result == NULL)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf
-#endif
-#endif
-
-inline bool
-__loadPPM(const char *file, unsigned char **data,
- unsigned int *w, unsigned int *h, unsigned int *channels)
-{
- FILE *fp = NULL;
-
- if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
- {
- std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
- return false;
- }
-
- // check header
- char header[PGMHeaderSize];
-
- if (fgets(header, PGMHeaderSize, fp) == NULL)
- {
- std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
- return false;
- }
-
- if (strncmp(header, "P5", 2) == 0)
- {
- *channels = 1;
- }
- else if (strncmp(header, "P6", 2) == 0)
- {
- *channels = 3;
- }
- else
- {
- std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
- *channels = 0;
- return false;
- }
-
- // parse header, read maxval, width and height
- unsigned int width = 0;
- unsigned int height = 0;
- unsigned int maxval = 0;
- unsigned int i = 0;
-
- while (i < 3)
- {
- if (fgets(header, PGMHeaderSize, fp) == NULL)
- {
- std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
- return false;
- }
-
- if (header[0] == '#')
- {
- continue;
- }
-
- if (i == 0)
- {
- i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
- }
- else if (i == 1)
- {
- i += SSCANF(header, "%u %u", &height, &maxval);
- }
- else if (i == 2)
- {
- i += SSCANF(header, "%u", &maxval);
- }
- }
-
- // check if given handle for the data is initialized
- if (NULL != *data)
- {
- if (*w != width || *h != height)
- {
- std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
- }
- }
- else
- {
- *data = (unsigned char *) malloc(sizeof(unsigned char) * width * height **channels);
- *w = width;
- *h = height;
- }
-
- // read and close file
- if (fread(*data, sizeof(unsigned char), width * height **channels, fp) == 0)
- {
- std::cerr << "__LoadPPM() read data returned error." << std::endl;
- }
-
- fclose(fp);
-
- return true;
-}
-
-template <class T>
-inline bool
-sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
-{
- unsigned char *idata = NULL;
- unsigned int channels;
-
- if (true != __loadPPM(file, &idata, w, h, &channels))
- {
- return false;
- }
-
- unsigned int size = *w **h * channels;
-
- // initialize mem if necessary
- // the correct size is checked / set in loadPGMc()
- if (NULL == *data)
- {
- *data = (T *) malloc(sizeof(T) * size);
- }
-
- // copy and cast data
- std::transform(idata, idata + size, *data, ConverterFromUByte<T>());
-
- free(idata);
-
- return true;
-}
-
-template <class T>
-inline bool
-sdkLoadPPM4(const char *file, T **data,
- unsigned int *w,unsigned int *h)
-{
- unsigned char *idata = 0;
- unsigned int channels;
-
- if (__loadPPM(file, &idata, w, h, &channels))
- {
- // pad 4th component
- int size = *w **h;
- // keep the original pointer
- unsigned char *idata_orig = idata;
- *data = (T *) malloc(sizeof(T) * size * 4);
- unsigned char *ptr = *data;
-
- for (int i=0; i<size; i++)
- {
- *ptr++ = *idata++;
- *ptr++ = *idata++;
- *ptr++ = *idata++;
- *ptr++ = 0;
- }
-
- free(idata_orig);
- return true;
- }
- else
- {
- free(idata);
- return false;
- }
-}
-
-inline bool
-__savePPM(const char *file, unsigned char *data,
- unsigned int w, unsigned int h, unsigned int channels)
-{
- assert(NULL != data);
- assert(w > 0);
- assert(h > 0);
-
- std::fstream fh(file, std::fstream::out | std::fstream::binary);
-
- if (fh.bad())
- {
- std::cerr << "__savePPM() : Opening file failed." << std::endl;
- return false;
- }
-
- if (channels == 1)
- {
- fh << "P5\n";
- }
- else if (channels == 3)
- {
- fh << "P6\n";
- }
- else
- {
- std::cerr << "__savePPM() : Invalid number of channels." << std::endl;
- return false;
- }
-
- fh << w << "\n" << h << "\n" << 0xff << std::endl;
-
- for (unsigned int i = 0; (i < (w*h*channels)) && fh.good(); ++i)
- {
- fh << data[i];
- }
-
- fh.flush();
-
- if (fh.bad())
- {
- std::cerr << "__savePPM() : Writing data failed." << std::endl;
- return false;
- }
-
- fh.close();
-
- return true;
-}
-
-template<class T>
-inline bool
-sdkSavePGM(const char *file, T *data, unsigned int w, unsigned int h)
-{
- unsigned int size = w * h;
- unsigned char *idata =
- (unsigned char *) malloc(sizeof(unsigned char) * size);
-
- std::transform(data, data + size, idata, ConverterToUByte<T>());
-
- // write file
- bool result = __savePPM(file, idata, w, h, 1);
-
- // cleanup
- free(idata);
-
- return result;
-}
-
-inline bool
-sdkSavePPM4ub(const char *file, unsigned char *data,
- unsigned int w, unsigned int h)
-{
- // strip 4th component
- int size = w * h;
- unsigned char *ndata = (unsigned char *) malloc(sizeof(unsigned char) * size*3);
- unsigned char *ptr = ndata;
-
- for (int i=0; i<size; i++)
- {
- *ptr++ = *data++;
- *ptr++ = *data++;
- *ptr++ = *data++;
- data++;
- }
-
- bool result = __savePPM(file, ndata, w, h, 3);
- free(ndata);
- return result;
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-//! Read file \filename and return the data
-//! @return bool if reading the file succeeded, otherwise false
-//! @param filename name of the source file
-//! @param data uninitialized pointer, returned initialized and pointing to
-//! the data read
-//! @param len number of data elements in data, -1 on error
-//////////////////////////////////////////////////////////////////////////////
-template<class T>
-inline bool
-sdkReadFile(const char *filename, T **data, unsigned int *len, bool verbose)
-{
- // check input arguments
- assert(NULL != filename);
- assert(NULL != len);
-
- // intermediate storage for the data read
- std::vector<T> data_read;
-
- // open file for reading
- FILE *fh = NULL;
-
- // check if filestream is valid
- if (FOPEN_FAIL(FOPEN(fh, filename, "r")))
- {
- printf("Unable to open input file: %s\n", filename);
- return false;
- }
-
- // read all data elements
- T token;
-
- while (!feof(fh))
- {
- fscanf(fh, "%f", &token);
- data_read.push_back(token);
- }
-
- // the last element is read twice
- data_read.pop_back();
- fclose(fh);
-
- // check if the given handle is already initialized
- if (NULL != *data)
- {
- if (*len != data_read.size())
- {
- std::cerr << "sdkReadFile() : Initialized memory given but "
- << "size mismatch with signal read "
- << "(data read / data init = " << (unsigned int)data_read.size()
- << " / " << *len << ")" << std::endl;
-
- return false;
- }
- }
- else
- {
- // allocate storage for the data read
- *data = (T *) malloc(sizeof(T) * data_read.size());
- // store signal size
- *len = static_cast<unsigned int>(data_read.size());
- }
-
- // copy data
- memcpy(*data, &data_read.front(), sizeof(T) * data_read.size());
-
- return true;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//! Read file \filename and return the data
-//! @return bool if reading the file succeeded, otherwise false
-//! @param filename name of the source file
-//! @param data uninitialized pointer, returned initialized and pointing to
-//! the data read
-//! @param len number of data elements in data, -1 on error
-//////////////////////////////////////////////////////////////////////////////
-template<class T>
-inline bool
-sdkReadFileBlocks(const char *filename, T **data, unsigned int *len, unsigned int block_num, unsigned int block_size, bool verbose)
-{
- // check input arguments
- assert(NULL != filename);
- assert(NULL != len);
-
- // open file for reading
- FILE *fh = fopen(filename, "rb");
-
- if (fh == NULL && verbose)
- {
- std::cerr << "sdkReadFile() : Opening file failed." << std::endl;
- return false;
- }
-
- // check if the given handle is already initialized
- // allocate storage for the data read
- data[block_num] = (T *) malloc(block_size);
-
- // read all data elements
- fseek(fh, block_num * block_size, SEEK_SET);
- *len = fread(data[block_num], sizeof(T), block_size/sizeof(T), fh);
-
- fclose(fh);
-
- return true;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//! Write a data file \filename
-//! @return true if writing the file succeeded, otherwise false
-//! @param filename name of the source file
-//! @param data data to write
-//! @param len number of data elements in data, -1 on error
-//! @param epsilon epsilon for comparison
-//////////////////////////////////////////////////////////////////////////////
-template<class T, class S>
-inline bool
-sdkWriteFile(const char *filename, const T *data, unsigned int len,
- const S epsilon, bool verbose, bool append = false)
-{
- assert(NULL != filename);
- assert(NULL != data);
-
- // open file for writing
- // if (append) {
- std::fstream fh(filename, std::fstream::out | std::fstream::ate);
-
- if (verbose)
- {
- std::cerr << "sdkWriteFile() : Open file " << filename << " for write/append." << std::endl;
- }
-
- /* } else {
- std::fstream fh(filename, std::fstream::out);
- if (verbose) {
- std::cerr << "sdkWriteFile() : Open file " << filename << " for write." << std::endl;
- }
- }
- */
-
- // check if filestream is valid
- if (! fh.good())
- {
- if (verbose)
- {
- std::cerr << "sdkWriteFile() : Opening file failed." << std::endl;
- }
-
- return false;
- }
-
- // first write epsilon
- fh << "# " << epsilon << "\n";
-
- // write data
- for (unsigned int i = 0; (i < len) && (fh.good()); ++i)
- {
- fh << data[i] << ' ';
- }
-
- // Check if writing succeeded
- if (! fh.good())
- {
- if (verbose)
- {
- std::cerr << "sdkWriteFile() : Writing file failed." << std::endl;
- }
-
- return false;
- }
-
- // file ends with nl
- fh << std::endl;
-
- return true;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//! Compare two arrays of arbitrary type
-//! @return true if \a reference and \a data are identical, otherwise false
-//! @param reference timer_interface to the reference data / gold image
-//! @param data handle to the computed data
-//! @param len number of elements in reference and data
-//! @param epsilon epsilon to use for the comparison
-//////////////////////////////////////////////////////////////////////////////
-template<class T, class S>
-inline bool
-compareData(const T *reference, const T *data, const unsigned int len,
- const S epsilon, const float threshold)
-{
- assert(epsilon >= 0);
-
- bool result = true;
- unsigned int error_count = 0;
-
- for (unsigned int i = 0; i < len; ++i)
- {
- float diff = (float)reference[i] - (float)data[i];
- bool comp = (diff <= epsilon) && (diff >= -epsilon);
- result &= comp;
-
- error_count += !comp;
-
-#if 0
-
- if (! comp)
- {
- std::cerr << "ERROR, i = " << i << ",\t "
- << reference[i] << " / "
- << data[i]
- << " (reference / data)\n";
- }
-
-#endif
- }
-
- if (threshold == 0.0f)
- {
- return (result) ? true : false;
- }
- else
- {
- if (error_count)
- {
- printf("%4.2f(%%) of bytes mismatched (count=%d)\n", (float)error_count*100/(float)len, error_count);
- }
-
- return (len*threshold > error_count) ? true : false;
- }
-}
-
-#ifndef __MIN_EPSILON_ERROR
-#define __MIN_EPSILON_ERROR 1e-3f
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//! Compare two arrays of arbitrary type
-//! @return true if \a reference and \a data are identical, otherwise false
-//! @param reference handle to the reference data / gold image
-//! @param data handle to the computed data
-//! @param len number of elements in reference and data
-//! @param epsilon epsilon to use for the comparison
-//! @param epsilon threshold % of (# of bytes) for pass/fail
-//////////////////////////////////////////////////////////////////////////////
-template<class T, class S>
-inline bool
-compareDataAsFloatThreshold(const T *reference, const T *data, const unsigned int len,
- const S epsilon, const float threshold)
-{
- assert(epsilon >= 0);
-
- // If we set epsilon to be 0, let's set a minimum threshold
- float max_error = MAX((float)epsilon, __MIN_EPSILON_ERROR);
- int error_count = 0;
- bool result = true;
-
- for (unsigned int i = 0; i < len; ++i)
- {
- float diff = fabs((float)reference[i] - (float)data[i]);
- bool comp = (diff < max_error);
- result &= comp;
-
- if (! comp)
- {
- error_count++;
-#if 0
-
- if (error_count < 50)
- {
- printf("\n ERROR(epsilon=%4.3f), i=%d, (ref)0x%02x / (data)0x%02x / (diff)%d\n",
- max_error, i,
- *(unsigned int *)&reference[i],
- *(unsigned int *)&data[i],
- (unsigned int)diff);
- }
-
-#endif
- }
- }
-
- if (threshold == 0.0f)
- {
- if (error_count)
- {
- printf("total # of errors = %d\n", error_count);
- }
-
- return (error_count == 0) ? true : false;
- }
- else
- {
- if (error_count)
- {
- printf("%4.2f(%%) of bytes mismatched (count=%d)\n", (float)error_count*100/(float)len, error_count);
- }
-
- return ((len*threshold > error_count) ? true : false);
- }
-}
-
-inline
-void sdkDumpBin(void *data, unsigned int bytes, const char *filename)
-{
- printf("sdkDumpBin: <%s>\n", filename);
- FILE *fp;
- FOPEN(fp, filename, "wb");
- fwrite(data, bytes, 1, fp);
- fflush(fp);
- fclose(fp);
-}
-
-inline
-bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file, unsigned int nelements, const float epsilon, const float threshold, char *exec_path)
-{
- unsigned int *src_buffer, *ref_buffer;
- FILE *src_fp = NULL, *ref_fp = NULL;
-
- unsigned long error_count = 0;
- size_t fsize = 0;
-
- if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb")))
- {
- printf("compareBin2Bin <unsigned int> unable to open src_file: %s\n", src_file);
- error_count++;
- }
-
- char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
-
- if (ref_file_path == NULL)
- {
- printf("compareBin2Bin <unsigned int> unable to find <%s> in <%s>\n", ref_file, exec_path);
- printf(">>> Check info.xml and [project//data] folder <%s> <<<\n", ref_file);
- printf("Aborting comparison!\n");
- printf(" FAILED\n");
- error_count++;
-
- if (src_fp)
- {
- fclose(src_fp);
- }
-
- if (ref_fp)
- {
- fclose(ref_fp);
- }
- }
- else
- {
- if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb")))
- {
- printf("compareBin2Bin <unsigned int> unable to open ref_file: %s\n", ref_file_path);
- error_count++;
- }
-
- if (src_fp && ref_fp)
- {
- src_buffer = (unsigned int *)malloc(nelements*sizeof(unsigned int));
- ref_buffer = (unsigned int *)malloc(nelements*sizeof(unsigned int));
-
- fsize = fread(src_buffer, nelements, sizeof(unsigned int), src_fp);
- fsize = fread(ref_buffer, nelements, sizeof(unsigned int), ref_fp);
-
- printf("> compareBin2Bin <unsigned int> nelements=%d, epsilon=%4.2f, threshold=%4.2f\n", nelements, epsilon, threshold);
- printf(" src_file <%s>, size=%d bytes\n", src_file, (int)fsize);
- printf(" ref_file <%s>, size=%d bytes\n", ref_file_path, (int)fsize);
-
- if (!compareData<unsigned int, float>(ref_buffer, src_buffer, nelements, epsilon, threshold))
- {
- error_count++;
- }
-
- fclose(src_fp);
- fclose(ref_fp);
-
- free(src_buffer);
- free(ref_buffer);
- }
- else
- {
- if (src_fp)
- {
- fclose(src_fp);
- }
-
- if (ref_fp)
- {
- fclose(ref_fp);
- }
- }
- }
-
- if (error_count == 0)
- {
- printf(" OK\n");
- }
- else
- {
- printf(" FAILURE: %d errors...\n", (unsigned int)error_count);
- }
-
- return (error_count == 0); // returns true if all pixels pass
-}
-
-inline
-bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file, unsigned int nelements, const float epsilon, const float threshold, char *exec_path)
-{
- float *src_buffer, *ref_buffer;
- FILE *src_fp = NULL, *ref_fp = NULL;
- size_t fsize = 0;
-
- unsigned long error_count = 0;
-
- if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb")))
- {
- printf("compareBin2Bin <float> unable to open src_file: %s\n", src_file);
- error_count = 1;
- }
-
- char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
-
- if (ref_file_path == NULL)
- {
- printf("compareBin2Bin <float> unable to find <%s> in <%s>\n", ref_file, exec_path);
- printf(">>> Check info.xml and [project//data] folder <%s> <<<\n", exec_path);
- printf("Aborting comparison!\n");
- printf(" FAILED\n");
- error_count++;
-
- if (src_fp)
- {
- fclose(src_fp);
- }
-
- if (ref_fp)
- {
- fclose(ref_fp);
- }
- }
- else
- {
- if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb")))
- {
- printf("compareBin2Bin <float> unable to open ref_file: %s\n", ref_file_path);
- error_count = 1;
- }
-
- if (src_fp && ref_fp)
- {
- src_buffer = (float *)malloc(nelements*sizeof(float));
- ref_buffer = (float *)malloc(nelements*sizeof(float));
-
- fsize = fread(src_buffer, nelements, sizeof(float), src_fp);
- fsize = fread(ref_buffer, nelements, sizeof(float), ref_fp);
-
- printf("> compareBin2Bin <float> nelements=%d, epsilon=%4.2f, threshold=%4.2f\n", nelements, epsilon, threshold);
- printf(" src_file <%s>, size=%d bytes\n", src_file, (int)fsize);
- printf(" ref_file <%s>, size=%d bytes\n", ref_file_path, (int)fsize);
-
- if (!compareDataAsFloatThreshold<float, float>(ref_buffer, src_buffer, nelements, epsilon, threshold))
- {
- error_count++;
- }
-
- fclose(src_fp);
- fclose(ref_fp);
-
- free(src_buffer);
- free(ref_buffer);
- }
- else
- {
- if (src_fp)
- {
- fclose(src_fp);
- }
-
- if (ref_fp)
- {
- fclose(ref_fp);
- }
- }
- }
-
- if (error_count == 0)
- {
- printf(" OK\n");
- }
- else
- {
- printf(" FAILURE: %d errors...\n", (unsigned int)error_count);
- }
-
- return (error_count == 0); // returns true if all pixels pass
-}
-
-inline bool
-sdkCompareL2fe(const float *reference, const float *data,
- const unsigned int len, const float epsilon)
-{
- assert(epsilon >= 0);
-
- float error = 0;
- float ref = 0;
-
- for (unsigned int i = 0; i < len; ++i)
- {
-
- float diff = reference[i] - data[i];
- error += diff * diff;
- ref += reference[i] * reference[i];
- }
-
- float normRef = sqrtf(ref);
-
- if (fabs(ref) < 1e-7)
- {
-#ifdef _DEBUG
- std::cerr << "ERROR, reference l2-norm is 0\n";
-#endif
- return false;
- }
-
- float normError = sqrtf(error);
- error = normError / normRef;
- bool result = error < epsilon;
-#ifdef _DEBUG
-
- if (! result)
- {
- std::cerr << "ERROR, l2-norm error "
- << error << " is greater than epsilon " << epsilon << "\n";
- }
-
-#endif
-
- return result;
-}
-
-inline bool
-sdkLoadPPMub(const char *file, unsigned char **data,
- unsigned int *w,unsigned int *h)
-{
- unsigned int channels;
- return __loadPPM(file, data, w, h, &channels);
-}
-
-inline bool
-sdkLoadPPM4ub(const char *file, unsigned char **data,
- unsigned int *w, unsigned int *h)
-{
- unsigned char *idata = 0;
- unsigned int channels;
-
- if (__loadPPM(file, &idata, w, h, &channels))
- {
- // pad 4th component
- int size = *w **h;
- // keep the original pointer
- unsigned char *idata_orig = idata;
- *data = (unsigned char *) malloc(sizeof(unsigned char) * size * 4);
- unsigned char *ptr = *data;
-
- for (int i=0; i<size; i++)
- {
- *ptr++ = *idata++;
- *ptr++ = *idata++;
- *ptr++ = *idata++;
- *ptr++ = 0;
- }
-
- free(idata_orig);
- return true;
- }
- else
- {
- free(idata);
- return false;
- }
-}
-
-
-inline bool
-sdkComparePPM(const char *src_file, const char *ref_file,
- const float epsilon, const float threshold, bool verboseErrors)
-{
- unsigned char *src_data, *ref_data;
- unsigned long error_count = 0;
- unsigned int ref_width, ref_height;
- unsigned int src_width, src_height;
-
- if (src_file == NULL || ref_file == NULL)
- {
- if (verboseErrors)
- {
- std::cerr << "PPMvsPPM: src_file or ref_file is NULL. Aborting comparison\n";
- }
-
- return false;
- }
-
- if (verboseErrors)
- {
- std::cerr << "> Compare (a)rendered: <" << src_file << ">\n";
- std::cerr << "> (b)reference: <" << ref_file << ">\n";
- }
-
-
- if (sdkLoadPPM4ub(ref_file, &ref_data, &ref_width, &ref_height) != true)
- {
- if (verboseErrors)
- {
- std::cerr << "PPMvsPPM: unable to load ref image file: "<< ref_file << "\n";
- }
-
- return false;
- }
-
- if (sdkLoadPPM4ub(src_file, &src_data, &src_width, &src_height) != true)
- {
- std::cerr << "PPMvsPPM: unable to load src image file: " << src_file << "\n";
- return false;
- }
-
- if (src_height != ref_height || src_width != ref_width)
- {
- if (verboseErrors) std::cerr << "PPMvsPPM: source and ref size mismatch (" << src_width <<
- "," << src_height << ")vs(" << ref_width << "," << ref_height << ")\n";
- }
-
- if (verboseErrors) std::cerr << "PPMvsPPM: comparing images size (" << src_width <<
- "," << src_height << ") epsilon(" << epsilon << "), threshold(" << threshold*100 << "%)\n";
-
- if (compareData(ref_data, src_data, src_width*src_height*4, epsilon, threshold) == false)
- {
- error_count=1;
- }
-
- if (error_count == 0)
- {
- if (verboseErrors)
- {
- std::cerr << " OK\n\n";
- }
- }
- else
- {
- if (verboseErrors)
- {
- std::cerr << " FAILURE! "<<error_count<<" errors...\n\n";
- }
- }
-
- return (error_count == 0)? true : false; // returns true if all pixels pass
-}
-
-inline bool
-sdkComparePGM(const char *src_file, const char *ref_file,
- const float epsilon, const float threshold, bool verboseErrors)
-{
- unsigned char *src_data = 0, *ref_data = 0;
- unsigned long error_count = 0;
- unsigned int ref_width, ref_height;
- unsigned int src_width, src_height;
-
- if (src_file == NULL || ref_file == NULL)
- {
- if (verboseErrors)
- {
- std::cerr << "PGMvsPGM: src_file or ref_file is NULL. Aborting comparison\n";
- }
-
- return false;
- }
-
- if (verboseErrors)
- {
- std::cerr << "> Compare (a)rendered: <" << src_file << ">\n";
- std::cerr << "> (b)reference: <" << ref_file << ">\n";
- }
-
-
- if (sdkLoadPPMub(ref_file, &ref_data, &ref_width, &ref_height) != true)
- {
- if (verboseErrors)
- {
- std::cerr << "PGMvsPGM: unable to load ref image file: "<< ref_file << "\n";
- }
-
- return false;
- }
-
- if (sdkLoadPPMub(src_file, &src_data, &src_width, &src_height) != true)
- {
- std::cerr << "PGMvsPGM: unable to load src image file: " << src_file << "\n";
- return false;
- }
-
- if (src_height != ref_height || src_width != ref_width)
- {
- if (verboseErrors) std::cerr << "PGMvsPGM: source and ref size mismatch (" << src_width <<
- "," << src_height << ")vs(" << ref_width << "," << ref_height << ")\n";
- }
-
- if (verboseErrors) std::cerr << "PGMvsPGM: comparing images size (" << src_width <<
- "," << src_height << ") epsilon(" << epsilon << "), threshold(" << threshold*100 << "%)\n";
-
- if (compareData(ref_data, src_data, src_width*src_height, epsilon, threshold) == false)
- {
- error_count=1;
- }
-
- if (error_count == 0)
- {
- if (verboseErrors)
- {
- std::cerr << " OK\n\n";
- }
- }
- else
- {
- if (verboseErrors)
- {
- std::cerr << " FAILURE! "<<error_count<<" errors...\n\n";
- }
- }
-
- return (error_count == 0)? true : false; // returns true if all pixels pass
-}
-
-#endif // HELPER_IMAGE_H
diff --git a/src/algorithms/libs/cudahelpers/helper_math.h b/src/algorithms/libs/cudahelpers/helper_math.h
deleted file mode 100644
index c9c07c3..0000000
--- a/src/algorithms/libs/cudahelpers/helper_math.h
+++ /dev/null
@@ -1,1453 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-/*
- * This file implements common mathematical operations on vector types
- * (float3, float4 etc.) since these are not provided as standard by CUDA.
- *
- * The syntax is modeled on the Cg standard library.
- *
- * This is part of the Helper library includes
- *
- * Thanks to Linh Hah for additions and fixes.
- */
-
-#ifndef HELPER_MATH_H
-#define HELPER_MATH_H
-
-#include "cuda_runtime.h"
-
-typedef unsigned int uint;
-typedef unsigned short ushort;
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-#ifndef __CUDACC__
-#include <math.h>
-
-////////////////////////////////////////////////////////////////////////////////
-// host implementations of CUDA functions
-////////////////////////////////////////////////////////////////////////////////
-
-inline float fminf(float a, float b)
-{
- return a < b ? a : b;
-}
-
-inline float fmaxf(float a, float b)
-{
- return a > b ? a : b;
-}
-
-inline int max(int a, int b)
-{
- return a > b ? a : b;
-}
-
-inline int min(int a, int b)
-{
- return a < b ? a : b;
-}
-
-inline float rsqrtf(float x)
-{
- return 1.0f / sqrtf(x);
-}
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-// constructors
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 make_float2(float s)
-{
- return make_float2(s, s);
-}
-inline __host__ __device__ float2 make_float2(float3 a)
-{
- return make_float2(a.x, a.y);
-}
-inline __host__ __device__ float2 make_float2(int2 a)
-{
- return make_float2(float(a.x), float(a.y));
-}
-inline __host__ __device__ float2 make_float2(uint2 a)
-{
- return make_float2(float(a.x), float(a.y));
-}
-
-inline __host__ __device__ int2 make_int2(int s)
-{
- return make_int2(s, s);
-}
-inline __host__ __device__ int2 make_int2(int3 a)
-{
- return make_int2(a.x, a.y);
-}
-inline __host__ __device__ int2 make_int2(uint2 a)
-{
- return make_int2(int(a.x), int(a.y));
-}
-inline __host__ __device__ int2 make_int2(float2 a)
-{
- return make_int2(int(a.x), int(a.y));
-}
-
-inline __host__ __device__ uint2 make_uint2(uint s)
-{
- return make_uint2(s, s);
-}
-inline __host__ __device__ uint2 make_uint2(uint3 a)
-{
- return make_uint2(a.x, a.y);
-}
-inline __host__ __device__ uint2 make_uint2(int2 a)
-{
- return make_uint2(uint(a.x), uint(a.y));
-}
-
-inline __host__ __device__ float3 make_float3(float s)
-{
- return make_float3(s, s, s);
-}
-inline __host__ __device__ float3 make_float3(float2 a)
-{
- return make_float3(a.x, a.y, 0.0f);
-}
-inline __host__ __device__ float3 make_float3(float2 a, float s)
-{
- return make_float3(a.x, a.y, s);
-}
-inline __host__ __device__ float3 make_float3(float4 a)
-{
- return make_float3(a.x, a.y, a.z);
-}
-inline __host__ __device__ float3 make_float3(int3 a)
-{
- return make_float3(float(a.x), float(a.y), float(a.z));
-}
-inline __host__ __device__ float3 make_float3(uint3 a)
-{
- return make_float3(float(a.x), float(a.y), float(a.z));
-}
-
-inline __host__ __device__ int3 make_int3(int s)
-{
- return make_int3(s, s, s);
-}
-inline __host__ __device__ int3 make_int3(int2 a)
-{
- return make_int3(a.x, a.y, 0);
-}
-inline __host__ __device__ int3 make_int3(int2 a, int s)
-{
- return make_int3(a.x, a.y, s);
-}
-inline __host__ __device__ int3 make_int3(uint3 a)
-{
- return make_int3(int(a.x), int(a.y), int(a.z));
-}
-inline __host__ __device__ int3 make_int3(float3 a)
-{
- return make_int3(int(a.x), int(a.y), int(a.z));
-}
-
-inline __host__ __device__ uint3 make_uint3(uint s)
-{
- return make_uint3(s, s, s);
-}
-inline __host__ __device__ uint3 make_uint3(uint2 a)
-{
- return make_uint3(a.x, a.y, 0);
-}
-inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
-{
- return make_uint3(a.x, a.y, s);
-}
-inline __host__ __device__ uint3 make_uint3(uint4 a)
-{
- return make_uint3(a.x, a.y, a.z);
-}
-inline __host__ __device__ uint3 make_uint3(int3 a)
-{
- return make_uint3(uint(a.x), uint(a.y), uint(a.z));
-}
-
-inline __host__ __device__ float4 make_float4(float s)
-{
- return make_float4(s, s, s, s);
-}
-inline __host__ __device__ float4 make_float4(float3 a)
-{
- return make_float4(a.x, a.y, a.z, 0.0f);
-}
-inline __host__ __device__ float4 make_float4(float3 a, float w)
-{
- return make_float4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ float4 make_float4(int4 a)
-{
- return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
-}
-inline __host__ __device__ float4 make_float4(uint4 a)
-{
- return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
-}
-
-inline __host__ __device__ int4 make_int4(int s)
-{
- return make_int4(s, s, s, s);
-}
-inline __host__ __device__ int4 make_int4(int3 a)
-{
- return make_int4(a.x, a.y, a.z, 0);
-}
-inline __host__ __device__ int4 make_int4(int3 a, int w)
-{
- return make_int4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ int4 make_int4(uint4 a)
-{
- return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
-}
-inline __host__ __device__ int4 make_int4(float4 a)
-{
- return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
-}
-
-
-inline __host__ __device__ uint4 make_uint4(uint s)
-{
- return make_uint4(s, s, s, s);
-}
-inline __host__ __device__ uint4 make_uint4(uint3 a)
-{
- return make_uint4(a.x, a.y, a.z, 0);
-}
-inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
-{
- return make_uint4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ uint4 make_uint4(int4 a)
-{
- return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// negate
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator-(float2 &a)
-{
- return make_float2(-a.x, -a.y);
-}
-inline __host__ __device__ int2 operator-(int2 &a)
-{
- return make_int2(-a.x, -a.y);
-}
-inline __host__ __device__ float3 operator-(float3 &a)
-{
- return make_float3(-a.x, -a.y, -a.z);
-}
-inline __host__ __device__ int3 operator-(int3 &a)
-{
- return make_int3(-a.x, -a.y, -a.z);
-}
-inline __host__ __device__ float4 operator-(float4 &a)
-{
- return make_float4(-a.x, -a.y, -a.z, -a.w);
-}
-inline __host__ __device__ int4 operator-(int4 &a)
-{
- return make_int4(-a.x, -a.y, -a.z, -a.w);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// addition
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator+(float2 a, float2 b)
-{
- return make_float2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(float2 &a, float2 b)
-{
- a.x += b.x;
- a.y += b.y;
-}
-inline __host__ __device__ float2 operator+(float2 a, float b)
-{
- return make_float2(a.x + b, a.y + b);
-}
-inline __host__ __device__ float2 operator+(float b, float2 a)
-{
- return make_float2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(float2 &a, float b)
-{
- a.x += b;
- a.y += b;
-}
-
-inline __host__ __device__ int2 operator+(int2 a, int2 b)
-{
- return make_int2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(int2 &a, int2 b)
-{
- a.x += b.x;
- a.y += b.y;
-}
-inline __host__ __device__ int2 operator+(int2 a, int b)
-{
- return make_int2(a.x + b, a.y + b);
-}
-inline __host__ __device__ int2 operator+(int b, int2 a)
-{
- return make_int2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(int2 &a, int b)
-{
- a.x += b;
- a.y += b;
-}
-
-inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
-{
- return make_uint2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
-{
- a.x += b.x;
- a.y += b.y;
-}
-inline __host__ __device__ uint2 operator+(uint2 a, uint b)
-{
- return make_uint2(a.x + b, a.y + b);
-}
-inline __host__ __device__ uint2 operator+(uint b, uint2 a)
-{
- return make_uint2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(uint2 &a, uint b)
-{
- a.x += b;
- a.y += b;
-}
-
-
-inline __host__ __device__ float3 operator+(float3 a, float3 b)
-{
- return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(float3 &a, float3 b)
-{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
-}
-inline __host__ __device__ float3 operator+(float3 a, float b)
-{
- return make_float3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(float3 &a, float b)
-{
- a.x += b;
- a.y += b;
- a.z += b;
-}
-
-inline __host__ __device__ int3 operator+(int3 a, int3 b)
-{
- return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(int3 &a, int3 b)
-{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
-}
-inline __host__ __device__ int3 operator+(int3 a, int b)
-{
- return make_int3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(int3 &a, int b)
-{
- a.x += b;
- a.y += b;
- a.z += b;
-}
-
-inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
-{
- return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
-{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
-}
-inline __host__ __device__ uint3 operator+(uint3 a, uint b)
-{
- return make_uint3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(uint3 &a, uint b)
-{
- a.x += b;
- a.y += b;
- a.z += b;
-}
-
-inline __host__ __device__ int3 operator+(int b, int3 a)
-{
- return make_int3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ uint3 operator+(uint b, uint3 a)
-{
- return make_uint3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ float3 operator+(float b, float3 a)
-{
- return make_float3(a.x + b, a.y + b, a.z + b);
-}
-
-inline __host__ __device__ float4 operator+(float4 a, float4 b)
-{
- return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(float4 &a, float4 b)
-{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
- a.w += b.w;
-}
-inline __host__ __device__ float4 operator+(float4 a, float b)
-{
- return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ float4 operator+(float b, float4 a)
-{
- return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(float4 &a, float b)
-{
- a.x += b;
- a.y += b;
- a.z += b;
- a.w += b;
-}
-
-inline __host__ __device__ int4 operator+(int4 a, int4 b)
-{
- return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(int4 &a, int4 b)
-{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
- a.w += b.w;
-}
-inline __host__ __device__ int4 operator+(int4 a, int b)
-{
- return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ int4 operator+(int b, int4 a)
-{
- return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(int4 &a, int b)
-{
- a.x += b;
- a.y += b;
- a.z += b;
- a.w += b;
-}
-
-inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
-{
- return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
-{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
- a.w += b.w;
-}
-inline __host__ __device__ uint4 operator+(uint4 a, uint b)
-{
- return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ uint4 operator+(uint b, uint4 a)
-{
- return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(uint4 &a, uint b)
-{
- a.x += b;
- a.y += b;
- a.z += b;
- a.w += b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// subtract
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator-(float2 a, float2 b)
-{
- return make_float2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(float2 &a, float2 b)
-{
- a.x -= b.x;
- a.y -= b.y;
-}
-inline __host__ __device__ float2 operator-(float2 a, float b)
-{
- return make_float2(a.x - b, a.y - b);
-}
-inline __host__ __device__ float2 operator-(float b, float2 a)
-{
- return make_float2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(float2 &a, float b)
-{
- a.x -= b;
- a.y -= b;
-}
-
-inline __host__ __device__ int2 operator-(int2 a, int2 b)
-{
- return make_int2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(int2 &a, int2 b)
-{
- a.x -= b.x;
- a.y -= b.y;
-}
-inline __host__ __device__ int2 operator-(int2 a, int b)
-{
- return make_int2(a.x - b, a.y - b);
-}
-inline __host__ __device__ int2 operator-(int b, int2 a)
-{
- return make_int2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(int2 &a, int b)
-{
- a.x -= b;
- a.y -= b;
-}
-
-inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
-{
- return make_uint2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
-{
- a.x -= b.x;
- a.y -= b.y;
-}
-inline __host__ __device__ uint2 operator-(uint2 a, uint b)
-{
- return make_uint2(a.x - b, a.y - b);
-}
-inline __host__ __device__ uint2 operator-(uint b, uint2 a)
-{
- return make_uint2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(uint2 &a, uint b)
-{
- a.x -= b;
- a.y -= b;
-}
-
-inline __host__ __device__ float3 operator-(float3 a, float3 b)
-{
- return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(float3 &a, float3 b)
-{
- a.x -= b.x;
- a.y -= b.y;
- a.z -= b.z;
-}
-inline __host__ __device__ float3 operator-(float3 a, float b)
-{
- return make_float3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ float3 operator-(float b, float3 a)
-{
- return make_float3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(float3 &a, float b)
-{
- a.x -= b;
- a.y -= b;
- a.z -= b;
-}
-
-inline __host__ __device__ int3 operator-(int3 a, int3 b)
-{
- return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(int3 &a, int3 b)
-{
- a.x -= b.x;
- a.y -= b.y;
- a.z -= b.z;
-}
-inline __host__ __device__ int3 operator-(int3 a, int b)
-{
- return make_int3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ int3 operator-(int b, int3 a)
-{
- return make_int3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(int3 &a, int b)
-{
- a.x -= b;
- a.y -= b;
- a.z -= b;
-}
-
-inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
-{
- return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
-{
- a.x -= b.x;
- a.y -= b.y;
- a.z -= b.z;
-}
-inline __host__ __device__ uint3 operator-(uint3 a, uint b)
-{
- return make_uint3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ uint3 operator-(uint b, uint3 a)
-{
- return make_uint3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(uint3 &a, uint b)
-{
- a.x -= b;
- a.y -= b;
- a.z -= b;
-}
-
-inline __host__ __device__ float4 operator-(float4 a, float4 b)
-{
- return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(float4 &a, float4 b)
-{
- a.x -= b.x;
- a.y -= b.y;
- a.z -= b.z;
- a.w -= b.w;
-}
-inline __host__ __device__ float4 operator-(float4 a, float b)
-{
- return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ void operator-=(float4 &a, float b)
-{
- a.x -= b;
- a.y -= b;
- a.z -= b;
- a.w -= b;
-}
-
-inline __host__ __device__ int4 operator-(int4 a, int4 b)
-{
- return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(int4 &a, int4 b)
-{
- a.x -= b.x;
- a.y -= b.y;
- a.z -= b.z;
- a.w -= b.w;
-}
-inline __host__ __device__ int4 operator-(int4 a, int b)
-{
- return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ int4 operator-(int b, int4 a)
-{
- return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
-}
-inline __host__ __device__ void operator-=(int4 &a, int b)
-{
- a.x -= b;
- a.y -= b;
- a.z -= b;
- a.w -= b;
-}
-
-inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
-{
- return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
-{
- a.x -= b.x;
- a.y -= b.y;
- a.z -= b.z;
- a.w -= b.w;
-}
-inline __host__ __device__ uint4 operator-(uint4 a, uint b)
-{
- return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ uint4 operator-(uint b, uint4 a)
-{
- return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
-}
-inline __host__ __device__ void operator-=(uint4 &a, uint b)
-{
- a.x -= b;
- a.y -= b;
- a.z -= b;
- a.w -= b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// multiply
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator*(float2 a, float2 b)
-{
- return make_float2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(float2 &a, float2 b)
-{
- a.x *= b.x;
- a.y *= b.y;
-}
-inline __host__ __device__ float2 operator*(float2 a, float b)
-{
- return make_float2(a.x * b, a.y * b);
-}
-inline __host__ __device__ float2 operator*(float b, float2 a)
-{
- return make_float2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(float2 &a, float b)
-{
- a.x *= b;
- a.y *= b;
-}
-
-inline __host__ __device__ int2 operator*(int2 a, int2 b)
-{
- return make_int2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(int2 &a, int2 b)
-{
- a.x *= b.x;
- a.y *= b.y;
-}
-inline __host__ __device__ int2 operator*(int2 a, int b)
-{
- return make_int2(a.x * b, a.y * b);
-}
-inline __host__ __device__ int2 operator*(int b, int2 a)
-{
- return make_int2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(int2 &a, int b)
-{
- a.x *= b;
- a.y *= b;
-}
-
-inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
-{
- return make_uint2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
-{
- a.x *= b.x;
- a.y *= b.y;
-}
-inline __host__ __device__ uint2 operator*(uint2 a, uint b)
-{
- return make_uint2(a.x * b, a.y * b);
-}
-inline __host__ __device__ uint2 operator*(uint b, uint2 a)
-{
- return make_uint2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(uint2 &a, uint b)
-{
- a.x *= b;
- a.y *= b;
-}
-
-inline __host__ __device__ float3 operator*(float3 a, float3 b)
-{
- return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(float3 &a, float3 b)
-{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
-}
-inline __host__ __device__ float3 operator*(float3 a, float b)
-{
- return make_float3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ float3 operator*(float b, float3 a)
-{
- return make_float3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(float3 &a, float b)
-{
- a.x *= b;
- a.y *= b;
- a.z *= b;
-}
-
-inline __host__ __device__ int3 operator*(int3 a, int3 b)
-{
- return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(int3 &a, int3 b)
-{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
-}
-inline __host__ __device__ int3 operator*(int3 a, int b)
-{
- return make_int3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ int3 operator*(int b, int3 a)
-{
- return make_int3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(int3 &a, int b)
-{
- a.x *= b;
- a.y *= b;
- a.z *= b;
-}
-
-inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
-{
- return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
-{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
-}
-inline __host__ __device__ uint3 operator*(uint3 a, uint b)
-{
- return make_uint3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ uint3 operator*(uint b, uint3 a)
-{
- return make_uint3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(uint3 &a, uint b)
-{
- a.x *= b;
- a.y *= b;
- a.z *= b;
-}
-
-inline __host__ __device__ float4 operator*(float4 a, float4 b)
-{
- return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(float4 &a, float4 b)
-{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
- a.w *= b.w;
-}
-inline __host__ __device__ float4 operator*(float4 a, float b)
-{
- return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ float4 operator*(float b, float4 a)
-{
- return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(float4 &a, float b)
-{
- a.x *= b;
- a.y *= b;
- a.z *= b;
- a.w *= b;
-}
-
-inline __host__ __device__ int4 operator*(int4 a, int4 b)
-{
- return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(int4 &a, int4 b)
-{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
- a.w *= b.w;
-}
-inline __host__ __device__ int4 operator*(int4 a, int b)
-{
- return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ int4 operator*(int b, int4 a)
-{
- return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(int4 &a, int b)
-{
- a.x *= b;
- a.y *= b;
- a.z *= b;
- a.w *= b;
-}
-
-inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
-{
- return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
-{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
- a.w *= b.w;
-}
-inline __host__ __device__ uint4 operator*(uint4 a, uint b)
-{
- return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ uint4 operator*(uint b, uint4 a)
-{
- return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(uint4 &a, uint b)
-{
- a.x *= b;
- a.y *= b;
- a.z *= b;
- a.w *= b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// divide
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator/(float2 a, float2 b)
-{
- return make_float2(a.x / b.x, a.y / b.y);
-}
-inline __host__ __device__ void operator/=(float2 &a, float2 b)
-{
- a.x /= b.x;
- a.y /= b.y;
-}
-inline __host__ __device__ float2 operator/(float2 a, float b)
-{
- return make_float2(a.x / b, a.y / b);
-}
-inline __host__ __device__ void operator/=(float2 &a, float b)
-{
- a.x /= b;
- a.y /= b;
-}
-inline __host__ __device__ float2 operator/(float b, float2 a)
-{
- return make_float2(b / a.x, b / a.y);
-}
-
-inline __host__ __device__ float3 operator/(float3 a, float3 b)
-{
- return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
-}
-inline __host__ __device__ void operator/=(float3 &a, float3 b)
-{
- a.x /= b.x;
- a.y /= b.y;
- a.z /= b.z;
-}
-inline __host__ __device__ float3 operator/(float3 a, float b)
-{
- return make_float3(a.x / b, a.y / b, a.z / b);
-}
-inline __host__ __device__ void operator/=(float3 &a, float b)
-{
- a.x /= b;
- a.y /= b;
- a.z /= b;
-}
-inline __host__ __device__ float3 operator/(float b, float3 a)
-{
- return make_float3(b / a.x, b / a.y, b / a.z);
-}
-
-inline __host__ __device__ float4 operator/(float4 a, float4 b)
-{
- return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
-}
-inline __host__ __device__ void operator/=(float4 &a, float4 b)
-{
- a.x /= b.x;
- a.y /= b.y;
- a.z /= b.z;
- a.w /= b.w;
-}
-inline __host__ __device__ float4 operator/(float4 a, float b)
-{
- return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
-}
-inline __host__ __device__ void operator/=(float4 &a, float b)
-{
- a.x /= b;
- a.y /= b;
- a.z /= b;
- a.w /= b;
-}
-inline __host__ __device__ float4 operator/(float b, float4 a)
-{
- return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// min
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fminf(float2 a, float2 b)
-{
- return make_float2(fminf(a.x,b.x), fminf(a.y,b.y));
-}
-inline __host__ __device__ float3 fminf(float3 a, float3 b)
-{
- return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
-}
-inline __host__ __device__ float4 fminf(float4 a, float4 b)
-{
- return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
-}
-
-inline __host__ __device__ int2 min(int2 a, int2 b)
-{
- return make_int2(min(a.x,b.x), min(a.y,b.y));
-}
-inline __host__ __device__ int3 min(int3 a, int3 b)
-{
- return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
-}
-inline __host__ __device__ int4 min(int4 a, int4 b)
-{
- return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
-}
-
-inline __host__ __device__ uint2 min(uint2 a, uint2 b)
-{
- return make_uint2(min(a.x,b.x), min(a.y,b.y));
-}
-inline __host__ __device__ uint3 min(uint3 a, uint3 b)
-{
- return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
-}
-inline __host__ __device__ uint4 min(uint4 a, uint4 b)
-{
- return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// max
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
-{
- return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y));
-}
-inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
-{
- return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
-}
-inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
-{
- return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
-}
-
-inline __host__ __device__ int2 max(int2 a, int2 b)
-{
- return make_int2(max(a.x,b.x), max(a.y,b.y));
-}
-inline __host__ __device__ int3 max(int3 a, int3 b)
-{
- return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
-}
-inline __host__ __device__ int4 max(int4 a, int4 b)
-{
- return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
-}
-
-inline __host__ __device__ uint2 max(uint2 a, uint2 b)
-{
- return make_uint2(max(a.x,b.x), max(a.y,b.y));
-}
-inline __host__ __device__ uint3 max(uint3 a, uint3 b)
-{
- return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
-}
-inline __host__ __device__ uint4 max(uint4 a, uint4 b)
-{
- return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// lerp
-// - linear interpolation between a and b, based on value t in [0, 1] range
-////////////////////////////////////////////////////////////////////////////////
-
-inline __device__ __host__ float lerp(float a, float b, float t)
-{
- return a + t*(b-a);
-}
-inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
-{
- return a + t*(b-a);
-}
-inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
-{
- return a + t*(b-a);
-}
-inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
-{
- return a + t*(b-a);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// clamp
-// - clamp the value v to be in the range [a, b]
-////////////////////////////////////////////////////////////////////////////////
-
-inline __device__ __host__ float clamp(float f, float a, float b)
-{
- return fmaxf(a, fminf(f, b));
-}
-inline __device__ __host__ int clamp(int f, int a, int b)
-{
- return max(a, min(f, b));
-}
-inline __device__ __host__ uint clamp(uint f, uint a, uint b)
-{
- return max(a, min(f, b));
-}
-
-inline __device__ __host__ float2 clamp(float2 v, float a, float b)
-{
- return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
-}
-inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
-{
- return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
-}
-inline __device__ __host__ float3 clamp(float3 v, float a, float b)
-{
- return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
-}
-inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
-{
- return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
-}
-inline __device__ __host__ float4 clamp(float4 v, float a, float b)
-{
- return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
-}
-inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
-{
- return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
-}
-
-inline __device__ __host__ int2 clamp(int2 v, int a, int b)
-{
- return make_int2(clamp(v.x, a, b), clamp(v.y, a, b));
-}
-inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
-{
- return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
-}
-inline __device__ __host__ int3 clamp(int3 v, int a, int b)
-{
- return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
-}
-inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
-{
- return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
-}
-inline __device__ __host__ int4 clamp(int4 v, int a, int b)
-{
- return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
-}
-inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
-{
- return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
-}
-
-inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
-{
- return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b));
-}
-inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
-{
- return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
-}
-inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
-{
- return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
-}
-inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
-{
- return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
-}
-inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
-{
- return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
-}
-inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
-{
- return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// dot product
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float dot(float2 a, float2 b)
-{
- return a.x * b.x + a.y * b.y;
-}
-inline __host__ __device__ float dot(float3 a, float3 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z;
-}
-inline __host__ __device__ float dot(float4 a, float4 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-}
-
-inline __host__ __device__ int dot(int2 a, int2 b)
-{
- return a.x * b.x + a.y * b.y;
-}
-inline __host__ __device__ int dot(int3 a, int3 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z;
-}
-inline __host__ __device__ int dot(int4 a, int4 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-}
-
-inline __host__ __device__ uint dot(uint2 a, uint2 b)
-{
- return a.x * b.x + a.y * b.y;
-}
-inline __host__ __device__ uint dot(uint3 a, uint3 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z;
-}
-inline __host__ __device__ uint dot(uint4 a, uint4 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// length
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float length(float2 v)
-{
- return sqrtf(dot(v, v));
-}
-inline __host__ __device__ float length(float3 v)
-{
- return sqrtf(dot(v, v));
-}
-inline __host__ __device__ float length(float4 v)
-{
- return sqrtf(dot(v, v));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// normalize
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 normalize(float2 v)
-{
- float invLen = rsqrtf(dot(v, v));
- return v * invLen;
-}
-inline __host__ __device__ float3 normalize(float3 v)
-{
- float invLen = rsqrtf(dot(v, v));
- return v * invLen;
-}
-inline __host__ __device__ float4 normalize(float4 v)
-{
- float invLen = rsqrtf(dot(v, v));
- return v * invLen;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// floor
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 floorf(float2 v)
-{
- return make_float2(floorf(v.x), floorf(v.y));
-}
-inline __host__ __device__ float3 floorf(float3 v)
-{
- return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));
-}
-inline __host__ __device__ float4 floorf(float4 v)
-{
- return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// frac - returns the fractional portion of a scalar or each vector component
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float fracf(float v)
-{
- return v - floorf(v);
-}
-inline __host__ __device__ float2 fracf(float2 v)
-{
- return make_float2(fracf(v.x), fracf(v.y));
-}
-inline __host__ __device__ float3 fracf(float3 v)
-{
- return make_float3(fracf(v.x), fracf(v.y), fracf(v.z));
-}
-inline __host__ __device__ float4 fracf(float4 v)
-{
- return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// fmod
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fmodf(float2 a, float2 b)
-{
- return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y));
-}
-inline __host__ __device__ float3 fmodf(float3 a, float3 b)
-{
- return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z));
-}
-inline __host__ __device__ float4 fmodf(float4 a, float4 b)
-{
- return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// absolute value
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fabs(float2 v)
-{
- return make_float2(fabs(v.x), fabs(v.y));
-}
-inline __host__ __device__ float3 fabs(float3 v)
-{
- return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
-}
-inline __host__ __device__ float4 fabs(float4 v)
-{
- return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
-}
-
-inline __host__ __device__ int2 abs(int2 v)
-{
- return make_int2(abs(v.x), abs(v.y));
-}
-inline __host__ __device__ int3 abs(int3 v)
-{
- return make_int3(abs(v.x), abs(v.y), abs(v.z));
-}
-inline __host__ __device__ int4 abs(int4 v)
-{
- return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// reflect
-// - returns reflection of incident ray I around surface normal N
-// - N should be normalized, reflected vector's length is equal to length of I
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float3 reflect(float3 i, float3 n)
-{
- return i - 2.0f * n * dot(n,i);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// cross product
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float3 cross(float3 a, float3 b)
-{
- return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// smoothstep
-// - returns 0 if x < a
-// - returns 1 if x > b
-// - otherwise returns smooth interpolation between 0 and 1 based on x
-////////////////////////////////////////////////////////////////////////////////
-
-inline __device__ __host__ float smoothstep(float a, float b, float x)
-{
- float y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(3.0f - (2.0f*y)));
-}
-inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
-{
- float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y)));
-}
-inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
-{
- float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y)));
-}
-inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
-{
- float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y)));
-}
-
-#endif
diff --git a/src/algorithms/libs/cudahelpers/helper_string.h b/src/algorithms/libs/cudahelpers/helper_string.h
deleted file mode 100644
index c734314..0000000
--- a/src/algorithms/libs/cudahelpers/helper_string.h
+++ /dev/null
@@ -1,516 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// These are helper functions for the SDK samples (string parsing, timers, etc)
-#ifndef STRING_HELPER_H
-#define STRING_HELPER_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fstream>
-#include <string>
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#ifndef _CRT_SECURE_NO_DEPRECATE
-#define _CRT_SECURE_NO_DEPRECATE
-#endif
-#ifndef STRCASECMP
-#define STRCASECMP _stricmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP _strnicmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result != 0)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf_s
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf_s
-#endif
-#else // Linux Includes
-#include <string.h>
-#include <strings.h>
-
-#ifndef STRCASECMP
-#define STRCASECMP strcasecmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP strncasecmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result == NULL)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf
-#endif
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// CUDA Utility Helper Functions
-inline int stringRemoveDelimiter(char delimiter, const char *string)
-{
- int string_start = 0;
-
- while (string[string_start] == delimiter)
- {
- string_start++;
- }
-
- if (string_start >= (int)strlen(string)-1)
- {
- return 0;
- }
-
- return string_start;
-}
-
-inline int getFileExtension(char *filename, char **extension)
-{
- int string_length = (int)strlen(filename);
-
- while (filename[string_length--] != '.')
- {
- if (string_length == 0)
- break;
- }
-
- if (string_length > 0) string_length += 2;
-
- if (string_length == 0)
- *extension = NULL;
- else
- *extension = &filename[string_length];
-
- return string_length;
-}
-
-
-inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
-{
- bool bFound = false;
-
- if (argc >= 1)
- {
- for (int i=1; i < argc; i++)
- {
- int string_start = stringRemoveDelimiter('-', argv[i]);
- const char *string_argv = &argv[i][string_start];
-
- const char *equal_pos = strchr(string_argv, '=');
- int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
-
- int length = (int)strlen(string_ref);
-
- if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
- {
- bFound = true;
- continue;
- }
- }
- }
-
- return bFound;
-}
-
-// This function wraps the CUDA Driver API into a template function
-template <class T>
-inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
-{
- bool bFound = false;
-
- if (argc >= 1)
- {
- for (int i=1; i < argc; i++)
- {
- int string_start = stringRemoveDelimiter('-', argv[i]);
- const char *string_argv = &argv[i][string_start];
- int length = (int)strlen(string_ref);
-
- if (!STRNCASECMP(string_argv, string_ref, length))
- {
- if (length+1 <= (int)strlen(string_argv))
- {
- int auto_inc = (string_argv[length] == '=') ? 1 : 0;
- *value = (T)atoi(&string_argv[length + auto_inc]);
- }
-
- bFound = true;
- i=argc;
- }
- }
- }
-
- return bFound;
-}
-
-inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
-{
- bool bFound = false;
- int value = -1;
-
- if (argc >= 1)
- {
- for (int i=1; i < argc; i++)
- {
- int string_start = stringRemoveDelimiter('-', argv[i]);
- const char *string_argv = &argv[i][string_start];
- int length = (int)strlen(string_ref);
-
- if (!STRNCASECMP(string_argv, string_ref, length))
- {
- if (length+1 <= (int)strlen(string_argv))
- {
- int auto_inc = (string_argv[length] == '=') ? 1 : 0;
- value = atoi(&string_argv[length + auto_inc]);
- }
- else
- {
- value = 0;
- }
-
- bFound = true;
- continue;
- }
- }
- }
-
- if (bFound)
- {
- return value;
- }
- else
- {
- return 0;
- }
-}
-
-inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
-{
- bool bFound = false;
- float value = -1;
-
- if (argc >= 1)
- {
- for (int i=1; i < argc; i++)
- {
- int string_start = stringRemoveDelimiter('-', argv[i]);
- const char *string_argv = &argv[i][string_start];
- int length = (int)strlen(string_ref);
-
- if (!STRNCASECMP(string_argv, string_ref, length))
- {
- if (length+1 <= (int)strlen(string_argv))
- {
- int auto_inc = (string_argv[length] == '=') ? 1 : 0;
- value = (float)atof(&string_argv[length + auto_inc]);
- }
- else
- {
- value = 0.f;
- }
-
- bFound = true;
- continue;
- }
- }
- }
-
- if (bFound)
- {
- return value;
- }
- else
- {
- return 0;
- }
-}
-
-inline bool getCmdLineArgumentString(const int argc, const char **argv,
- const char *string_ref, char **string_retval)
-{
- bool bFound = false;
-
- if (argc >= 1)
- {
- for (int i=1; i < argc; i++)
- {
- int string_start = stringRemoveDelimiter('-', argv[i]);
- char *string_argv = (char *)&argv[i][string_start];
- int length = (int)strlen(string_ref);
-
- if (!STRNCASECMP(string_argv, string_ref, length))
- {
- *string_retval = &string_argv[length+1];
- bFound = true;
- continue;
- }
- }
- }
-
- if (!bFound)
- {
- *string_retval = NULL;
- }
-
- return bFound;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//! Find the path for a file assuming that
-//! files are found in the searchPath.
-//!
-//! @return the path if succeeded, otherwise 0
-//! @param filename name of the file
-//! @param executable_path optional absolute path of the executable
-//////////////////////////////////////////////////////////////////////////////
-inline char *sdkFindFilePath(const char *filename, const char *executable_path)
-{
- // <executable_name> defines a variable that is replaced with the name of the executable
-
- // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
- // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
- const char *searchPath[] =
- {
- "./", // same dir
- "./common/", // "/common/" subdir
- "./common/data/", // "/common/data/" subdir
- "./data/", // "/data/" subdir
- "./src/", // "/src/" subdir
- "./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir
- "./inc/", // "/inc/" subdir
- "./0_Simple/", // "/0_Simple/" subdir
- "./1_Utilities/", // "/1_Utilities/" subdir
- "./2_Graphics/", // "/2_Graphics/" subdir
- "./3_Imaging/", // "/3_Imaging/" subdir
- "./4_Finance/", // "/4_Finance/" subdir
- "./5_Simulations/", // "/5_Simulations/" subdir
- "./6_Advanced/", // "/6_Advanced/" subdir
- "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir
- "./8_Android/", // "/8_Android/" subdir
- "./samples/", // "/samples/" subdir
-
- "../", // up 1 in tree
- "../common/", // up 1 in tree, "/common/" subdir
- "../common/data/", // up 1 in tree, "/common/data/" subdir
- "../data/", // up 1 in tree, "/data/" subdir
- "../src/", // up 1 in tree, "/src/" subdir
- "../inc/", // up 1 in tree, "/inc/" subdir
-
- "../0_Simple/<executable_name>/data/", // up 1 in tree, "/0_Simple/<executable_name>/" subdir
- "../1_Utilities/<executable_name>/data/", // up 1 in tree, "/1_Utilities/<executable_name>/" subdir
- "../2_Graphics/<executable_name>/data/", // up 1 in tree, "/2_Graphics/<executable_name>/" subdir
- "../3_Imaging/<executable_name>/data/", // up 1 in tree, "/3_Imaging/<executable_name>/" subdir
- "../4_Finance/<executable_name>/data/", // up 1 in tree, "/4_Finance/<executable_name>/" subdir
- "../5_Simulations/<executable_name>/data/", // up 1 in tree, "/5_Simulations/<executable_name>/" subdir
- "../6_Advanced/<executable_name>/data/", // up 1 in tree, "/6_Advanced/<executable_name>/" subdir
- "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir
- "../8_Android/<executable_name>/data/", // up 1 in tree, "/8_Android/<executable_name>/" subdir
- "../samples/<executable_name>/data/", // up 1 in tree, "/samples/<executable_name>/" subdir
- "../../", // up 2 in tree
- "../../common/", // up 2 in tree, "/common/" subdir
- "../../common/data/", // up 2 in tree, "/common/data/" subdir
- "../../data/", // up 2 in tree, "/data/" subdir
- "../../src/", // up 2 in tree, "/src/" subdir
- "../../inc/", // up 2 in tree, "/inc/" subdir
- "../../sandbox/<executable_name>/data/", // up 2 in tree, "/sandbox/<executable_name>/" subdir
- "../../0_Simple/<executable_name>/data/", // up 2 in tree, "/0_Simple/<executable_name>/" subdir
- "../../1_Utilities/<executable_name>/data/", // up 2 in tree, "/1_Utilities/<executable_name>/" subdir
- "../../2_Graphics/<executable_name>/data/", // up 2 in tree, "/2_Graphics/<executable_name>/" subdir
- "../../3_Imaging/<executable_name>/data/", // up 2 in tree, "/3_Imaging/<executable_name>/" subdir
- "../../4_Finance/<executable_name>/data/", // up 2 in tree, "/4_Finance/<executable_name>/" subdir
- "../../5_Simulations/<executable_name>/data/", // up 2 in tree, "/5_Simulations/<executable_name>/" subdir
- "../../6_Advanced/<executable_name>/data/", // up 2 in tree, "/6_Advanced/<executable_name>/" subdir
- "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir
- "../../8_Android/<executable_name>/data/", // up 2 in tree, "/8_Android/<executable_name>/" subdir
- "../../samples/<executable_name>/data/", // up 2 in tree, "/samples/<executable_name>/" subdir
- "../../../", // up 3 in tree
- "../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir
- "../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir
- "../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir
- "../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir
- "../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir
- "../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
- "../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
- "../../../sandbox/<executable_name>/inc/", // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
- "../../../0_Simple/<executable_name>/data/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
- "../../../1_Utilities/<executable_name>/data/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
- "../../../2_Graphics/<executable_name>/data/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
- "../../../3_Imaging/<executable_name>/data/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
- "../../../4_Finance/<executable_name>/data/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
- "../../../5_Simulations/<executable_name>/data/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
- "../../../6_Advanced/<executable_name>/data/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
- "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
- "../../../8_Android/<executable_name>/data/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
- "../../../0_Simple/<executable_name>/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
- "../../../1_Utilities/<executable_name>/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
- "../../../2_Graphics/<executable_name>/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
- "../../../3_Imaging/<executable_name>/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
- "../../../4_Finance/<executable_name>/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
- "../../../5_Simulations/<executable_name>/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
- "../../../6_Advanced/<executable_name>/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
- "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
- "../../../8_Android/<executable_name>/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
- "../../../samples/<executable_name>/data/", // up 3 in tree, "/samples/<executable_name>/" subdir
- "../../../common/", // up 3 in tree, "../../../common/" subdir
- "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir
- "../../../data/", // up 3 in tree, "../../../data/" subdir
- "../../../../", // up 4 in tree
- "../../../../src/<executable_name>/", // up 4 in tree, "/src/<executable_name>/" subdir
- "../../../../src/<executable_name>/data/", // up 4 in tree, "/src/<executable_name>/data/" subdir
- "../../../../src/<executable_name>/src/", // up 4 in tree, "/src/<executable_name>/src/" subdir
- "../../../../src/<executable_name>/inc/", // up 4 in tree, "/src/<executable_name>/inc/" subdir
- "../../../../sandbox/<executable_name>/", // up 4 in tree, "/sandbox/<executable_name>/" subdir
- "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir
- "../../../../sandbox/<executable_name>/src/", // up 4 in tree, "/sandbox/<executable_name>/src/" subdir
- "../../../../sandbox/<executable_name>/inc/", // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir
- "../../../../0_Simple/<executable_name>/data/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
- "../../../../1_Utilities/<executable_name>/data/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
- "../../../../2_Graphics/<executable_name>/data/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
- "../../../../3_Imaging/<executable_name>/data/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
- "../../../../4_Finance/<executable_name>/data/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
- "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
- "../../../../6_Advanced/<executable_name>/data/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
- "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
- "../../../../8_Android/<executable_name>/data/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
- "../../../../0_Simple/<executable_name>/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
- "../../../../1_Utilities/<executable_name>/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
- "../../../../2_Graphics/<executable_name>/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
- "../../../../3_Imaging/<executable_name>/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
- "../../../../4_Finance/<executable_name>/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
- "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
- "../../../../6_Advanced/<executable_name>/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
- "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
- "../../../../8_Android/<executable_name>/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
- "../../../../samples/<executable_name>/data/", // up 4 in tree, "/samples/<executable_name>/" subdir
- "../../../../common/", // up 4 in tree, "../../../common/" subdir
- "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir
- "../../../../data/", // up 4 in tree, "../../../data/" subdir
- "../../../../../", // up 5 in tree
- "../../../../../src/<executable_name>/", // up 5 in tree, "/src/<executable_name>/" subdir
- "../../../../../src/<executable_name>/data/", // up 5 in tree, "/src/<executable_name>/data/" subdir
- "../../../../../src/<executable_name>/src/", // up 5 in tree, "/src/<executable_name>/src/" subdir
- "../../../../../src/<executable_name>/inc/", // up 5 in tree, "/src/<executable_name>/inc/" subdir
- "../../../../../sandbox/<executable_name>/", // up 5 in tree, "/sandbox/<executable_name>/" subdir
- "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir
- "../../../../../sandbox/<executable_name>/src/", // up 5 in tree, "/sandbox/<executable_name>/src/" subdir
- "../../../../../sandbox/<executable_name>/inc/", // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir
- "../../../../../0_Simple/<executable_name>/data/", // up 5 in tree, "/0_Simple/<executable_name>/" subdir
- "../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree, "/1_Utilities/<executable_name>/" subdir
- "../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree, "/2_Graphics/<executable_name>/" subdir
- "../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree, "/3_Imaging/<executable_name>/" subdir
- "../../../../../4_Finance/<executable_name>/data/", // up 5 in tree, "/4_Finance/<executable_name>/" subdir
- "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir
- "../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree, "/6_Advanced/<executable_name>/" subdir
- "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir
- "../../../../../8_Android/<executable_name>/data/", // up 5 in tree, "/8_Android/<executable_name>/" subdir
- "../../../../../samples/<executable_name>/data/", // up 5 in tree, "/samples/<executable_name>/" subdir
- "../../../../../common/", // up 5 in tree, "../../../common/" subdir
- "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir
- };
-
- // Extract the executable name
- std::string executable_name;
-
- if (executable_path != 0)
- {
- executable_name = std::string(executable_path);
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
- // Windows path delimiter
- size_t delimiter_pos = executable_name.find_last_of('\\');
- executable_name.erase(0, delimiter_pos + 1);
-
- if (executable_name.rfind(".exe") != std::string::npos)
- {
- // we strip .exe, only if the .exe is found
- executable_name.resize(executable_name.size() - 4);
- }
-
-#else
- // Linux & OSX path delimiter
- size_t delimiter_pos = executable_name.find_last_of('/');
- executable_name.erase(0,delimiter_pos+1);
-#endif
- }
-
- // Loop over all search paths and return the first hit
- for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
- {
- std::string path(searchPath[i]);
- size_t executable_name_pos = path.find("<executable_name>");
-
- // If there is executable_name variable in the searchPath
- // replace it with the value
- if (executable_name_pos != std::string::npos)
- {
- if (executable_path != 0)
- {
- path.replace(executable_name_pos, strlen("<executable_name>"), executable_name);
- }
- else
- {
- // Skip this path entry if no executable argument is given
- continue;
- }
- }
-
-#ifdef _DEBUG
- printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
-#endif
-
- // Test if the file exists
- path.append(filename);
- FILE *fp;
- FOPEN(fp, path.c_str(), "rb");
-
- if (fp != NULL)
- {
- fclose(fp);
- // File found
- // returning an allocated array here for backwards compatibility reasons
- char *file_path = (char *) malloc(path.length() + 1);
- STRCPY(file_path, path.length() + 1, path.c_str());
- return file_path;
- }
-
- if (fp)
- {
- fclose(fp);
- }
- }
-
- // File not found
- return 0;
-}
-
-#endif
diff --git a/src/algorithms/libs/cudahelpers/helper_timer.h b/src/algorithms/libs/cudahelpers/helper_timer.h
deleted file mode 100644
index 39ddc77..0000000
--- a/src/algorithms/libs/cudahelpers/helper_timer.h
+++ /dev/null
@@ -1,499 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// Helper Timing Functions
-#ifndef HELPER_TIMER_H
-#define HELPER_TIMER_H
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// includes, system
-#include <vector>
-
-// includes, project
-#include <exception.h>
-
-// Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions
-// But rather in a self contained class interface
-class StopWatchInterface
-{
- public:
- StopWatchInterface() {};
- virtual ~StopWatchInterface() {};
-
- public:
- //! Start time measurement
- virtual void start() = 0;
-
- //! Stop time measurement
- virtual void stop() = 0;
-
- //! Reset time counters to zero
- virtual void reset() = 0;
-
- //! Time in msec. after start. If the stop watch is still running (i.e. there
- //! was no call to stop()) then the elapsed time is returned, otherwise the
- //! time between the last start() and stop call is returned
- virtual float getTime() = 0;
-
- //! Mean time to date based on the number of times the stopwatch has been
- //! _stopped_ (ie finished sessions) and the current total time
- virtual float getAverageTime() = 0;
-};
-
-
-//////////////////////////////////////////////////////////////////
-// Begin Stopwatch timer class definitions for all OS platforms //
-//////////////////////////////////////////////////////////////////
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-// includes, system
-#define WINDOWS_LEAN_AND_MEAN
-#include <windows.h>
-#undef min
-#undef max
-
-//! Windows specific implementation of StopWatch
-class StopWatchWin : public StopWatchInterface
-{
- public:
- //! Constructor, default
- StopWatchWin() :
- start_time(), end_time(),
- diff_time(0.0f), total_time(0.0f),
- running(false), clock_sessions(0), freq(0), freq_set(false)
- {
- if (! freq_set)
- {
- // helper variable
- LARGE_INTEGER temp;
-
- // get the tick frequency from the OS
- QueryPerformanceFrequency((LARGE_INTEGER *) &temp);
-
- // convert to type in which it is needed
- freq = ((double) temp.QuadPart) / 1000.0;
-
- // rememeber query
- freq_set = true;
- }
- };
-
- // Destructor
- ~StopWatchWin() { };
-
- public:
- //! Start time measurement
- inline void start();
-
- //! Stop time measurement
- inline void stop();
-
- //! Reset time counters to zero
- inline void reset();
-
- //! Time in msec. after start. If the stop watch is still running (i.e. there
- //! was no call to stop()) then the elapsed time is returned, otherwise the
- //! time between the last start() and stop call is returned
- inline float getTime();
-
- //! Mean time to date based on the number of times the stopwatch has been
- //! _stopped_ (ie finished sessions) and the current total time
- inline float getAverageTime();
-
- private:
- // member variables
-
- //! Start of measurement
- LARGE_INTEGER start_time;
- //! End of measurement
- LARGE_INTEGER end_time;
-
- //! Time difference between the last start and stop
- float diff_time;
-
- //! TOTAL time difference between starts and stops
- float total_time;
-
- //! flag if the stop watch is running
- bool running;
-
- //! Number of times clock has been started
- //! and stopped to allow averaging
- int clock_sessions;
-
- //! tick frequency
- double freq;
-
- //! flag if the frequency has been set
- bool freq_set;
-};
-
-// functions, inlined
-
-////////////////////////////////////////////////////////////////////////////////
-//! Start time measurement
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchWin::start()
-{
- QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
- running = true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Stop time measurement and increment add to the current diff_time summation
-//! variable. Also increment the number of times this clock has been run.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchWin::stop()
-{
- QueryPerformanceCounter((LARGE_INTEGER *) &end_time);
- diff_time = (float)
- (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
-
- total_time += diff_time;
- clock_sessions++;
- running = false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Reset the timer to 0. Does not change the timer running state but does
-//! recapture this point in time as the current start time if it is running.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchWin::reset()
-{
- diff_time = 0;
- total_time = 0;
- clock_sessions = 0;
-
- if (running)
- {
- QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
- }
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. after start. If the stop watch is still running (i.e. there
-//! was no call to stop()) then the elapsed time is returned added to the
-//! current diff_time sum, otherwise the current summed time difference alone
-//! is returned.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchWin::getTime()
-{
- // Return the TOTAL time to date
- float retval = total_time;
-
- if (running)
- {
- LARGE_INTEGER temp;
- QueryPerformanceCounter((LARGE_INTEGER *) &temp);
- retval += (float)
- (((double)(temp.QuadPart - start_time.QuadPart)) / freq);
- }
-
- return retval;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. for a single run based on the total number of COMPLETED runs
-//! and the total time.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchWin::getAverageTime()
-{
- return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
-}
-#else
-// Declarations for Stopwatch on Linux and Mac OSX
-// includes, system
-#include <ctime>
-#include <sys/time.h>
-
-//! Windows specific implementation of StopWatch
-class StopWatchLinux : public StopWatchInterface
-{
- public:
- //! Constructor, default
- StopWatchLinux() :
- start_time(), diff_time(0.0), total_time(0.0),
- running(false), clock_sessions(0)
- { };
-
- // Destructor
- virtual ~StopWatchLinux()
- { };
-
- public:
- //! Start time measurement
- inline void start();
-
- //! Stop time measurement
- inline void stop();
-
- //! Reset time counters to zero
- inline void reset();
-
- //! Time in msec. after start. If the stop watch is still running (i.e. there
- //! was no call to stop()) then the elapsed time is returned, otherwise the
- //! time between the last start() and stop call is returned
- inline float getTime();
-
- //! Mean time to date based on the number of times the stopwatch has been
- //! _stopped_ (ie finished sessions) and the current total time
- inline float getAverageTime();
-
- private:
-
- // helper functions
-
- //! Get difference between start time and current time
- inline float getDiffTime();
-
- private:
-
- // member variables
-
- //! Start of measurement
- struct timeval start_time;
-
- //! Time difference between the last start and stop
- float diff_time;
-
- //! TOTAL time difference between starts and stops
- float total_time;
-
- //! flag if the stop watch is running
- bool running;
-
- //! Number of times clock has been started
- //! and stopped to allow averaging
- int clock_sessions;
-};
-
-// functions, inlined
-
-////////////////////////////////////////////////////////////////////////////////
-//! Start time measurement
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchLinux::start()
-{
- gettimeofday(&start_time, 0);
- running = true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Stop time measurement and increment add to the current diff_time summation
-//! variable. Also increment the number of times this clock has been run.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchLinux::stop()
-{
- diff_time = getDiffTime();
- total_time += diff_time;
- running = false;
- clock_sessions++;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Reset the timer to 0. Does not change the timer running state but does
-//! recapture this point in time as the current start time if it is running.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchLinux::reset()
-{
- diff_time = 0;
- total_time = 0;
- clock_sessions = 0;
-
- if (running)
- {
- gettimeofday(&start_time, 0);
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. after start. If the stop watch is still running (i.e. there
-//! was no call to stop()) then the elapsed time is returned added to the
-//! current diff_time sum, otherwise the current summed time difference alone
-//! is returned.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchLinux::getTime()
-{
- // Return the TOTAL time to date
- float retval = total_time;
-
- if (running)
- {
- retval += getDiffTime();
- }
-
- return retval;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. for a single run based on the total number of COMPLETED runs
-//! and the total time.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchLinux::getAverageTime()
-{
- return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
-}
-////////////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchLinux::getDiffTime()
-{
- struct timeval t_time;
- gettimeofday(&t_time, 0);
-
- // time difference in milli-seconds
- return (float)(1000.0 * (t_time.tv_sec - start_time.tv_sec)
- + (0.001 * (t_time.tv_usec - start_time.tv_usec)));
-}
-#endif // WIN32
-
-////////////////////////////////////////////////////////////////////////////////
-//! Timer functionality exported
-
-////////////////////////////////////////////////////////////////////////////////
-//! Create a new timer
-//! @return true if a time has been created, otherwise false
-//! @param name of the new timer, 0 if the creation failed
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkCreateTimer(StopWatchInterface **timer_interface)
-{
- //printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
- *timer_interface = (StopWatchInterface *)new StopWatchWin();
-#else
- *timer_interface = (StopWatchInterface *)new StopWatchLinux();
-#endif
- return (*timer_interface != NULL) ? true : false;
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//! Delete a timer
-//! @return true if a time has been deleted, otherwise false
-//! @param name of the timer to delete
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkDeleteTimer(StopWatchInterface **timer_interface)
-{
- //printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
- if (*timer_interface)
- {
- delete *timer_interface;
- *timer_interface = NULL;
- }
-
- return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Start the time with name \a name
-//! @param name name of the timer to start
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkStartTimer(StopWatchInterface **timer_interface)
-{
- //printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
- if (*timer_interface)
- {
- (*timer_interface)->start();
- }
-
- return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Stop the time with name \a name. Does not reset.
-//! @param name name of the timer to stop
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkStopTimer(StopWatchInterface **timer_interface)
-{
- // printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
- if (*timer_interface)
- {
- (*timer_interface)->stop();
- }
-
- return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Resets the timer's counter.
-//! @param name name of the timer to reset.
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkResetTimer(StopWatchInterface **timer_interface)
-{
- // printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
- if (*timer_interface)
- {
- (*timer_interface)->reset();
- }
-
- return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Return the average time for timer execution as the total time
-//! for the timer dividied by the number of completed (stopped) runs the timer
-//! has made.
-//! Excludes the current running time if the timer is currently running.
-//! @param name name of the timer to return the time of
-////////////////////////////////////////////////////////////////////////////////
-inline float
-sdkGetAverageTimerValue(StopWatchInterface **timer_interface)
-{
- // printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface);
- if (*timer_interface)
- {
- return (*timer_interface)->getAverageTime();
- }
- else
- {
- return 0.0f;
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Total execution time for the timer over all runs since the last reset
-//! or timer creation.
-//! @param name name of the timer to obtain the value of.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-sdkGetTimerValue(StopWatchInterface **timer_interface)
-{
- // printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
- if (*timer_interface)
- {
- return (*timer_interface)->getTime();
- }
- else
- {
- return 0.0f;
- }
-}
-
-#endif // HELPER_TIMER_H
diff --git a/src/algorithms/tracking/gnuradio_blocks/CMakeLists.txt b/src/algorithms/tracking/gnuradio_blocks/CMakeLists.txt
index 4f7763d..cadba3e 100644
--- a/src/algorithms/tracking/gnuradio_blocks/CMakeLists.txt
+++ b/src/algorithms/tracking/gnuradio_blocks/CMakeLists.txt
@@ -19,9 +19,7 @@
if(ENABLE_CUDA)
set(OPT_TRACKING_BLOCKS ${OPT_TRACKING_BLOCKS} gps_l1_ca_dll_pll_tracking_gpu_cc.cc)
- set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES}
- ${CUDA_INCLUDE_DIRS}
- ${CMAKE_SOURCE_DIR}/src/algorithms/libs/cudahelpers)
+ set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS})
set(OPT_TRACKING_LIBRARIES ${OPT_TRACKING_LIBRARIES} ${CUDA_LIBRARIES})
endif(ENABLE_CUDA)
diff --git a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
index 339c372..fea305f 100644
--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
@@ -47,11 +47,9 @@
#include "lock_detectors.h"
#include "GPS_L1_CA.h"
#include "control_message_factory.h"
-#include <volk/volk.h> //volk_alignement
-// includes
+#include <volk/volk.h> // volk_alignment
#include <cuda_profiler_api.h>
-#include <helper_functions.h> // helper for shared functions common to CUDA Samples
-#include <helper_cuda.h> // helper functions for CUDA error checking and initialization
+
/*!
* \todo Include in definition header file
@@ -131,24 +129,24 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
multicorrelator_gpu->init_cuda_integrated_resampler(0, NULL, 2 * d_vector_length , GPS_L1_CA_CODE_LENGTH_CHIPS , N_CORRELATORS);
// Get space for the resampled early / prompt / late local replicas
- checkCudaErrors(cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float), cudaHostAllocMapped ));
-
+ cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float), cudaHostAllocMapped );
//allocate host memory
//pinned memory mode - use special function to get OS-pinned memory
- checkCudaErrors(cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped ));
+ cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped );
//old local codes vector
- //checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
+ // (cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
//new integrated shifts
- //checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
+ // (cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
// correlator outputs (scalar)
- checkCudaErrors(cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS, cudaHostAllocWriteCombined ));
+ cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS, cudaHostAllocWriteCombined );
+
//map to EPL pointers
d_Early = &d_corr_outs_gpu[0];
- d_Prompt = &d_corr_outs_gpu[1];
+ d_Prompt = &d_corr_outs_gpu[1];
d_Late = &d_corr_outs_gpu[2];
//--- Perform initializations ------------------------------
@@ -181,7 +179,6 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
systemName["G"] = std::string("GPS");
systemName["S"] = std::string("SBAS");
-
set_relative_rate(1.0/((double)d_vector_length*2));
d_channel_internal_queue = 0;
@@ -303,10 +300,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
{
// process vars
- float carr_error_hz=0.0;
- float carr_error_filt_hz=0.0;
- float code_error_chips=0.0;
- float code_error_filt_chips=0.0;
+ float carr_error_hz = 0.0;
+ float carr_error_filt_hz = 0.0;
+ float code_error_chips = 0.0;
+ float code_error_filt_chips = 0.0;
// Block input data and block output stream pointers
const gr_complex* in = (gr_complex*) input_items[0];
@@ -339,20 +336,20 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
// UPDATE NCO COMMAND
float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
- //code resampler on GPU (new)
+ //code resampler on GPU (new)
float code_phase_step_chips = static_cast<float>(d_code_freq_chips) / static_cast<float>(d_fs_in);
float rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
cudaProfilerStart();
multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda(
- d_corr_outs_gpu,
- in,
- d_rem_carr_phase_rad,
- phase_step_rad,
- code_phase_step_chips,
- rem_code_phase_chips,
- d_current_prn_length_samples,
- 3);
+ d_corr_outs_gpu,
+ in,
+ d_rem_carr_phase_rad,
+ phase_step_rad,
+ code_phase_step_chips,
+ rem_code_phase_chips,
+ d_current_prn_length_samples,
+ 3);
cudaProfilerStop();
// ################## PLL ##########################################################
diff --git a/src/algorithms/tracking/libs/CMakeLists.txt b/src/algorithms/tracking/libs/CMakeLists.txt
index ad2ca6d..a6a51cd 100644
--- a/src/algorithms/tracking/libs/CMakeLists.txt
+++ b/src/algorithms/tracking/libs/CMakeLists.txt
@@ -22,15 +22,11 @@ if(ENABLE_CUDA)
# set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30)
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_30,code=sm_30; -std=c++11;-O3; -use_fast_math -default-stream per-thread")
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
- CUDA_INCLUDE_DIRECTORIES(
- ${CMAKE_CURRENT_SOURCE_DIR}
- ${CMAKE_CURRENT_SOURCE_DIR}/../../libs/cudahelpers
- )
-
+ CUDA_INCLUDE_DIRECTORIES( ${CMAKE_CURRENT_SOURCE_DIR})
set(LIB_TYPE STATIC) #set the lib type
CUDA_ADD_LIBRARY(CUDA_CORRELATOR_LIB ${LIB_TYPE} cuda_multicorrelator.h cuda_multicorrelator.cu)
set(OPT_TRACKING_LIBRARIES ${OPT_TRACKING_LIBRARIES} CUDA_CORRELATOR_LIB)
- set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../../libs/cudahelpers)
+ set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS} )
endif(ENABLE_CUDA)
diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.cu b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
index 166bca3..43ffeed 100644
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
@@ -49,9 +49,6 @@
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
-// helper functions and utilities to work with CUDA
-#include <helper_cuda.h>
-#include <helper_functions.h>
#define ACCUM_N 256
@@ -224,7 +221,6 @@ __global__ void scalarProdGPUCPXxN(
//int vectorBase = IMUL(elementN, vec);
//int vectorEnd = vectorBase + elementN;
-
////////////////////////////////////////////////////////////////////////
// Each accumulator cycles through vectors with
// stride equal to number of total number of accumulators ACCUM_N
@@ -392,28 +388,28 @@ bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int sign
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }
- //checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
+ // (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
size_t size = signal_length_samples * sizeof(GPU_Complex);
- checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
- //checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
- checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
+ cudaMalloc((void **)&d_sig_in, size);
+ // (cudaMalloc((void **)&d_nco_in, size));
+ cudaMalloc((void **)&d_sig_doppler_wiped, size);
// old version: all local codes are independent vectors
- //checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size*n_correlators));
+ // (cudaMalloc((void **)&d_local_codes_in, size*n_correlators));
// new version: only one vector with extra samples to shift the local code for the correlator set
// Required: The last correlator tap in d_shifts_samples has the largest sample shift
size_t size_local_code_bytes = local_codes_length_samples * sizeof(GPU_Complex);
- checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes));
- checkCudaErrors(cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators));
+ cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes);
+ cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators);
//scalars
- checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
+ cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);
// Launch the Vector Add CUDA Kernel
threadsPerBlock = 256;
@@ -481,30 +477,30 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }
- //checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
+ // (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
size_t size = signal_length_samples * sizeof(GPU_Complex);
- checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
- checkCudaErrors(cudaMemset(d_sig_in,0,size));
+ cudaMalloc((void **)&d_sig_in, size);
+ cudaMemset(d_sig_in,0,size);
- //checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
- checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
- checkCudaErrors(cudaMemset(d_sig_doppler_wiped,0,size));
+ // (cudaMalloc((void **)&d_nco_in, size));
+ cudaMalloc((void **)&d_sig_doppler_wiped, size);
+ cudaMemset(d_sig_doppler_wiped,0,size);
- checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips));
- checkCudaErrors(cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips));
+ cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips);
+ cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips);
d_code_length_chips=code_length_chips;
- checkCudaErrors(cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators));
- checkCudaErrors(cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators));
+ cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators);
+ cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators);
//scalars
- checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
- checkCudaErrors(cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators));
+ cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);
+ cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators);
// Launch the Vector Add CUDA Kernel
threadsPerBlock = 256;
@@ -523,12 +519,12 @@ bool cuda_multicorrelator::set_local_code_and_taps(
)
{
// local code CPU -> GPU copy memory
- checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1));
+ cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1);
d_code_length_chips=(float)code_length_chips;
// Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
- checkCudaErrors(cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
- cudaMemcpyHostToDevice,stream1));
+ cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
+ cudaMemcpyHostToDevice,stream1);
return true;
}
@@ -550,40 +546,40 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
// input signal CPU -> GPU copy memory
- checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
- cudaMemcpyHostToDevice, stream1));
+ cudaMemcpyAsync(d_sig_in, sig_in, memSize,
+ cudaMemcpyHostToDevice, stream1);
//***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
- //checkCudaErrors(cudaMemcpyAsync(d_nco_in, nco_in, memSize,
+ // (cudaMemcpyAsync(d_nco_in, nco_in, memSize,
// cudaMemcpyHostToDevice, stream1));
// old version: all local codes are independent vectors
- //checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize*n_correlators,
+ // (cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize*n_correlators,
// cudaMemcpyHostToDevice, stream2));
// new version: only one vector with extra samples to shift the local code for the correlator set
// Required: The last correlator tap in d_shifts_samples has the largest sample shift
// local code CPU -> GPU copy memory
- checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize+sizeof(std::complex<float>)*shifts_samples[n_correlators-1],
- cudaMemcpyHostToDevice, stream2));
+ cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize+sizeof(std::complex<float>)*shifts_samples[n_correlators-1],
+ cudaMemcpyHostToDevice, stream2);
// Correlator shifts vector CPU -> GPU copy memory
- checkCudaErrors(cudaMemcpyAsync(d_shifts_samples, shifts_samples, sizeof(int)*n_correlators,
- cudaMemcpyHostToDevice, stream2));
+ cudaMemcpyAsync(d_shifts_samples, shifts_samples, sizeof(int)*n_correlators,
+ cudaMemcpyHostToDevice, stream2);
//Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
- checkCudaErrors(cudaStreamSynchronize(stream1));
+ cudaStreamSynchronize(stream1);
CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream1>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
//printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
//wait for Doppler wipeoff end...
- checkCudaErrors(cudaStreamSynchronize(stream1));
- checkCudaErrors(cudaStreamSynchronize(stream2));
- //checkCudaErrors(cudaDeviceSynchronize());
+ cudaStreamSynchronize(stream1);
+ cudaStreamSynchronize(stream2);
+ // (cudaDeviceSynchronize());
//old
// scalarProdGPUCPXxN<<<blocksPerGrid, threadsPerBlock,0 ,stream2>>>(
@@ -604,15 +600,15 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
n_correlators,
signal_length_samples
);
- checkCudaErrors(cudaGetLastError());
+ cudaGetLastError();
//wait for correlators end...
- checkCudaErrors(cudaStreamSynchronize(stream2));
+ cudaStreamSynchronize(stream2);
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
- checkCudaErrors(cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
- cudaMemcpyDeviceToHost));
+ cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
+ cudaMemcpyDeviceToHost);
return true;
}
@@ -629,19 +625,19 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
size_t memSize = signal_length_samples * sizeof(std::complex<float>);
// input signal CPU -> GPU copy memory
- checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
- cudaMemcpyHostToDevice, stream2));
+ cudaMemcpyAsync(d_sig_in, sig_in, memSize,
+ cudaMemcpyHostToDevice, stream2);
//***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
//Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
- checkCudaErrors(cudaStreamSynchronize(stream2));
+ cudaStreamSynchronize(stream2);
CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream2>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
//wait for Doppler wipeoff end...
- checkCudaErrors(cudaStreamSynchronize(stream1));
- checkCudaErrors(cudaStreamSynchronize(stream2));
+ cudaStreamSynchronize(stream1);
+ cudaStreamSynchronize(stream2);
//launch the multitap correlator with integrated local code resampler!
@@ -657,16 +653,16 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
signal_length_samples
);
- checkCudaErrors(cudaGetLastError());
+ cudaGetLastError();
//wait for correlators end...
- checkCudaErrors(cudaStreamSynchronize(stream1));
+ cudaStreamSynchronize(stream1);
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
- checkCudaErrors(cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
- cudaMemcpyDeviceToHost,stream1));
- checkCudaErrors(cudaStreamSynchronize(stream1));
+ cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
+ cudaMemcpyDeviceToHost,stream1);
+ cudaStreamSynchronize(stream1);
return true;
}
@@ -708,7 +704,7 @@ bool cuda_multicorrelator::free_cuda()
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
- //checkCudaErrors(cudaDeviceReset());
+ // (cudaDeviceReset());
return true;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits
mailing list