Why does using cuFFT cause fatal error LNK1120 in a CUDA MEX file?

4 ビュー (過去 30 日間)
Moein Mozaffarzadeh
Moein Mozaffarzadeh 2021 年 7 月 6 日
Hi,
I'm trying to write a MEX gateway function (in CUDA) to add two arrays on the GPU. I would like to filter one of the signals (MediumX in the following code) before I do the summation on the GPU. However, when I use "cufftPlan1d" to generate an FFT plan, I get an error during compilation in MATLAB. The error is:
Error using mex
Creating library test2_GPUArray.lib and object test2_GPUArray.exp
test2_GPUArray.obj : error LNK2019: unresolved external symbol cufftPlan1d referenced in function mexFunction
test2_GPUArray.mexw64 : fatal error LNK1120: 1 unresolved externals
Error in mexcuda (line 168)
[varargout{1:nargout}] = mex(mexArguments{:});
Error in test2_GPUArray_matlabRunner (line 3)
mexcuda('-v', 'test2_GPUArray.cu' , 'NVCCFLAGS=-gencode=arch=compute_50,code=sm_50 -Xptxas -dlcm=cg ');
Here is my MEX gateway code:
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <stdio.h>
#include "cuda.h"
#include <iostream>
#include <mex.h>
#include "gpu/mxGPUArray.h"
#include <cuComplex.h>
#include <cublas_v2.h>
#include <thrust/complex.h>
#include <cufft.h>
//#define NRANK 1 // signals are 1-dimensional (NX spatial points).
//typedef thrust::complex<float> fcomp;
//#define NX 256
//#define /*BATCH 10*/
// Wraps a CUDA runtime call: evaluates it once and hands the returned cudaError_t,
// together with the call site's file and line, to gpuAssert.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
/*
 * Reports a failed CUDA runtime call.
 *
 * code  - status returned by the CUDA runtime call being checked.
 * file  - source file of the call site (supplied by gpuErrchk via __FILE__).
 * line  - source line of the call site (supplied by gpuErrchk via __LINE__).
 * abort - when true (default) the failure is fatal for the current MEX call;
 *         when false the failure is only logged to stderr and execution continues.
 *
 * Does nothing when code == cudaSuccess.
 */
inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort)
        {
            // A MEX file runs inside the MATLAB process, so exit() would shut MATLAB
            // down. Raise a MATLAB error instead; mexErrMsgIdAndTxt does not return.
            mexErrMsgIdAndTxt("test2_GPUArray:cudaError", "GPUassert: %s %s %d\n",
                              cudaGetErrorString(code), file, line);
        }
    }
}
/*
 * Element-wise integer addition on the device:
 *     Device_Data_Added[i] = Device_Data[i] + Device_MediumX[i]   for every i < N.
 *
 * Device_Data_Added - output buffer, at least N ints.
 * Device_Data       - first input buffer, at least N ints.
 * Device_MediumX    - second input buffer, at least N ints.
 * tex               - texture object; currently unused by the active code path
 *                     (only referenced by the commented-out fetch below).
 * N                 - number of elements to process.
 *
 * Each thread handles at most one element. The flat index is built from the
 * thread's position in its block, the block's position along grid x, and the
 * block row along grid y, so both 1D and 2D grids cover distinct elements.
 * Threads whose index is >= N do nothing.
 */
__global__ void add(int* Device_Data_Added,int* Device_Data, int* Device_MediumX, cudaTextureObject_t tex, int N) {
    int TID = threadIdx.y * blockDim.x + threadIdx.x;                   // index within the block
    int BlockOFFset = blockDim.x * blockDim.y * blockIdx.x;             // threads in preceding blocks of this grid row
    int RowOFFset = blockDim.x * blockDim.y * gridDim.x * blockIdx.y;   // threads in preceding grid rows
    // RowOFFset was previously computed but never used, which made every grid row
    // (blockIdx.y) work on the same elements. It is 0 for a 1D grid.
    int GID_RowBased = RowOFFset + BlockOFFset + TID;
    if (GID_RowBased < N) {
        Device_Data_Added[GID_RowBased] = Device_Data[GID_RowBased] + Device_MediumX[GID_RowBased];
        ///Device_Data_Added[GID_RowBased] = tex1Dfetch<int>(tex, GID_RowBased + 1.0f) + Device_MediumX[GID_RowBased];
    }
}
/*
 * MEX gateway: adds two int32 vectors on the GPU.
 *
 * MATLAB usage:  out = test2_GPUArray(MediumX, Data)
 *
 * prhs[0] (MediumX) - real int32 array with at least N (1024) elements.
 * prhs[1] (Data)    - real int32 array with at least N (1024) elements.
 * plhs[0]           - N-by-1 int32 column holding Data(1:N) + MediumX(1:N).
 *
 * Only the first N elements of each input are used. Invalid inputs raise a
 * MATLAB error instead of being reinterpreted or read out of bounds.
 *
 * A cuFFT R2C plan and a scratch buffer are created as scaffolding for a future
 * filtering step on MediumX; no transform is executed yet.
 *
 * Build note: cufftPlan1d/cufftDestroy are defined in the cuFFT library, which
 * has to be linked explicitly (e.g. pass '-lcufft' and, if needed, the CUDA
 * toolkit lib directory with '-L...' to mexcuda). Including <cufft.h> alone
 * leads to "LNK2019: unresolved external symbol cufftPlan1d".
 */
void mexFunction(int nlhs, mxArray* plhs[],
int nrhs, const mxArray* prhs[]) {
    const int N = 1024;     // number of elements processed per call
    const int BATCH = 1;    // number of 1D transforms described by the cuFFT plan
    const size_t ArrayByteSize = sizeof(int) * N;

    // ---- argument validation ------------------------------------------------
    if (nrhs < 2) {
        mexErrMsgIdAndTxt("test2_GPUArray:nrhs", "Two inputs required: MediumX and Data.");
    }
    if (nlhs > 1) {
        mexErrMsgIdAndTxt("test2_GPUArray:nlhs", "At most one output is produced.");
    }
    for (int i = 0; i < 2; ++i) {
        // The host buffers are copied as raw 32-bit ints, so any other class
        // (e.g. double) would be silently misinterpreted, and a shorter array
        // would be read past its end.
        if (!mxIsInt32(prhs[i]) || mxIsComplex(prhs[i]) ||
            mxGetNumberOfElements(prhs[i]) < static_cast<size_t>(N)) {
            mexErrMsgIdAndTxt("test2_GPUArray:input",
                              "Input %d must be a real int32 array with at least %d elements.",
                              i + 1, N);
        }
    }

    // mxGetData is the accessor for non-double numeric data.
    const int* MediumX = static_cast<const int*>(mxGetData(prhs[0]));
    const int* Data = static_cast<const int*>(mxGetData(prhs[1]));

    //mxGPUArray const* MediumX = mxGPUCreateFromMxArray(prhs[0]); // Can be CPU or GPU, will copy to GPU if its not already there
    //int* Device_MediumX = static_cast<int*>((int*)mxGPUGetDataReadOnly(MediumX)); // get the pointer itself (assuming float data)

    // Output is created up front; MATLAB owns and manages this memory.
    plhs[0] = mxCreateNumericMatrix(N, 1, mxINT32_CLASS, mxREAL);
    int* Data_New = static_cast<int*>(mxGetData(plhs[0]));

    // ---- filtering scaffolding (no transform executed yet) ------------------
    cufftHandle plan;
    cufftReal* MediumXF = NULL;
    gpuErrchk(cudaMalloc((void**)&MediumXF, sizeof(cufftReal) * N * BATCH));
    cufftResult fftStatus = cufftPlan1d(&plan, N, CUFFT_R2C, BATCH);
    if (fftStatus != CUFFT_SUCCESS) {
        cudaFree(MediumXF);
        mexErrMsgIdAndTxt("test2_GPUArray:cufft", "cufftPlan1d failed with status %d.",
                          static_cast<int>(fftStatus));
    }

    // ---- device buffers -----------------------------------------------------
    int* Device_MediumX = NULL;
    gpuErrchk(cudaMalloc((void**)&Device_MediumX, ArrayByteSize));
    gpuErrchk(cudaMemcpy(Device_MediumX, MediumX, ArrayByteSize, cudaMemcpyHostToDevice));

    int* Device_Data = NULL;
    gpuErrchk(cudaMalloc((void**)&Device_Data, ArrayByteSize));
    gpuErrchk(cudaMemcpy(Device_Data, Data, ArrayByteSize, cudaMemcpyHostToDevice));

    int* Device_Data_Added = NULL;
    gpuErrchk(cudaMalloc((void**)&Device_Data_Added, ArrayByteSize));

    // ---- texture object over Device_Data (linear, 32-bit signed) ------------
    cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeLinear;
    resDesc.res.linear.devPtr = Device_Data;
    resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
    resDesc.res.linear.desc.x = 32; // bits per channel
    resDesc.res.linear.sizeInBytes = ArrayByteSize;

    cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.readMode = cudaReadModeElementType;

    cudaTextureObject_t tex = 0;
    gpuErrchk(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));

    // ---- launch -------------------------------------------------------------
    dim3 block(1024);
    // Ceiling division: exactly enough blocks to cover N, with no spare block
    // when N is a multiple of the block size.
    dim3 grid((N + block.x - 1) / block.x);
    add<<<grid, block>>>(Device_Data_Added, Device_Data, Device_MediumX, tex, N);
    gpuErrchk(cudaGetLastError()); // a kernel launch reports configuration errors only this way

    // Blocking copy: also waits for the kernel to finish before reading the result.
    gpuErrchk(cudaMemcpy(Data_New, Device_Data_Added, ArrayByteSize, cudaMemcpyDeviceToHost));

    // ---- cleanup ------------------------------------------------------------
    // Destroy the texture object before freeing the buffer it refers to.
    gpuErrchk(cudaDestroyTextureObject(tex));
    gpuErrchk(cudaFree(Device_Data));
    gpuErrchk(cudaFree(Device_Data_Added));
    gpuErrchk(cudaFree(Device_MediumX));
    gpuErrchk(cudaFree(MediumXF));
    cufftDestroy(plan); // release the plan's resources; previously leaked on every call
    //mxGPUDestroyGPUArray(MediumX);
}
There is no filtering defined in this code for now, but I think I need to be able to get cufftPlan1d running first. Could you please let me know what is wrong? I already have "#include <cufft.h>" at the beginning of my code!
Moein.

採用された回答

Linda Koletsou Soulti
Linda Koletsou Soulti 2021 年 7 月 8 日
Hello Moein,
you will also need to link against the cuFFT library using -lcufft, in a similar way to how NPP is used in the following example:
Cheers,
Linda
  3 件のコメント
Linda Koletsou Soulti
Linda Koletsou Soulti 2021 年 7 月 9 日
Hello Moein,
the following command should provide linking against cuFFT:
mexcuda('-v','test2_GPUArray.cu', 'NVCC_FLAGS=-gencode=arch=compute_50,code=sm_50 -Xptxas -dlcm=cg', "-LC:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\lib\x64",'-lcufft');
Cheers,
Linda
Moein Mozaffarzadeh
Moein Mozaffarzadeh 2021 年 7 月 9 日
Yes, it works now. Thank you Linda.

サインインしてコメントする。

その他の回答 (0 件)

カテゴリ

Help Center および File ExchangeGPU Computing についてさらに検索

製品


リリース

R2021a

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!

Translated by