Mex file c. Getting different results from matlab

6 ビュー (過去 30 日間)
Abdel Darwich
Abdel Darwich 2020 年 11 月 16 日
コメント済み: Abdel Darwich 2020 年 11 月 18 日
Hi all
I have recently started to use Mex n c,
I have created a mex file using the auotgenrated function in MATLAB, however I started writing mine to make it more efficient as my code still expensive.
For some reason I do get a discrepancy between my c code and MATLAB function which can reach 20%. I do not think I have abog in the code. I do think is something to do with memory allocation and index handling It could be a bug also. I would appreciate any help. Thanks a lot.
I will add the matlab code and the c code.
========================== % .m code
function [Vxs,Vys,Vzs] = Induced_Velocity_Tret_double(X0,Y0,Z0 ,X,Y,Z,Gamma, dt)
% allocate meomery
[Nc,Mc] = size(X0);
[Ns,Ms] = size(X);
Vxs = zeros(Nc,Mc); Vys = zeros(Nc,Mc); Vzs = zeros(Nc,Mc);
cnst1 = 4 .* pi;
%% Correction terms
alpha = 1.25643;
visc = 1.802e-5; % static ari viscuicty at 15 deg
r02 = 0.0001;
a1 = 2e-4;
%% start calcauting induced veocity
for i0 = 1:Nc
for j0 = 1:Mc
for i = 1:Ns-1
t = double(i) .* dt ;
for j = 1:Ms-1
cnst2 = Gamma(i,j)./ cnst1;
% correction terms
delta = 1 + a1 .* sqrt(Gamma(i,j).^2./visc);
rc = sqrt(r02+(4.*alpha.*t.*visc.*delta));
%------ line 1
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j), Y(i,j), Z(i,j), X(i,j+1), Y(i,j+1), Z(i,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%------ line 2
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j+1), Y(i,j+1), Z(i,j+1), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%------ line 3
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1),X(i+1,j), Y(i+1,j), Z(i+1,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%------ line4
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j), Y(i+1,j), Z(i+1,j), X(i,j), Y(i,j), Z(i,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
end
end
end
end
end
function [Vx,Vy,Vz] = Vind_LINE(x0,y0,z0,x1,y1,z1,x2,y2,z2,cnst, rc)
tol = 1e-8;
Vx = 0.;Vy = 0.;Vz = 0.;
rx1 = x0-x1; ry1 = y0-y1; rz1 = z0-z1;
rx2 = x0-x2; ry2 = y0-y2; rz2 = z0-z2;
crsr1r2x = (ry1 .* rz2) - (rz1 .* ry2);
crsr1r2y = (rz1 .* rx2) - (rx1 .* rz2);
crsr1r2z = (rx1 .* ry2) - (ry1 .* rx2);
magcrsr1r2 = (crsr1r2x.^2 + crsr1r2y.^2 + crsr1r2z.^2);
if magcrsr1r2 > tol
Frc1x = crsr1r2x ./ magcrsr1r2;
Frc1y = crsr1r2y ./ magcrsr1r2;
Frc1z = crsr1r2z ./ magcrsr1r2;
magr1 = sqrt(rx1.^2 + ry1.^2 + rz1.^2);
magr2 = sqrt(rx2.^2 + ry2.^2 + rz2.^2);
r1minusr2x = rx1-rx2;
r1minusr2y = ry1-ry2;
r1minusr2z = rz1-rz2;
Frc2 = r1minusr2x.* ((rx1./magr1)-(rx2./magr2)) + r1minusr2y.*((ry1./magr1)-(ry2./magr2)) + r1minusr2z.*((rz1./magr1)-(rz2./magr2)) ;
%% correction term
magr0 = sqrt(r1minusr2x.^2 + r1minusr2y.^2 + r1minusr2z.^2);
h = magcrsr1r2 ./ magr0;
k = h.^2./(rc.^2 +h.^2);
%%
Vx = cnst .* Frc1x .* Frc2 .* k;
Vy = cnst .* Frc1y .* Frc2 .* k;
Vz = cnst .* Frc1z .* Frc2 .* k;
end
end
========================== % .c mex code
#include <stdio.h>
#include <math.h>
#include "mex.h"
#include "matrix.h"
#include <omp.h>
static void Vind_LINE(double x0, double b_y0, double z0, double x1, double b_y1,
double z1, double x2, double y2, double z2, double cnst,
double rc, double *Vx, double *Vy, double *Vz)
{
double rx1; double ry1; double rz1; double rx2; double ry2; double rz2;
double crsr1r2x; double crsr1r2y; double crsr1r2z; double magcrsr1r2;
double magr1; double magr2; double r1minusr2x; double r1minusr2y; double r1minusr2z;
*Vx = 0.0;
*Vy = 0.0;
*Vz = 0.0;
/* Bounde vortex */
/* r0x = x2-x1; r0y = y2-y1; r0z = z2-z1; */
rx1 = x0 - x1;
ry1 = b_y0 - b_y1;
rz1 = z0 - z1;
rx2 = x0 - x2;
ry2 = b_y0 - y2;
rz2 = z0 - z2;
crsr1r2x = ry1 * rz2 - rz1 * ry2;
crsr1r2y = rz1 * rx2 - rx1 * rz2;
crsr1r2z = rx1 * ry2 - ry1 * rx2;
magcrsr1r2 = (crsr1r2x * crsr1r2x + crsr1r2y * crsr1r2y) + crsr1r2z * crsr1r2z;
if (magcrsr1r2 > 1.0E-8) {
magr1 = sqrt((rx1 * rx1 + ry1 * ry1) + rz1 * rz1);
magr2 = sqrt((rx2 * rx2 + ry2 * ry2) + rz2 * rz2);
r1minusr2x = rx1 - rx2;
r1minusr2y = ry1 - ry2;
r1minusr2z = rz1 - rz2;
magr2 = (r1minusr2x * (rx1 / magr1 - rx2 / magr2) + r1minusr2y * (ry1 /
magr1 - ry2 / magr2)) + r1minusr2z * (rz1 / magr1 - rz2 / magr2);
/* %% correction term */
magr1 = magcrsr1r2 / sqrt((r1minusr2x * r1minusr2x + r1minusr2y * r1minusr2y)
+ r1minusr2z * r1minusr2z);
magr1 *= magr1;
magr1 /= rc * rc + magr1;
/* %% */
*Vx = cnst * (crsr1r2x / magcrsr1r2) * magr2 * magr1;
*Vy = cnst * (crsr1r2y / magcrsr1r2) * magr2 * magr1;
*Vz = cnst * (crsr1r2z / magcrsr1r2) * magr2 * magr1;
}
}
/*
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[])
{
double *X0, *Y0, *Z0, *X, *Y, *Z, *Gamma;
double dt;
int Nc, Mc , Ns, Ms, NGamma, Nss, NV;
X0 = mxGetDoubles (prhs[0]);
Nc = mxGetM(prhs[0]);
Mc = mxGetN(prhs[0]);
Y0 = mxGetDoubles (prhs[1]);
Z0 = mxGetDoubles (prhs[2]);
X = mxGetDoubles (prhs[3]);
Ns = mxGetM(prhs[3]);
Ms = mxGetN(prhs[3]);
Y = mxGetDoubles (prhs[4]);
Z = mxGetDoubles (prhs[5]);
Gamma = mxGetDoubles (prhs[6]);
NGamma = mxGetM(prhs[6]);
dt = mxGetScalar (prhs[7]);
/* OUTPUTS */
plhs[0] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[1] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[2] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
//double Vxs[Nc][Mc], Vys[Nc][Mc], Vzs[Nc][Mc];
// double *Vxs, *Vys, *Vzs;
double* Vxs = (double*) mxGetDoubles(plhs[0]);
double* Vys = (double*) mxGetDoubles(plhs[1]);
double* Vzs = (double*) mxGetDoubles(plhs[2]);
int i0; int loop_ub; int i1; int b_j0; int i2; int i; double t; int i3; int j;
double cnst2; double rc; double Vx; double Vy; double Vz;
for (loop_ub = 0; loop_ub < Nc; loop_ub++) {
for (b_j0 = 0; b_j0 < Mc; b_j0++) {
for (i = 0; i <= Ns - 2; i++) {
i3 = Ms;
for (j = 0; j <= i3 - 2; j++) {
cnst2 = Gamma[i + NGamma * j] / 12.566370614359172;
/* correction terms */
rc = Gamma[i + NGamma * j] ;
rc = sqrt(0.0001 + 5.02572 * t * 1.802E-5 * (1.0 + 0.0002 * sqrt(rc *
rc / 1.802E-5)));
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
X[i + Ns * (j + 1)], Y[i + Ns * (j + 1)], Z[i +Ns * (j + 1)],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * (j + 1)], Y[i + Ns *(j + 1)], Z[i + Ns * (j + 1)],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j + 1)) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j +1)) + 1],
X[(i + Ns * j) + 1], Y[(i + Ns * j) + 1], Z[(i + Ns * j) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i +Ns * j) + 1], Y[(i+ Ns * j) + 1], Z[(i + Ns * j) + 1],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
}
}
}
}
mxFree(Vxs);
mxFree(Vys);
mxFree(Vzs);
}
/*
* File trailer for Induced_Velocity_Tret3.c
*
* [EOF]
*/
  3 件のコメント
Abdel Darwich
Abdel Darwich 2020 年 11 月 16 日
編集済み: Abdel Darwich 2020 年 11 月 16 日
Hi James
I did what you said, the difference comes only in the output. Vxs, Vys and Vzs. I will edit my answer and make the code mre readable.
I did spend a lot of time trying to understand where discrepancy is coming from by printing the inputs and outputs. It can only be seen in
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
I have large matrrices which is around 60x200 so I am wondering if MATLAB is not string the data in double precesion.
Abdel Darwich
Abdel Darwich 2020 年 11 月 16 日
Hi James
I found out that my problem is the output indiuces, [loop_ub + Nc * b_j0] m the data is not stored in the order I intended to have it in.
My outout is an array nxm, I guess i am still unsure how mex deal with matlab indices.
I am wondering would I be able to use openmp in mex ?

サインインしてコメントする。

採用された回答

James Tursa
James Tursa 2020 年 11 月 17 日
MATLAB stores matrices in column order. E.g., if A is a 2x3 matrix, and p is a pointer to the data area of A in a mex routine, then MATLAB will store the elements in memory as follows:
p[0] = A(1,1)
p[1] = A(2,1)
p[2] = A(1,2)
p[3] = A(2,2)
p[4] = A(1,3)
p[5] = A(2,3)
If real double full matrix A had generic dimensions of MxN, then accessing the elements linearly would be as follows with appropriate variable definitions:
M = mxGetM(prhs[0]);
N = mxGetN(prhs[0]);
p = mxGetDoubles(prhs[0]);
k = 0;
for( j=0; j<N; j++ ) {
for( i=0; i<M; i++ ) {
mexPrintf("p[%d] = A(%d,%d) = %g\n",k++,i+1,j+1,p[i+M*j]);
}
}
You can use OpenMP in mex routines, but you cannot do anything that invokes the MATLAB Memory Manager inside your parallel threads. So calling inspection routines such as the following would be OK inside these threads:
mxGetM
mxGetN
mxGetNumberOfDimensions
etc.
But doing any allocation/deallocation would not be OK inside the parallel threads:
mxCreateDoubleMatrix
mxDestroyArray
etc.
You must do all MATLAB API allocation/deallocation outside the parallel threads.
  1 件のコメント
Abdel Darwich
Abdel Darwich 2020 年 11 月 18 日
Thanks for your help. That made it a lot more clearer.

サインインしてコメントする。

その他の回答 (0 件)

カテゴリ

Help Center および File ExchangeWrite C Functions Callable from MATLAB (MEX Files) についてさらに検索

タグ

製品

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!

Translated by