- Try to pre-allocate the arrays with NaN or zero values before using them in the "parfor" loop. This will prevent dynamic resizing of arrays in each iteration and improve the performance.
- Try using "parfeval" as it allows to submit multiple independent function evaluations to the parallel pool and can improve overall efficiency.
- Whenever possible try to use vectorized operations instead of loops for better performance.
- If there is the availability of the GPUs can use the "spmd" in the parallel pool to execute code parallelly in different workers.
- Try to use preexisting libraries which have better performance as compared to working on creating your own libraries.
How to speed up this code? How to improve its performance and efficiency?
2 ビュー (過去 30 日間)
古いコメントを表示
% creating toy data
rng('default')
nsimul = 10; % in the actual code I would like to have at least nsumul = 1000;
y = trnd(5,500,1); % in the actual code y = trnd(5,2500,1);
x = y(y < 0);
tic
thresh = autothreshold(x,nsimul,0.05);
toc
% the variable which I am interested in is thresh
disp(thresh)
% from here you can find the local functions involved in the calculation of
% thresh
% 1) first local function
function [threshold,pvalues,forwardstop,shape,scale] = autothreshold(x,nsimul,alpha)
% column vector
if size(x,1) < size(x,2)
x = x';
end
% eliminate zeros and missing data, sort data in descending order
x = sort(rmmissing(nonzeros(x)),'descend');
% sample size
N = numel(x);
% pre allocate variables
ADp = nan(N-1, 1);
shape = nan(N-1, 1);
scale = nan(N-1, 1);
% the minimum threhsold is 2
parfor k = 2:N
warning off
[~,ADp(k-1,:),shape(k-1,:),scale(k-1,:)] = GPDGoF(x(1:(k-1))-x(k),nsimul);
end
pvalues = ADp;
pvalues(isnan(pvalues)) = 1;
[threshold,forwardstop] = ForwardStop(pvalues,alpha);
end
% 2) second local function implied in the first
function [AD,ADp,shape,scale] = GPDGoF(x,nsimul)
% sample size
N = numel(x);
% parameters estimation
parmhat = gpfit(x);
shape = parmhat(1);
scale = parmhat(2);
if any(isnan(parmhat)) == 1;
AD = nan;
ADp = nan;
else
% Theoretical distribution function
CDF = 1 - ( 1 + shape./scale .* x) .^(-1./shape);
% Anderson-Darling GoF test
AD = - N - sum( ( 2 .*(N:-1:1)' - 1 ) ./N .* ( log(CDF) + log(1 - sort(CDF,'ascend') ) ) );
% Bootstrap p - values
simgpd = (scale/shape) .* (((1-sort(rand(N,nsimul),'descend')).^(-shape)) - 1);
% Parameter estimates of simulated RV
parmhatsim = arrayfun(@(i) gpfit(simgpd(:,i))', 1:nsimul, 'UniformOutput',false);
parmhatsim = cat(2, parmhatsim{:});
% Theoretical distribution function of simulated RV
CDFs = sort(1 - ( 1 + parmhatsim(1,:)./parmhatsim(2,:) .* simgpd) .^(-1./parmhatsim(1,:)),'descend');
% Bootstrap Anderson-Darling GoF test and computing p-value
AD_boot = - N - sum( repmat(( 2 .*(N:-1:1)' - 1 ) ./N,1,nsimul) .* ( log(CDFs) + log(1 - sort(CDFs,'ascend') ) ), 1);
ADp = mean(AD_boot > AD);
end
end
% 3) third function implied in the first function
function [threshold,forwardstop] = ForwardStop(pvalues,alpha)
if ~exist('alpha','var')
alpha = 0.05;
end
N = size(pvalues,1);
pvalues = flip(pvalues,1);
forwardstop = ( (1:N)'.^-1 ) .* cumsum( - log( 1 - pvalues),1);
if isempty(find(forwardstop <= alpha,1,'last'))
threshold = N + 1;
else
threshold = N - find(forwardstop <= alpha,1,'last') + 2;
end
forwardstop = flip(forwardstop,1);
end
I would like to speed my code as much as possible. Currently it takes days/weeks to run. Obviously the result should be the same (within the rng('default') seed).
0 件のコメント
回答 (1 件)
Ashutosh
2023 年 8 月 21 日
These steps can be followed to improve the performance:
0 件のコメント
参考
カテゴリ
Help Center および File Exchange で Creating and Concatenating Matrices についてさらに検索
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!