%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Simulated data for Finite Mixture Regression Model (FMRM)
% This follows precisely KPT Ex. 15.5, p. 262/263
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rand('state',37); % set arbitrary seed for uniform draws
randn('state',37); % set arbitrary seed for normal draws
n=10000; %sample size
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% lognormal data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
yln=lognrnd(log(10),sqrt(0.04),n,1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% chi-2 data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
ychi=chi2rnd(10,n,1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% mixture data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
G=2; % 2 latent classes (regimes, components)
cptrue=[0.4;0.6]; %true class membership probabilities
cp1=cptrue(1);
% we need some ordering to identify the model - this is due
% to the infamous "labeling switching" problem in these models
%set betas for each regime and build regression models
beta1true=0;
beta2true=2;
sig11true=1;
sig22true=0.5;
X=ones(n,1);
int=randn(n,1);
% Procedure 1:
%Draw y's - my approach using assignment matrix; use mnrnd, with only 2
%bins; this should automatically reduce to the binomial
e1=sqrt(sig11true)*int;
e2=sqrt(sig22true)*int;
y1=X*beta1true+e1;
y2=X*beta2true+e2;
% now assign each observation to one of the two models
Z=mnrnd(1,cptrue,n); %n by 2 matrix of rows with 1 zero and 1 one
%check distribution
check=mean(Z) % should be similar to cp
y=sum([y1 y2].*Z,2); %picks the y assigned by the Z matrix for each obs.
% ytest1=y;
% % % Procedure 2:
% %Draw y's - my approach using assignment matrix; use binomial, with only 2
% %bins
% int=binornd(1,cp1,n,1); %n by 1 assignment vector
% Z=[int 1-int];
% %check distribution
% check=mean(Z) % should be similar to cp1
% y=sum([y1 y2].*Z,2); %picks the y assigned by the Z matrix for each obs.
% ytest2=y;
%
%
%Alternative approach proposed in KPT script "bimod_mix":
% y=zeros(n,1);
% for i=1:n
% u=rand;
% if u <= cp1
% y(i) =beta1true + sqrt(sig11true)*int(i);
% else
% y(i) =beta2true + sqrt(sig22true)*int(i);
% end;
% end
% ytest3=y;
ymix=y;
% [mean(ytest1) mean(ytest2) mean(ytest3)];
% %all very close, no systematic ordering when I change random seed
% %so all 3 procedures for drawing y should be equivalent.
save c:\klaus\AAEC6564\mlab\worksp\2CMM_data beta1true beta2true ...
sig11true sig22true cptrue X yln ychi ymix;