function tsp = multilocus_tests(y,s1,s2,model,B,outfilep,outfilen);

% Applies the tests described in 'Powerful Multilocus Tests of Genetic
% Association in the Presence of Gene-Gene and Gene-Environment
% Interactions,' by Chatterjee N., Kalaylioglu Z., Moslehi R., Peters U., Wacholder S., 
% American Journal of Human Genetics, Vol. 79, December (2006)
%
% Program:  written by Zeynep Kalaylioglu, Ph.D., last updated on 22 December 2006
%
% INPUT:
%       y        = binary response                                   (n x 1)
%       s1       = SNP data of the gene being tested                 (n x ns1)
%       s2       = SNP data of the second gene                       (n x ns2)
%       model    = vector of test codes, each one of
%                    1 -> LogMain, 2 -> LogMain&Int, 3 -> TukAssoc
%                  (row or column vector; a column is converted to a row)
%       B        = number of permutations; the same number is passed to
%                  nulldist_latent as the number of normal resamples
%       outfilep = file to which the permutation statistics are appended
%       outfilen = file to which the simulated null values of the TukAssoc
%                  statistic are appended
%
% OUTPUT:
%       tsp = one row per requested test:
%             [model, test statistic, permutation p-value, asymptotic p-value]

% Reject an unusable model specification before any expensive work starts;
% otherwise the failure would surface later as an unassigned-variable error.
if isempty(model) || ~all(ismember(model(:),[1 2 3]))
    error('multilocus_tests:badModel', ...
          'model must be a non-empty vector with entries from {1,2,3}.');
end
model = model(:)';      % downstream code and the result table expect a row

ns1 = size(s1,2);
ns2 = size(s2,2);
dof1 = ns1; % degrees of freedom for LogMain test statistic
dof2 = ns1 + ns2*ns1;  % degrees of freedom for LogMain&Int test statistic

disp('running...');

% test statistic 
ts = stmainstinterlatent_teststats_2g(y,[s1 s2],[ns1 ns2],model);

% permutation based p values
tp = perm_based_null_dist_2g(y,[s1 s2],[ns1 ns2],B,outfilep,model);
pp = sum((tp >= repmat(ts,B,1)),1)/B;

% asymptotic based p values
% NOTE: the simulated null distribution is only consulted for model 3, but
% it is generated unconditionally so that outfilen is always written, as
% callers may rely on that file being produced.
ta = nulldist_latent(y,[s1 s2],[ns1 ns2],outfilen,B);
pa = zeros(1,numel(model));
for k = 1:numel(model)
    switch model(k)
        case 1      % LogMain: chi-square with dof1 degrees of freedom
            pa(k) = 1 - chi2cdf(ts(k),dof1);
        case 2      % LogMain&Int: chi-square with dof2 degrees of freedom
            pa(k) = 1 - chi2cdf(ts(k),dof2);
        case 3      % TukAssoc: proportion of simulated null values >= observed
            pa(k) = sum(ta >= ts(k))/B;
    end
end

% displaying the results
disp('model=1 -> LogMain, model=2 -> LogMain&Int, model=3 -> TukAssoc');
disp('model, test statistic, permutation based p-value, asymptotics based p-value:');
tsp = [model' ts' pp' pa'];
disp(tsp);


% FUNCTIONS: 

function t=stmainstinterlatent_teststats_2g(y,snpdata,nsnp,model)    
% Computes the score test statistics of the requested models for one data set.
%
% INPUT:
%       y       = binary response                               (n x 1)
%       snpdata = SNP data; the first nsnp(1) columns belong to the gene
%                 being tested, the remaining columns to the other gene
%       nsnp    = vector of the number of SNPs of each gene
%       model   = vector of test codes, each one of 1, 2 or 3
%
% OUTPUT:
%       t = row vector; t(j) is the statistic of the test model(j)

if isempty(model) || ~all(ismember(model(:),[1 2 3]))
    error('stmainstinterlatent_teststats_2g:badModel', ...
          'model must be a non-empty vector with entries from {1,2,3}.');
end

vtheta = (-10:0.1:10);
n = length(y);
m = sum(nsnp);
ngene = length(nsnp);
g1 = snpdata(:,1:nsnp(1));      % SNPs of the gene being tested
g2 = snpdata(:,nsnp(1)+1:m);    % SNPs of the other gene

% estimation under the null: intercept + main effects of the other gene
% (this fit is used by Model 2 and Model 3)
beta = logistic_regression_poly(y,g2);
p = logistic([ones(n,1) g2] * beta);

% SCORE TEST STATISTICS
if any(model(:) == 1)      % 1. Model1 (modified, i.e. has only the SNPs of
    % the gene being tested in the model; the null is intercept only)
    beta1 = logistic_regression_poly(y,[]);
    p1 = logistic(ones(n,1) * beta1);
    % test stat
    score = score_logistic(g1',y,p1);
    info = information_logistic([ones(n,1) g1],p1);
    a = inv(info);
    v1 = a(2:nsnp(1)+1,2:nsnp(1)+1);
    scoreteststatstmain = score' * v1 * score;
end

if any(model(:) == 2)      % 2. Model2: main effects + pairwise interactions
    if ngene < 2
        error('stmainstinterlatent_teststats_2g:needTwoGenes', ...
              'Model 2 needs SNP data of two genes to build interaction terms.');
    end
    % interaction design matrix: every SNP of gene 1 times every SNP of
    % gene 2, ordered gene-1 SNP by gene-1 SNP
    rep1 = reshape(repmat(1:nsnp(1),nsnp(2),1),1,[]);
    d12 = g1(:,rep1) .* repmat(g2,1,nsnp(1));
    % test stat (null fit: p from the gene-2 main-effects model above)
    d = [g1 d12];
    design = [ones(n,1) snpdata d12];
    score = score_logistic(d',y,p);
    info = information_logistic(design,p);
    a = inv(info);
    % tested parameters: gene-1 main effects and all interaction terms
    tested = [2:nsnp(1)+1 m+2:size(info,2)];
    v1 = a(tested,tested);
    scoreteststatstinter = score' * v1 * score;
end

if any(model(:) == 3)      % 3. Model3: maximum of the score statistic over theta
    % quantities that do not depend on theta are computed once
    tt = g2 * beta(2:length(beta));
    ttrep = repmat(tt,1,nsnp(1));
    tgrid = zeros(length(vtheta),1);
    for k = 1:length(vtheta)
        theta = vtheta(k);
        d = g1 .* (1 + theta * ttrep);
        design = [ones(n,1) d g2];
        score = score_logistic(d',y,p);
        info = information_logistic(design,p);
        a = inv(info);
        v1 = a(2:nsnp(1)+1,2:nsnp(1)+1);
        tgrid(k) = score' * v1 * score;
    end
    scoreteststatlatent = max(tgrid);
end

% collect the statistics in the order in which the tests were requested
t = zeros(1,numel(model));
for j = 1:numel(model)
    switch model(j)
        case 1
            t(j) = scoreteststatstmain;
        case 2
            t(j) = scoreteststatstinter;
        case 3
            t(j) = scoreteststatlatent;
    end
end
       

   
function score=individual_score_logistic(d,y,p)
% Per-subject score contributions of the parameters being tested in a
% logistic regression, evaluated under the null.
%
% INPUT:
%       d = matrix of derivatives d_ij                       (m x n)
%           d_ij = derivative of logit(P(Y_j=1|covariates)) wrt beta_i,
%           beta_i being the i'th parameter under test
%       y = binary response                                  (n x 1)
%       p = P(Y=1|covariates) evaluated under the null       (n x 1)
%
% OUTPUT:
%       score = m x n matrix whose (i,j) element is subject j's
%               contribution to the score of parameter i.
nprm = size(d,1);                  % number of parameters being tested
resid = y - p;                     % residual of every subject
% copy the residual row once per tested parameter and scale d elementwise
score = d .* (ones(nprm,1) * resid');


    
function information=information_logistic(design,p)
% Information matrix of all parameters of a logistic regression.
%
% INPUT:
%       design = design matrix, including the intercept column   (n x m)
%       p      = P(Y=1|covariates) evaluated under the null      (n x 1)
%
% OUTPUT:
%       information = design' * W * design with W = diag(p .* (1-p))
%                                                                (m x m)
ncol = size(design,2);                     % number of parameters
w = p .* (1 - p);                          % per-subject weight
weighted = design .* repmat(w,1,ncol);     % scale each row by its weight
information = design' * weighted;


function score=score_logistic(d,y,p)
% Score vector of the parameters being tested in a logistic regression,
% evaluated under the null.
%
% INPUT:
%       d = matrix of derivatives d_ij                       (m x n)
%           d_ij = derivative of logit(P(Y_j=1|covariates)) wrt beta_i,
%           beta_i being the i'th parameter under test
%       y = binary response                                  (n x 1)
%       p = P(Y=1|covariates) evaluated under the null       (n x 1)
%
% OUTPUT:
%       score = m x 1 vector with one score per tested parameter.
resid = y - p;          % residual of every subject
score = d * resid;      % sum the weighted residuals over subjects


function [beta,covmat] = logistic_regression_poly(yy,zz);
% Maximum likelihood fit of a logistic regression, obtained by iteratively
% solving the score equations.
%
% INPUT:
%            yy = response
%            zz = design matrix without the column of ones
%                 (pass zz = [] for an intercept-only model)
%
% OUTPUT:
%          beta = estimated regression coefficients (intercept first, then Z)
%        covmat = inverse of the weighted cross-product matrix formed in
%                 the last iteration
%
X     = [ones(size(yy(:,1),1),1) zz];
ncoef = numel(X(1,:));
coef  = zeros(ncoef,1);
% at most 999 updates; stop early once the coefficients settle
for iter = 1:999
    prob = logistic(X * coef);
    % weights p(1-p), bounded to the interval [0.0001, 0.9999]
    w    = max(0.0001,min(0.9999,prob .* (1 - prob)));
    hess = X' * (X .* repmat(w,1,ncoef));
    grad = X' * (yy - prob);
    updated = coef + (inv(hess) * grad);
    change  = sum(abs(coef - updated));    % L1 size of the update
    coef    = updated;
    if change < 0.00001
        break;
    end
end
beta   = coef;
covmat = inv(hess);



function t2=logistic(gg)
% Computes the event probability exp(gg)/(1+exp(gg)) elementwise.
%
% INPUT:
%       gg = linear predictor (scalar, vector or matrix)
% OUTPUT:
%       t2 = probabilities, same size as gg
%
% The expression is evaluated as 1/(1+exp(-gg)): the direct form
% exp(gg)/(1+exp(gg)) overflows to Inf/Inf = NaN for large positive gg,
% whereas here exp(-gg) either underflows to 0 (result 1) or overflows to
% Inf (result 0).
t2 = 1 ./ (1 + exp(-gg));
  
  
function tp = perm_based_null_dist_2g(yy,ssnpdata,nsnp,B,outfile,model)    
% Permutation null distribution of the score test statistics.
%
% In every permutation the rows of the first nsnp(1) columns of the SNP
% data (the gene being tested) are shuffled, while the response and the
% remaining columns are left unchanged; the statistics are then recomputed.
%
% INPUT:
%       yy       = binary response                              (n x 1)
%       ssnpdata = SNP data, gene being tested in the first nsnp(1) columns
%       nsnp     = vector of the number of SNPs of each gene
%       B        = number of permutations
%       outfile  = file to which one line per permutation is appended
%                  (only when 1, 2 or 3 tests are requested)
%       model    = vector of test codes passed on to
%                  stmainstinterlatent_teststats_2g
%
% OUTPUT:
%       tp = B x length(model) matrix; row i holds the statistics of the
%            i'th permutation

fid = fopen(outfile,'a');
if fid < 0
    error('perm_based_null_dist_2g:fileOpen', ...
          'Cannot open output file ''%s'' for appending.',outfile);
end

n = length(yy);
nmodel = length(model);
tp = zeros(B,nmodel);

% output line format, one field per requested test
switch nmodel
    case 1
        fmt = '%g   \n';
    case 2
        fmt = '%g %g   \n';
    case 3
        fmt = '%g %g %g  \n';
    otherwise
        fmt = '';       % nothing is written for other numbers of tests
end

for iperm=1:B
    % PERMUTATION
    permutedindex = randperm(n);
    snpdata = ssnpdata;
    snpdata(:,1:nsnp(1)) = ssnpdata(permutedindex,1:nsnp(1));

    % TEST STATS
    tp(iperm,:) = stmainstinterlatent_teststats_2g(yy,snpdata,nsnp,model);

    % writing out the statistics of the CURRENT permutation only
    % (indexing tp without the row would re-write earlier permutations)
    if ~isempty(fmt)
        fprintf(fid,fmt,tp(iperm,:));
    end
end
fclose(fid);


function tn = nulldist_latent(y,snpdata,nsnp,outfile,nrgen)    
%  INPUT
%     y       = case-control status data
%     snpdata = SNP data (first nsnp(1) columns are for the first gene,
%                 the next nsnp(2) for the second)
%     nsnp    = vector of number of SNPs for each gene
%     outfile = name of the file to which the null distribution of Tmax in
%                 the latent model is appended (one value per line)
%     nrgen   = number of sets of n numbers generated from the normal dist.
%  OUTPUT
%     tn      = nrgen x 1 vector of simulated null values of Tmax
%
% This function constructs the null dist. of the test statistic Tmax, used
% in latent model, by regenerating normal random variables. (the method is
% outlined on Page 12 of the paper)

% NOTE THAT: currently the gene being tested in this program is the first
%     gene in the model

fid = fopen(outfile,'a');
if fid < 0
    error('nulldist_latent:fileOpen', ...
          'Cannot open output file ''%s'' for appending.',outfile);
end

n = length(y);
m = sum(nsnp);
vtheta = (-10:0.1:10);
ntheta = length(vtheta);
g1 = snpdata(:,1:nsnp(1));        % SNPs of the gene being tested
g2 = snpdata(:,nsnp(1)+1:m);      % SNPs of the other gene
thisindex  = [2:nsnp(1)+1];       % positions of the tested parameters
otherindex = [1 nsnp(1)+2:m+1];   % intercept and other-gene parameters

% estimation under the null: intercept + main effects of the other gene
beta = logistic_regression_poly(y,g2);
p = logistic([ones(n,1) g2] * beta);

% quantities that do not depend on theta
tt = g2 * beta(2:length(beta));
ttrep = repmat(tt,1,nsnp(1));
dother = [ones(n,1) g2];
scoreother = individual_score_logistic(dother',y,p);

% PER-THETA SETUP: per-subject scores of the tested parameters with the
% nuisance-parameter scores projected out (ui), and the tested block of
% the inverse information (vinv). Neither depends on the random draws, so
% both are computed once here instead of inside the simulation loop.
ui = zeros(nsnp(1),n,ntheta);
vinv = zeros(nsnp(1),nsnp(1),ntheta);
for k = 1:ntheta
    d = g1 .* (1 + vtheta(k) * ttrep);
    design = [ones(n,1) d g2];
    scoretestprm = individual_score_logistic(d',y,p);
    info = information_logistic(design,p);
    ui(:,:,k) = scoretestprm - info(thisindex,otherindex)*inv(info(otherindex,otherindex))*scoreother;
    a = inv(info);
    vinv(:,:,k) = a(thisindex,thisindex);
end

% THE NULL DISTRIBUTION of TMAX
t10 = zeros(nrgen,1); %t10:null distribution
for i = 1:nrgen
    g = normrnd(0,1,n,1);
    t = zeros(ntheta,1);
    for k = 1:ntheta
        u0 = ui(:,:,k) * g;
        t(k) = u0' * vinv(:,:,k) * u0;
    end %theta
    t10(i) = max(t);
    fprintf(fid,'%g   \n',t10(i));
end %i
tn = t10;
fclose(fid);

  





    
  
       
             
     




  








  
  
  
  
  
  
  
  
  
  
  
