# gencsv: gene csv file # phecsv: phenotype csv file # phecsv: Covariate variables # islinear: TRUE: continuous; FALSE: non-continuous(default) (optional) # isexact: TRUE: exact method; FALSE: PSR (Pseudo response) method (default) (optional) # phename: phename: name of the trait to be calculated # phetype: trait type: binary (default), poisson, binomial (optional) # phenameother: if the trait type is binomial, this trait needs to be specified, the actual program automatically uses phetype/phenameother (optional) # cores: number of threads used, default is 1 # outpath: output location for result files (optional) # gen;phe;kindship;exec_time; if(!require('foreach')){ install.packages('foreach') } if(!require('doParallel')){ install.packages('doParallel') } library(foreach) library(doParallel) #no_cores <- detectCores() - 1 ggwas<-function(gencsv,phecsv,cvcsv=NULL,islinear=FALSE,isexact=FALSE,phename,phetype="islinear",phenameother=NULL,cores=1,outpath=getwd()){ islinear<<-islinear; getTime(); check(gencsv,phecsv,phetype,outpath); no_cores <- 1 registerDoParallel(cores=no_cores) mixed(gen,phe,phename,outpath); if(!islinear && !isexact){ glmm_approximate(gen,phe,cvcsv,kindship,phetype,phename,phenameother,outpath); }else if(!islinear && isexact){ glmm_exact(gen,phe,cvcsv,kindship,phetype,phename,phenameother,outpath); } } glm<-function(gencsv,phecsv,cvcsv=NULL,phename,phetype="binary",phenameother=NULL,outpath=getwd()){ getTime(); check(gencsv,phecsv,phetype,outpath); glm_exec(gen,phe,cvcsv,phetype,phename,phenameother,outpath) } ordinal<-function(gencsv,phecsv, phename, phenameother, outpath=getwd()){ getTime(); phetype<-"ordinal" #check(gencsv,phecsv,phetype,outpath); ordinal_exec(gencsv, phecsv, phename, phenameother, outpath); } pseudo_exact<-function(gencsv, phecsv, kkcsv, outpath=getwd()){ getTime(); pseudo_exact_exec(gencsv, phecsv, kkcsv, outpath) } pseudo_approximate_fast<-function(gencsv, phecsv, kkcsv, outpath){ getTime(); pseudo_null_exec(gencsv, phecsv, kkcsv, outpath); } method_c_binary_binomial_possion<-function(gencsv, phecsv, kkcsv,phetype,phename,phenameother, outpath){ getTime(); method_c_binary_binomial_possion_exec(gencsv, phecsv, kkcsv,phetype,phename,phenameother, outpath); } method_c_ordinal<-function(gencsv, phecsv, kkcsv, outpath){ getTime(); method_c_ordinal_exec(gencsv, phecsv, kkcsv, outpath); } check<- function(gencsv,phecsv,phetype,outpath){ ######################################Start of check section############################ sink(paste(outpath,'/log',exec_time,'.txt',sep = ""), split = TRUE) # The gene file format is compatible with txt and csv gen_suffix <- filename_with_extension <- sub(".*\\.(.*)", "\\1", gencsv); if(gen_suffix == "csv") { gen<-read.csv(file=gencsv,header=TRUE,check.names = FALSE) }else if(gen_suffix == "txt"){ gen<-read.table(file=gencsv, header = TRUE,check.names = FALSE) }else{ cat(paste("[INFO]", "The gene file type error!"),"\n") stop("gene file type error!") } # The phenotype file format is compatible with txt and csv phe_suffix <- filename_with_extension <- sub(".*\\.(.*)", "\\1", phecsv); if(phe_suffix == "csv"){ phe<-read.csv(file=phecsv,header=TRUE,check.names = FALSE); }else if(phe_suffix == "txt"){ phe<-read.table(file=phecsv,header=TRUE,check.names = FALSE); }else{ cat(paste("[INFO]", "The phenotype file type error!"),"\n") stop("phenotype file type error!") } # Determine if the gene file has column headers gen_column_head <- colnames(gen) if(gen_column_head[2] == "0" || gen_column_head[2] == "1"){ cat(paste("[INFO]", "The gene file has no column header,insert column header"),"\n") #head <- phe[,1] #XX <- c("XX", head) colnames(gen) <- phe[,1] }else{ cat(paste("[INFO]", "The gene file has a column header"),"\n") } # clear rows with all 0|1|2 non_special_rows <- apply(gen[, -1], 1, function(x) !(all(x == 0) | all(x == 1) | all(x == 2))) gen <- gen[non_special_rows, ] # Determine if the gene file has row headers gen_row_head<- gen[,1]; if(gen_row_head[2] == "0" || gen_row_head[2] == "1"){ cat(paste("[INFO]", "The gene file has no row header"),"\n") genn<<-gen z<-as.matrix(gen) }else{ cat(paste("[INFO]", "The gene file has a row header"),"\n") genn<<-gen[,-1] z<-as.matrix(gen[,-1]) } # Take the intersection of gene file and phenotype file myGDOrder = match(colnames(z),phe[,1]); myGDOrder<-na.omit(myGDOrder) phe <- phe[myGDOrder,] phe <<-subset(phe) gen <<- subset(z, select = phe[,1]) cat(paste("[INFO]", "Phenotype number:" ,length(colnames(phe))-1),"\n") cat(paste("[INFO]", "Number of effective individuals:" ,length(myGDOrder)),"\n") cat(paste("[INFO]", "Gene file bit number:" ,length(z[,1])),"\n") cat(paste("[INFO]", "Phenotype is:" ,phetype),"\n") sink() ######################################End of check section exportation z####################### } mixed <- function(gen,phe,phename,outpath=getwd()){ #y<-phe$kgw y<-as.matrix(eval(parse(text = paste0("phe$",phename)))) #dfL #z<-as.matrix(gen[,-1]) z<-gen m<-nrow(z) n<-ncol(z) kk<-t(z)%*%z d<-mean(diag(kk)) kk<-kk/d kkpath<-paste(outpath,'/kindship',exec_time,'.csv',sep="") #dfL kindship<<-kk; rownames(kindship)<<-NULL; if(islinear){ write.csv(x=kk,file=kkpath,row.names=F) qq<-eigen(kk,symmetric=T) x1<-as.matrix(rep(1,n)) estimate<-numeric(m) variance<-numeric(m) wald<-numeric(m) p<-numeric(m) parm<-mixed_exec(x=x1,y=y,qq=qq,kk=kk) for(k in 1:m){ x2<-as.matrix(z[k,]) x<-as.matrix(cbind(x1,x2)) parm<-mixed_exec(x=x,y=y,qq=qq,kk=kk) estimate[k]<-parm$beta[2] variance[k]<-parm$stderr[2]^2 wald[k]<-estimate[k]^2/variance[k] p[k]<-1-pchisq(wald[k],1) } result<-data.frame(estimate,variance,wald,p) threshold<-0.05/m pos<-which(p < threshold) significant<-result[pos,] sigout<-cbind(pos,significant) resultpath<-paste(outpath,'/GWAS-result',exec_time,'.csv',sep="") significantpath<-paste(outpath,'/GWAS-significant-SNPs',exec_time,'.csv',sep="") write.csv(x=result,file=resultpath,row.names=FALSE) write.csv(x=sigout,file=significantpath,row.names=FALSE) } } mixed_exec<-function(x,y,qq,kk){ loglike<-function(theta){ lambda<-exp(theta) logdt<-sum(log(lambda*delta+1)) h<-1/(lambda*delta+1) yy<-sum(yu*h*yu) yx<-matrix(0,q,1) xx<-matrix(0,q,q) for(i in 1:q){ yx[i]<-sum(yu*h*xu[,i]) for(j in 1:q){ xx[i,j]<-sum(xu[,i]*h*xu[,j]) } } loglike<- -0.5*logdt-0.5*(n-q)*log(yy-t(yx)%*%solve(xx)%*%yx)-0.5*log(det(xx)) return(-loglike) } fixed<-function(lambda){ h<-1/(lambda*delta+1) yy<-sum(yu*h*yu) yx<-matrix(0,q,1) xx<-matrix(0,q,q) for(i in 1:q){ yx[i]<-sum(yu*h*xu[,i]) for(j in 1:q){ xx[i,j]<-sum(xu[,i]*h*xu[,j]) } } beta<-solve(xx,yx) sigma2<-(yy-t(yx)%*%solve(xx)%*%yx)/(n-q) sigma2<-drop(sigma2) var<-diag(solve(xx)*sigma2) stderr<-sqrt(var) outfixed<-c(beta,stderr,sigma2) return(outfixed) } delta<-qq$values uu<-qq$vectors q<-ncol(x) n<-ncol(kk) vp<-var(y) yu<-t(uu)%*%y xu<-t(uu)%*%x theta<-0 parm<-optim(par=theta,fn=loglike,hessian = TRUE,method="L-BFGS-B",lower=-50,upper=10) lambda<-exp(parm$par) conv<-parm$convergence fn1<-parm$value fn0<-loglike(-Inf) lrt<-2*(fn0-fn1) hess<-parm$hessian parmfix<-fixed(lambda) beta<-parmfix[1:q] stderr<-parmfix[(q+1):(2*q)] sigma2<-parmfix[2*q+1] lod<-lrt/4.61 p_value<-1-pchisq(lrt,1) sigma2g<-lambda*sigma2 goodness<-(vp-sigma2)/vp par<-data.frame(lrt,beta,stderr,sigma2,lambda,sigma2g,lod,p_value) return(par) } glmm_approximate<-function(gen,phe,cvcsv=NULL,kindship,phetype,phename,phenameother,outpath){ #kk<-as.matrix(kk) kk <- kindship if(phetype=="binary" || phetype=="poisson"){ y<-as.matrix(eval(parse(text = paste0("phe$",phename)))) }else{ phe1<-as.matrix(eval(parse(text = paste0("phe$",phename)))) phe2<-as.matrix(eval(parse(text = paste0("phe$",phenameother)))) y<-phe1/phe2 } #z<-as.matrix(gen[,-1]) z<-gen n<-nrow(kk) m<-nrow(z) if (is.null(cvcsv)){ mm <- 1 x<-matrix(1,n,1) }else{ cv<-read.csv(file=cvcsv,header=T) cv<-as.matrix(cv) cvcol<-ncol(cv) mm<-cvcol column1 <- rep(1, n) x <- cbind(column1, apply(cv[, 2:mm, drop = FALSE], 2, as.numeric)) } iblup<-function(par){ err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 #b<-matrix(0,1,1) b<-matrix(0,mm,1) g<-matrix(0,n,1) while(iter < maxiter & err > minerr){ nv<-x%*%b+g if(phetype=="binary" || phetype=="binomial"){ d<-dnorm(nv) #binary or binomial trait mu<-pnorm(nv) #binary or binomial trait }else{ d<-exp(nv) #Poisson trait mu<-exp(nv)#Poisson } if(phetype=="binary"){ r<-c(mu*(1-mu)/(d^2+1e-10)) #binary trait }else if(phetype=="binomial"){ r<-c(mu*(1-mu)/(d^2+1e-10)/phe2) #binomial trait }else{ r<-c(mu/(d^2+1e-10)) #poisson trait } p<-(y-mu)/(d+1e-10)+nv v<-kk*par+diag(r) vi<-solve(v) xx<-t(x)%*%vi%*%x xy<-t(x)%*%vi%*%p b1<-solve(xx,xy) g1<-par*kk%*%vi%*%(p-x%*%b1) iter<-iter+1 err<-sum((b1-b)^2)+sum((g1-g)^2)/n b<-b1 g<-g1 } return(cbind(p,r)) } reml<-function(par){ v<-kk*par+diag(c(r)); vi<-solve(v) b<-solve(t(x)%*%vi%*%x,t(x)%*%vi%*%p) det1<-determinant(v) det2<-determinant(t(x)%*%vi%*%x) e<-p-x%*%b like<- -0.5*det1[[1]]-0.5*det2[[1]]-0.5*t(e)%*%vi%*%e return (-like) } err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 par0<-1 while(iter < maxiter & err > minerr){ blup<-iblup(par0) p<-blup[,1] r<-blup[,2] parm<-optim(par=par0,fn=reml,method="L-BFGS-B",lower=0,upper=1e5) par<-parm$par iter<-iter+1 err<-(par-par0)^2 par0<-par } id<-seq(1:n) pseudo<-data.frame(id,y,p,r,par) write.csv(x=pseudo,file=paste(outpath,'/pseudo',exec_time,'.csv',sep=""),row.names=F) #Data transformation n<-ncol(z) m<-nrow(z) r<-c(pseudo$r) s<-diag(1/sqrt(r)) aa<-s%*%kk%*%s qq<-eigen(aa,symmetric=T) d<-qq$values u<-qq$vectors x<-t(u)%*%s%*%matrix(1,n,1) psd<-t(u)%*%s%*%as.matrix(pseudo$p) widedata<-cbind(seq(1,n),x,y,psd,d) #for (k in 1:m){ # zk<-t(u)%*%s%*%as.matrix(z[k,]) # widedata<-cbind(widedata,zk) #} #widedata<-data.frame(widedata) ###并行化开始### # 预分配结果列表(可选,有助于提高效率) result_list <- vector("list", m) # 使用 foreach 进行并行计算 result_list <- foreach(k = 1:m, .combine='cbind') %dopar% { zk <- t(u) %*% s %*% as.matrix(z[k,]) return(zk) } # 将结果合并到 widedata widedata <- cbind(widedata, result_list) # 确保 widedata 是数据框 widedata <- as.data.frame(widedata) #停止并行后端(完成并行计算后执行) stopImplicitCluster() ###并行化结束### varname<-c("id","x","y","p","d") for(k in 1:m){ varname<-c(varname,paste("z",k,sep="")) } names(widedata)<-varname write.csv(x=widedata,file=paste(outpath,'/widedata',exec_time,'.csv',sep=""),row.names=F) x<-as.matrix(widedata$x) p<-as.matrix(widedata$p) d<-as.matrix(widedata$d) g<-as.matrix(widedata[,-(1:5)]) n<-nrow(g) m<-ncol(g) mixed<-function(par){ q<-ncol(z) zz<-matrix(0,q,q) zp<-matrix(0,q,1) h<-1/(par*d+1) for(i in 1:q){ zp[i]<-sum(z[,i]*p*h) for(j in 1:q){ zz[i,j]<-sum(z[,i]*z[,j]*h) zz[j,i]<-zz[i,j] } } t1<-sum(log(par*d+1)) t2<-log(det(zz)) g<-solve(zz,zp) t3<-sum((p-z%*%g)^2*h) like<- -(t1+t2+t3)/2 return (-like) } fixed<-function(par){ q<-ncol(z) zz<-matrix(0,q,q) zp<-matrix(0,q,1) h<-1/(par*d+1) for(i in 1:q){ zp[i]<-sum(z[,i]*p*h) for(j in 1:q){ zz[i,j]<-sum(z[,i]*z[,j]*h) zz[j,i]<-zz[i,j] } } zzi<-solve(zz) g<-zzi%*%zp wald<-g[2]^2/zzi[2,2] p<-1-pchisq(wald,1) return (c(g[2],wald,p)) } www<-NULL for(k in 1:m){ z<-cbind(x,g[,k]) par<-1 parm<-optim(par=par,fn=mixed,method="L-BFGS-B",lower=0,upper=1e5) par<-parm$par test<-fixed(par) effect<-test[1] wald<-test[2] pvalue<-test[3] www<-rbind(www,c(k,par,effect,wald,pvalue)) } varnames<-c("marker","variance", "effect", "wald", "p") www<-data.frame(www) names(www)<-varnames write.csv(x=www,file=paste(outpath,'/GWAS-result',exec_time,'.csv',sep=""),row.names=F) } glmm_exact<-function(gen,phe,cvcsv=NULL,kindship,phetype,phename,phenameother,outpath){ kk<-kindship #y<-as.matrix(phe$OsC1) if(phetype=="binary" || phetype=="poisson"){ y<-as.matrix(eval(parse(text = paste0("phe$",phename)))) }else{ phe1<-as.matrix(eval(parse(text = paste0("phe$",phename)))) phe2<-as.matrix(eval(parse(text = paste0("phe$",phenameother)))) y<-phe1/phe2 } #z<-gen[,-1] z<-gen n<-nrow(kk) m<-nrow(z) if (is.null(cvcsv)){ mm <- 2 x0<-matrix(1,n,1) }else{ mm <- 2 x0<-matrix(1,n,1) #cv<-read.csv(file=cvcsv,header=T) #cv<-as.matrix(cv) #cvcol<-ncol(cv) #mm<-cvcol #column1 <- rep(1, n) #x0 <- cbind(column1, apply(cv[, 2:mm, drop = FALSE], 2, as.numeric)) } iblup_exact<-function(par){ err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 #b<-matrix(0,2,1) b<-matrix(0,mm,1) g<-matrix(0,n,1) while(iter < maxiter & err > minerr){ nv<-x%*%b+g if(phetype=="binary" || phetype=="binomial"){ d<-dnorm(nv) #binary or binomial trait mu<-pnorm(nv) #binary or binomial trait }else{ d<-exp(nv) #Poisson trait mu<-exp(nv)#Poisson } if(phetype=="binary"){ r<-c(mu*(1-mu)/(d^2+1e-10)) #binary trait }else if(phetype=="binomial"){ r<-c(mu*(1-mu)/(d^2+1e-10)/phe2) #binomial trait }else{ r<-c(mu/(d^2+1e-10)) #poisson trait } p<-(y-mu)/(d+1e-10)+nv v<-kk*par+diag(r) vi<-solve(v) xx<-t(x)%*%vi%*%x xy<-t(x)%*%vi%*%p xxi<-solve(xx) sb<-as.matrix(sqrt(diag(xxi))) b1<-xxi%*%xy g1<-par*kk%*%vi%*%(p-x%*%b1) iter<-iter+1 err<-sum((b1-b)^2)+sum((g1-g)^2)/n b<-b1 g<-g1 } return(list(cbind(p,r),cbind(b,sb))) } reml<-function(par){ v<-kk*par+diag(c(r)); vi<-solve(v) b<-solve(t(x)%*%vi%*%x,t(x)%*%vi%*%p) det1<-determinant(v) det2<-determinant(t(x)%*%vi%*%x) e<-p-x%*%b like<- -0.5*det1[[1]]-0.5*det2[[1]]-0.5*t(e)%*%vi%*%e return (-like) } start<-Sys.time() gwas<-NULL for(k in 1:m){ x<-cbind(x0,as.matrix(z[k,])) err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 par0<-1 while(iter < maxiter & err > minerr){ blup<-iblup_exact(par0) p<-blup[[1]][,1] r<-blup[[1]][,2] parm<-optim(par=par0,fn=reml,method="L-BFGS-B",lower=0,upper=1e5) par<-parm$par iter<-iter+1 err<-(par-par0)^2 par0<-par } beta<-blup[[2]][,1] sb<-blup[[2]][,2] wald<-(beta[2]/sb[2])^2 p<-1-pchisq(wald,1) out<-c(k,par,beta,sb,wald,p) gwas<-rbind(gwas,out) } gwas<-data.frame(gwas) names(gwas)<-c("snp","lambda","b0","b1","s0","s1","wald","p") write.csv(x=gwas,file=paste(outpath,'/GWAS-result',exec_time,'.csv',sep=""),row.names=F) end<-Sys.time() c(start,end) #b1 is the QTL effect #s1 is the standard error #wald test = b1^2/s1^2 #p is the p-value } glm_exec<-function(gen,phe,cvcsv=NULL,phetype,phename,phenameother,outpath){ if(phetype=="binary" || phetype=="poisson"){ y<-as.matrix(eval(parse(text = paste0("phe$",phename)))) }else{ phe1<-as.matrix(eval(parse(text = paste0("phe$",phename)))) phe2<-as.matrix(eval(parse(text = paste0("phe$",phenameother)))) y<-phe1/phe2 } n<-length(y) if (is.null(cvcsv)){ x0<-matrix(1,n,1) }else{ x0<-matrix(1,n,1) #cv<-read.csv(file=cvcsv,header=T) #cv<-as.matrix(cv) #mm <- 2 #column1 <- rep(1, n) #x0 <- cbind(column1, apply(cv[, 2:mm, drop = FALSE], 2, as.numeric)) } #gen<-gen[,-1] m<-nrow(genn) b0<-as.matrix(c(0,0)) aaa<-NULL for(k in 1:m){ z<-t(genn[k,]) x<-cbind(x0,z) iter<-0 maxiter<-100 err<-1e8 minerr<-1e-8 while(err > minerr & iter < maxiter){ if(b0[2] < -8){b0[2]<- -8} if(b0[2] > 8) {b0[2] <- 8} nv<-x%*%b0 #mu<-pnorm(nv) #binary and binomial #d<-dnorm(nv) #binary and binomial #v<-mu*(1-mu) #binary #v<-mu*(1-mu)/trial #binomial #mu<-exp(nv) #POISSON #d<-exp(nv) #POISSON #v<-exp(nv)#POISSON if(phetype=="binary" || phetype=="binomial"){ d<-dnorm(nv) #binary or binomial trait mu<-pnorm(nv) #binary or binomial trait if(phetype=="binary"){ v<-mu*(1-mu) }else{ v<-mu*(1-mu)/phe2#binomial } }else{ d<-exp(nv) #Poisson trait mu<-exp(nv)#Poisson v<-exp(nv) } p<-(y-mu)/(d+1e-5)+nv r<-v/(d^2+1e-5) r<-diag(c(r)) xx<-t(x)%*%r%*%x xy<-t(x)%*%r%*%p xxi<-solve(xx+diag(2)*1e-5) b<-xxi%*%xy err<-sum((b0-b)^2) iter<-iter+1 b0<-b } g<-b[2] s<-sqrt(xxi[2,2]) wald<-g^2/s^2 pvalue<-1-pchisq(wald,1) out<-c(k,iter,err,g,s,wald,pvalue) aaa<-rbind(aaa,out) } aaa<-data.frame(aaa) names(aaa)<-c("SNP","iter","err","effect","stderr","wald","p") #write.csv(x=aaa,file="scan-Binary.csv",row.names=F) #write.csv(x=aaa,file="scan-Poisson.csv",row.names=F) write.csv(x=aaa,file=paste(outpath,'/GWAS-result',exec_time,'.csv',sep=""),row.names=F) } ordinal_exec<-function(gencsv, phecsv, phename, phenameother, outpath){ phe<-read.csv(phecsv) gen<-read.csv(gencsv) n<-nrow(phe) m<-nrow(gen) z<-gen[,-c(1:5)] #ordinal<-phe$phename ordinal<-eval(parse(text = paste0("phe$",phename))); s<-factor(ordinal) y<-model.matrix(~s-1) c<-ncol(y) #x0<-phe$phenameother x0<-ordinal<-eval(parse(text = paste0("phe$",phenameother))); x<-cbind(as.matrix(x0),t(z[1,])) probit<-function(x,y){ c<-ncol(y) n<-nrow(x) q<-ncol(x) a<-matrix(0,c+1,1) mu<-matrix(1,c,0) b<-matrix(0,q,1) a[1]<--1e10 a[c+1]<-1e10 g0<-rbind(as.matrix(a[2:c]),b) maxiter<-100 minerr<-1e-8 err<-1e8 iter<-0 while(iterminerr){ dwd<-matrix(0,c-1+q,c-1+q) dwy<-matrix(0,c-1+q,1) for(i in 1:n){ for(k in 1:c){ mu[k]<-pnorm(a[k+1]+x[i,]%*%b)-pnorm(a[k]+x[i,]%*%b) if (mu[k]<1e-8){mu[k]<-1e-8} if (mu[k]>(1-1e-8)){mu[k]<-(1-1e-8)} } mu<-mu/sum(mu) d<-matrix(0,c,c-1+q) for(k in 2:(c-1)){ d[k,k-1]<- -dnorm(a[k]+x[i,]%*%b) d[k,k]<-dnorm(a[k+1]+x[i,]%*%b) } d[1,1]<-dnorm(a[2]+x[i,]%*%b) d[c,c-1]<--dnorm(a[c]+x[i,]%*%b) for(k in 1:c){ d[k,c:(c+q-1)]<-x[i,]*(dnorm(a[k+1]+x[i,]%*%b)-dnorm(a[k]+x[i,]%*%b)) } w<-matrix(diag(1/mu),ncol=c) dwd<-dwd+t(d)%*%w%*%d dwy<-dwy+t(d)%*%w%*%matrix(y[i,]-mu,c,1) } v<-solve(dwd+diag(c-1+q)*1e-8) g<-g0+v%*%dwy w<-g[c-1+q]^2/v[c-1+q,c-1+q] p<-1-pchisq(w,1) a[2:c]<-g[1:(c-1)] b<-g[c:(c-1+q)] iter<-iter+1 err<-sum((g-g0)^2) out<-c(iter,err,c(g),w,p) g0<-g } return(out) } scan<-numeric(0) for(k in 1:m){ x<-cbind(x0,t(z[k,])) out<-probit(x,y) scan<-rbind(scan,c(k,out)) } scan<-data.frame(scan) names(scan)<-c("marker","iter", "err","intercept1","intercept2","x","effect","Wald","p") write.csv(x=scan,file=paste(outpath,'/GWAS-result',exec_time,'.csv',sep=""),row.names=F) } pseudo_exact_exec<-function(gencsv, phecsv, kkcsv, outpath){ iblup_pseudo<-function(vg){ q<-ncol(x) a<-matrix(0,c+1,1) mu<-matrix(0,1,c) b<-matrix(0,q,1) a[1]<--1e3 a[c+1]<-1e3 a[2:c]<-0 g0<-matrix(0,c-1+q,1) h0<-matrix(0,n0,1) maxiter<-100 minerr<-1e-8 err<-1e8 iter<-0 out<-NULL while((iterminerr)){ rr<-NULL ps<-NULL eta<-NULL pp<-list() for(i in 1:n){ zh<-sum(Z[i*(c-1)-1,]*h0) xb<-sum(x[i,]*b) for(k in 1:c){ mu[k]<-pnorm(c(a[k+1]+xb+zh))-pnorm(c(a[k]+xb+zh)) if(mu[k]<1e-4){mu[k]<-1e-4} if(mu[k]>(1-1e-4)){mu[k]<-(1-1e-4)} } mu<-mu/sum(mu) d<-matrix(0,c,c-1) for(k in 2:(c-1)){ d[k,k-1]<--dnorm(a[k]+xb+zh) d[k,k]<-dnorm(a[k+1]+xb+zh) } d[1,1]<-dnorm(a[2]+xb+zh) d[c,c-1]<--dnorm(a[c]+xb+zh) w<-diag(1/c(mu)) dwd<-solve(t(d)%*%w%*%d+diag(c-1)*1e-5) eta<-rbind(eta,(diag(c-1)%*%as.matrix(a[2:c])+matrix(1,c-1,1)%*%xb+matrix(1,c-1,1)%*%zh)) ps<-rbind(ps,dwd%*%t(d)%*%w%*%as.matrix(c(y[i,]-mu))) pp[[i]]<-dwd } rr<-as.matrix(bdiag(pp)) v<-Z%*%t(Z)*vg+rr vi<-solve(v) g<-solve(t(X)%*%vi%*%X)%*%t(X)%*%vi%*%as.matrix(ps+eta) g<-as.matrix(g) h<-vg*t(Z)%*%vi%*%(ps+eta-X%*%g) a[2:c]<-g[1:(c-1)] b<-as.matrix(g[c:(c-1+q)]) iter<-iter+1 err<-sum((g-g0)^2)+sum((h-h0)^2) out<-rbind(out,cbind(iter,err,t(g))) g0<-g h0<-h } pe<-ps+eta pseudo<-list(pe,rr) return(pseudo) } #Define the restricted log likelihood function reml_pseudo<-function(par){ v<-Z%*%t(Z)*par+rr vi<-solve(v) vv<-t(X)%*%vi%*%X b<-solve(vv)%*%t(X)%*%vi%*%p det1<-unlist(determinant(v))[1] det2<-unlist(determinant(vv))[1] e<-p-X%*%b like<--0.5*det1-0.5*det2-0.5*t(e)%*%vi%*%e return (-as.matrix(like)) } #Generate fixed effects. Under the null model, fixed effects are the intercepts or threshold values fixed_pseudo<-function(par){ v<-Z%*%t(Z)*par+rr vi<-solve(v) vb<-solve(t(X)%*%vi%*%X) b<-vb%*%t(X)%*%vi%*%p return(list(b,vb)) } phe<-read.csv(file=phecsv) kk<-read.csv(file=kkcsv) gen<-read.csv(file=gencsv) #rm(list=ls()) #Read data library(Matrix) #dir<-"/Users/defuliu/workspace/Rproject/GLMMGWAS" #setwd(dir) G<-gen[,-c(1:5)] m<-nrow(G) #Eigen decomposition kk<-as.matrix(kk) n<-nrow(phe) qq<-eigen(kk,symmetric=T) dd<-qq$values uu<-qq$vectors indx<-which(dd>1e-8) n0<-length(indx) dd<-dd[indx] uu<-uu[,indx] u<-uu%*%diag(sqrt(dd)) s<-as.factor(phe$Ordinal) y<-model.matrix(~s-1) c<-ncol(y) X1<-NULL for(i in 1:n){ X1<-rbind(X1,diag(c-1)) } X2<-as.matrix(bdiag(replicate(n,matrix(1,c-1,1),simplify=FALSE))) Z<-X2%*%u #Doubly iterative algorithm to generate the pseudo response variable and the residual variance matrix Sys.time() www<-NULL for(k in 1:m){ x<-X2%*%t(G[k,]) X<-cbind(X1,x) f<-ncol(X) err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 par0<-1 while(iter < maxiter & err > minerr){ pp<-iblup_pseudo(par0) p<-pp[[1]] rr<-pp[[2]] parm<-optim(par=par0,fn=reml_pseudo,method="L-BFGS-B",lower=0,upper=1e5) par<-parm$par iter<-iter+1 err<-(par-par0)^2 par0<-par } #Output of the fixed effects (interecepts or thresholds) bb<-fixed_pseudo(par) effect<-bb[[1]][f] #fixed effects (thresholds or intercepts) stderr<-sqrt(bb[[2]][f,f]) #variable matrix of the fixed effects wald<-(effect/stderr)^2 pvalue<-1-pchisq(wald,1) www<-rbind(www,c(k,par,effect,stderr,wald,pvalue)) } www<-data.frame(www) names(www)<-c("SNP","Variance","Effect","Stderr","Wald","p") write.csv(x=www,file=paste(outpath,'/scan-exact-trial.csv',exec_time,'.csv',sep=""),row.names=F) Sys.time() } pseudo_null_exec<-function(gencsv, phecsv, kkcsv, outpath){ iblup0<-function(vg){ a<-matrix(0,c+1,1) mu<-matrix(0,1,c) a[1]<--1e3 a[c+1]<-1e3 a[2:c]<-0 g0<-a[2:c] h0<-matrix(0,n0,1) maxiter<-100 minerr<-1e-8 err<-1e8 iter<-0 out<-NULL while((iterminerr)){ rr<-NULL ps<-NULL eta<-NULL pp<-list() for(i in 1:n){ zh<-sum(Z[i*(c-1)-1,]*h0) for(k in 1:c){ mu[k]<-pnorm(c(a[k+1]+zh))-pnorm(c(a[k]+zh)) if(mu[k]<1e-4){mu[k]<-1e-4} if(mu[k]>(1-1e-4)){mu[k]<-(1-1e-4)} } mu<-mu/sum(mu) d<-matrix(0,c,c-1) for(k in 2:(c-1)){ d[k,k-1]<--dnorm(a[k]+zh) d[k,k]<-dnorm(a[k+1]+zh) } d[1,1]<-dnorm(a[2]+zh) d[c,c-1]<--dnorm(a[c]+zh) w<-diag(1/c(mu)) dwd<-solve(t(d)%*%w%*%d+diag(c-1)*1e-5) eta<-rbind(eta,(diag(c-1)%*%as.matrix(a[2:c])+matrix(1,c-1,1)%*%zh)) ps<-rbind(ps,dwd%*%t(d)%*%w%*%as.matrix(c(y[i,]-mu))) pp[[i]]<-dwd } rr<-as.matrix(bdiag(pp)) v<-Z%*%t(Z)*vg+rr vi<-solve(v) g<-solve(t(X)%*%vi%*%X)%*%t(X)%*%vi%*%as.matrix(ps+eta) h<-vg*t(Z)%*%vi%*%(ps+eta-X%*%g) a[2:c]<-g[1:(c-1)] iter<-iter+1 err<-sum((g-g0)^2)+sum((h-h0)^2) out<-rbind(out,cbind(iter,err,t(g))) g0<-g h0<-h } pe<-ps+eta pseudo<-list(pe,rr) return(pseudo) } #Define the restricted log likelihood function reml<-function(par){ v<-Z%*%t(Z)*par+rr vi<-solve(v) vv<-t(X)%*%vi%*%X b<-solve(vv)%*%t(X)%*%vi%*%p det1<-unlist(determinant(v))[1] det2<-unlist(determinant(vv))[1] e<-p-X%*%b like<--0.5*det1-0.5*det2-0.5*t(e)%*%vi%*%e return (-as.matrix(like)) } #Generate fixed effects. Under the null model, fixed effects are the intercepts or threshold values fixed<-function(par){ v<-Z%*%t(Z)*par+rr vi<-solve(v) vb<-solve(t(X)%*%vi%*%X) b<-vb%*%t(X)%*%vi%*%p return(list(b,vb)) } #Read data library(Matrix) #setwd(dir) phe<-read.csv(file=phecsv) kk0<-read.csv(file=kkcsv) #Eigen decomposition kk<-as.matrix(kk0) n<-nrow(phe) qq<-eigen(kk,symmetric=T) dd<-qq$values uu<-qq$vectors indx<-which(dd>1e-8) n0<-length(indx) dd<-dd[indx] uu<-uu[,indx] u<-uu%*%diag(sqrt(dd)) s<-as.factor(phe$Ordinal) y<-model.matrix(~s-1) c<-ncol(y) X1<-NULL for(i in 1:n){ X1<-rbind(X1,diag(c-1)) } X2<-as.matrix(bdiag(replicate(n,matrix(1,c-1,1),simplify=FALSE))) X<-X1 Z<-X2%*%u #Doubly iterative algorithm to generate the pseudo response variable and the residual variance matrix err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 par0<-1 while(iter < maxiter & err > minerr){ pp<-iblup0(par0) p<-pp[[1]] rr<-pp[[2]] parm<-optim(par=par0,fn=reml,method="L-BFGS-B",lower=0,upper=1e5) par<-parm$par iter<-iter+1 err<-(par-par0)^2 par0<-par } par #Collect the parameters into a list output0<-list(par,p,rr) #Output of the fixed effects (interecepts or thresholds) bb<-fixed(par) bb[[1]] #fixed effects (thresholds or intercepts) bb[[2]] #variable matrix of the fixed effects #file=paste(outpath,'/X2.csv',exec_time,'.csv',sep="") write.csv(x=par,file=paste(outpath,'/parameter',exec_time,'.csv',sep=""),row.names=F) write.csv(x=p,file=paste(outpath,'/pseudo',exec_time,'.csv',sep=""),row.names=F) write.csv(x=rr,file=paste(outpath,'/residual',exec_time,'.csv',sep=""),row.names=F) write.csv(x=X1,file=paste(outpath,'/X1',exec_time,'.csv',sep=""),row.names=F) write.csv(x=X2,file=paste(outpath,'/X2',exec_time,'.csv',sep=""),row.names=F) pseudo_approximate_exec(gencsv, phecsv, kkcsv, outpath, par, p, rr, X1, X2) } pseudo_approximate_exec<-function(gencsv, phecsv, kkcsv, outpath, par, p, rr, X1, X2){ mixed<-function(par){ q<-ncol(z) zz<-matrix(0,q,q) zp<-matrix(0,q,1) h<-1/(par*d+1) for(i in 1:q){ zp[i]<-sum(z[,i]*y*h) for(j in 1:q){ zz[i,j]<-sum(z[,i]*z[,j]*h) zz[j,i]<-zz[i,j] } } t1<-sum(log(par*d+1)) t2<-log(det(zz+diag(q)*1e-5)) g<-solve(zz+diag(q)*1e-5,zp) t3<-sum((y-z%*%g)^2*h) like<- -(t1+t2+t3)/2 return (-like) } fixed<-function(par){ q<-ncol(z) zz<-matrix(0,q,q) zp<-matrix(0,q,1) h<-1/(par*d+1) for(i in 1:q){ zp[i]<-sum(z[,i]*y*h) for(j in 1:q){ zz[i,j]<-sum(z[,i]*z[,j]*h) zz[j,i]<-zz[i,j] } } vi<-solve(zz+diag(q)*1e-5) b<-vi%*%zp return (list(b[q],vi[q,q])) } library(Matrix) #dir<-"/Users/defuliu/research/GLMMGWAS/明亮分享/0422/" #setwd(dir) #par<-read.csv(file="parameter.csv",header=T) #pseudo<-read.csv(file="pseudo.csv",header=T) #residual<-read.csv(file="residual.csv") #X1<-read.csv(file="X1.csv") #X2<-read.csv(file="X2.csv") par<= par pseudo<-p residual<-rr kk<-read.csv(file=kkcsv) gen<-read.csv(file=gencsv) kk<-as.matrix(kk[,-c(1,2)]) gen<-as.matrix(gen[,-c(1:5)]) n<-ncol(gen) m<-nrow(gen) rr<-as.matrix(residual) p<-as.matrix(pseudo) X1<-as.matrix(X1) X2<-as.matrix(X2) chol<-chol(solve(rr)) K<-t(chol)%*%X2%*%kk%*%t(X2)%*%chol qq<-eigen(K,symmetric=T) d<-qq$values u<-qq$vectors y<-t(u)%*%t(chol)%*%p z0<-t(u)%*%t(chol)%*%X1 Sys.time() www<-NULL for(k in 1:m){ zk<-t(u)%*%t(chol)%*%X2%*%as.matrix(gen[k,]) z<-cbind(z0,zk) par0<-1 parm<-optim(par=par0,fn=mixed,method="L-BFGS-B",lower=0,upper=1e5) par1<-parm$par bb<-fixed(par1) variance<-par1 effect<- bb[[1]] stderr<- sqrt(bb[[2]]) wald<- (effect/stderr)^2 pvalue<- 1-pchisq(wald,1) www<-rbind(www,c(k,variance,effect,stderr,wald,pvalue)) } www<-data.frame(www) names(www)<-c("SNP","Variance","Effect","Stderr","Wald","p") #file=paste(outpath,'/scan-poly-approximate-fast-test.csv',exec_time,'.csv',sep="") write.csv(www,file=paste(outpath,'/scan-poly-approximate-fast-test.csv',exec_time,'.csv',sep=""),row.names=F) Sys.time() } method_c_binary_binomial_possion_exec <- function(gencsv, phecsv, kkcsv, phetype, phename, phenameother, outpath) { ## Input data gen <- read.csv(file = gencsv, header = TRUE) phe <- read.csv(file = phecsv, header = TRUE) kk <- read.csv(file = kkcsv, header = TRUE) kk <- as.matrix(kk) # Select trait based on phetype if(phetype=="binary" || phetype=="poisson"){ y<-as.matrix(eval(parse(text = paste0("phe$",phename)))) }else{ phe1<-as.matrix(eval(parse(text = paste0("phe$",phename)))) phe2<-as.matrix(eval(parse(text = paste0("phe$",phenameother)))) y<-phe1/phe2 } z<-as.matrix(gen[,-1]) n<-nrow(kk) m<-nrow(z) x0<-matrix(1,n,1) x<-matrix(1,n,1) iblup<-function(par){ err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 b<-matrix(0,1,1) g<-matrix(0,n,1) while(iter < maxiter & err > minerr){ nv<-x%*%b+g if (phetype == "binary" || phetype == "binomial") { d <- dnorm(nv) # Normal density for binary or binomial trait mu <- pnorm(nv) # CDF for binary or binomial trait if (phetype == "binary") { r <- c(mu * (1 - mu) / (d^2 + 1e-10)) # Variance function for binary trait } else { r <- c(mu * (1 - mu) / (d^2 + 1e-10) / Trial) # Adjusted for binomial trials } } else if (phetype == "poisson") { d <- exp(nv) # Rate for Poisson trait mu <- exp(nv) # Mean for Poisson trait r <- c(mu / (d^2 + 1e-10)) # Variance function for Poisson trait } p<-(y-mu)/(d+1e-10)+nv v<-kk*par+diag(r) vi<-solve(v) xx<-t(x)%*%vi%*%x xy<-t(x)%*%vi%*%p b1<-solve(xx,xy) g1<-par*kk%*%vi%*%(p-x%*%b1) iter<-iter+1 err<-sum((b1-b)^2)+sum((g1-g)^2)/n b<-b1 g<-g1 } return(cbind(p,r)) } reml<-function(par){ v<-kk*par+diag(c(r)); vi<-solve(v) b<-solve(t(x)%*%vi%*%x,t(x)%*%vi%*%p) det1<-determinant(v) det2<-determinant(t(x)%*%vi%*%x) e<-p-x%*%b like<- -0.5*det1[[1]]-0.5*det2[[1]]-0.5*t(e)%*%vi%*%e return (-like) } pstart_time <- proc.time() err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 par0<-1 while(iter < maxiter & err > minerr){ blup<-iblup(par0) p<-blup[,1] r<-blup[,2] parm<-optim(par=par0,fn=reml,method="L-BFGS-B",lower=0,upper=1e5) par<-parm$par iter<-iter+1 err<-(par-par0)^2 par0<-par } id<-seq(1:n) pseudo<-data.frame(id,y,p,r,par) pend_time <- proc.time() - pstart_time cat("\np_time : ", pend_time ," seconds\n", file = paste(outpath,'/time',exec_time,'.txt',sep=""), append = TRUE) write.csv(x=pseudo,file=paste(outpath,'/pseudo',exec_time,'.csv',sep=""),row.names=F) iblup000<-function(par){ err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 b<-matrix(0,2,1) g<-matrix(0,n,1) while(iter < maxiter & err > minerr){ nv<-x%*%b+g if (phetype == "binary" || phetype == "binomial") { d <- dnorm(nv) # Normal density for binary or binomial trait mu <- pnorm(nv) # CDF for binary or binomial trait if (phetype == "Binary") { r <- c(mu * (1 - mu) / (d^2 + 1e-10)) # Variance function for binary trait } else { r <- c(mu * (1 - mu) / (d^2 + 1e-10) / Trial) # Adjusted for binomial trials } } else if (phetype == "poisson") { d <- exp(nv) # Rate for Poisson trait mu <- exp(nv) # Mean for Poisson trait r <- c(mu / (d^2 + 1e-10)) # Variance function for Poisson trait } p<-(y-mu)/(d+1e-10)+nv v<-kk*par+diag(r) vi<-solve(v) xx<-t(x)%*%vi%*%x xy<-t(x)%*%vi%*%p xxi<-solve(xx) sb<-as.matrix(sqrt(diag(xxi))) b1<-xxi%*%xy g1<-par*kk%*%vi%*%(p-x%*%b1) iter<-iter+1 err<-sum((b1-b)^2)+sum((g1-g)^2)/n b<-b1 g<-g1 } return(list(cbind(p,r),cbind(b,sb))) } gstart_time <- proc.time() gwas<-NULL for(k in 1:m){ x<-cbind(x0,as.matrix(z[k,])) err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 blup<-iblup000(par) beta<-blup[[2]][,1] sb<-blup[[2]][,2] wald<-(beta[2]/sb[2])^2 pv<-1-pchisq(wald,1) out<-c(k,par,beta,sb,wald,pv) gwas<-rbind(gwas,out) } gwas<-data.frame(gwas) #names(gwas)<-c("snp","lambda","b0","b1","s0","s1","wald","p") names(gwas)<-c("snp","lambda","b0","effect","s0","standard error","wald","p-value") write.csv(x=gwas,file=paste(outpath,'/gwas',exec_time,'.csv',sep=""),row.names=F) gend_time <- proc.time() - gstart_time cat("\np_time : ", gend_time ," seconds\n", file = paste(outpath,'/time',exec_time,'.txt',sep=""), append = TRUE) #b1 is the QTL effect #s1 is the standard error #wald test = b1^2/s1^2 #p is the p-value } method_c_ordinal_exec<-function(gencsv, phecsv, kkcsv, outpath){ #Read data gen<-read.csv(file=gencsv,header=TRUE) phe<-read.csv(file=phecsv,header=TRUE) kk<-read.csv(file=kkcsv,header=T) #Generate pseudo response and the residual covariance matrix iblup0<-function(vg){ a<-matrix(0,c+1,1) mu<-matrix(0,1,c) a[1]<--1e3 a[c+1]<-1e3 a[2:c]<-0 g0<-a[2:c] h0<-matrix(0,n0,1) maxiter<-100 minerr<-1e-8 err<-1e8 iter<-0 out<-NULL while((iterminerr)){ rr<-NULL ps<-NULL eta<-NULL pp<-list() for(i in 1:n){ zh<-sum(Z[i*(c-1)-1,]*h0) for(k in 1:c){ mu[k]<-pnorm(c(a[k+1]+zh))-pnorm(c(a[k]+zh)) if(mu[k]<1e-4){mu[k]<-1e-4} if(mu[k]>(1-1e-4)){mu[k]<-(1-1e-4)} } mu<-mu/sum(mu) d<-matrix(0,c,c-1) for(k in 2:(c-1)){ d[k,k-1]<--dnorm(a[k]+zh) d[k,k]<-dnorm(a[k+1]+zh) } d[1,1]<-dnorm(a[2]+zh) d[c,c-1]<--dnorm(a[c]+zh) w<-diag(1/c(mu)) dwd<-solve(t(d)%*%w%*%d+diag(c-1)*1e-5) eta<-rbind(eta,(diag(c-1)%*%as.matrix(a[2:c])+matrix(1,c-1,1)%*%zh)) ps<-rbind(ps,dwd%*%t(d)%*%w%*%as.matrix(c(y[i,]-mu))) pp[[i]]<-dwd } rr<-as.matrix(bdiag(pp)) v<-Z%*%t(Z)*vg+rr vi<-solve(v) g<-solve(t(X)%*%vi%*%X)%*%t(X)%*%vi%*%as.matrix(ps+eta) h<-vg*t(Z)%*%vi%*%(ps+eta-X%*%g) a[2:c]<-g[1:(c-1)] iter<-iter+1 err<-sum((g-g0)^2)+sum((h-h0)^2) out<-rbind(out,cbind(iter,err,t(g))) g0<-g h0<-h } pe<-ps+eta pseudo<-list(pe,rr) return(pseudo) } #Define the restricted log likelihood function reml<-function(par){ v<-Z%*%t(Z)*par+rr vi<-solve(v) vv<-t(X)%*%vi%*%X b<-solve(vv)%*%t(X)%*%vi%*%p det1<-unlist(determinant(v))[1] det2<-unlist(determinant(vv))[1] e<-p-X%*%b like<--0.5*det1-0.5*det2-0.5*t(e)%*%vi%*%e return (-as.matrix(like)) } #Generate fixed effects. Under the null model, fixed effects are the intercepts or threshold values fixed<-function(par){ v<-Z%*%t(Z)*par+rr vi<-solve(v) vb<-solve(t(X)%*%vi%*%X) b<-vb%*%t(X)%*%vi%*%p return(list(b,vb)) } #Eigen decomposition EDstart_time <- proc.time() kk<-as.matrix(kk) G<-gen[,-1] m<-nrow(G) n<-nrow(phe) qq<-eigen(kk,symmetric=T) dd<-qq$values uu<-qq$vectors indx<-which(dd>1e-8) n0<-length(indx) dd<-dd[indx] uu<-uu[,indx] u<-uu%*%diag(sqrt(dd)) s<-as.factor(phe$Ordinal) y<-model.matrix(~s-1) c<-ncol(y) X1<-NULL for(i in 1:n){ X1<-rbind(X1,diag(c-1)) } X2<-as.matrix(bdiag(replicate(n,matrix(1,c-1,1),simplify=FALSE))) X<-X1 Z<-X2%*%u EDend_time <- proc.time() - EDstart_time cat("\nED_time : ", EDend_time ," seconds\n", file = paste(outpath,'/time',exec_time,'.txt',sep=""), append = TRUE) #Doubly iterative algorithm to generate the pseudo response variable and the residual variance matrix DIstart_time <- proc.time() err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 par0<-1 while(iter < maxiter & err > minerr){ pp<-iblup0(par0) p<-pp[[1]] rr<-pp[[2]] parm<-optim(par=par0,fn=reml,method="L-BFGS-B",lower=0,upper=1e5) par<-parm$par iter<-iter+1 err<-(par-par0)^2 par0<-par } par #Collect the parameters into a list output0<-list(par,p,rr) #Output of the fixed effects (interecepts or thresholds) bb<-fixed(par) bb[[1]] #fixed effects (thresholds or intercepts) bb[[2]] #variable matrix of the fixed effects #write.csv(x=par,file="Ordinal/approximate_c/parameter.csv",row.names=F) #write.csv(x=p,file="Ordinal/approximate_c/pseudo.csv",row.names=F) #write.csv(x=rr,file="Ordinal/approximate_c/residual.csv",row.names=F) #write.csv(x=X1,file="Ordinal/approximate_c/X1.csv",row.names=F) #write.csv(x=X2,file="Ordinal/approximate_c/X2.csv",row.names=F) write.csv(x=par,file=paste(outpath,'parameter',exec_time,'.csv',sep=""),row.names=F) write.csv(x=p,file=paste(outpath,'pseudo',exec_time,'.csv',sep=""),row.names=F) write.csv(x=rr,file=paste(outpath,'residual',exec_time,'.csv',sep=""),row.names=F) write.csv(x=X1,file=paste(outpath,'X1',exec_time,'.csv',sep=""),row.names=F) write.csv(x=X2,file=paste(outpath,'X2',exec_time,'.csv',sep=""),row.names=F) DIend_time <- proc.time() - DIstart_time cat("\nDI_time : ", DIend_time ," seconds\n", file = paste(outpath,'/time',exec_time,'.txt',sep=""), append = TRUE) iblup000<-function(vg){ a<-matrix(0,c+1,1) mu<-matrix(0,1,c) a[1]<--1e3 a[c+1]<-1e3 a[2:c]<-0 g0<-a[2:c] h0<-matrix(0,n0,1) maxiter<-100 minerr<-1e-8 err<-1e8 iter<-0 out<-NULL while((iterminerr)){ rr<-NULL ps<-NULL eta<-NULL pp<-list() for(i in 1:n){ zh<-sum(Z[i*(c-1)-1,]*h0) for(k in 1:c){ mu[k]<-pnorm(c(a[k+1]+zh))-pnorm(c(a[k]+zh)) if(mu[k]<1e-4){mu[k]<-1e-4} if(mu[k]>(1-1e-4)){mu[k]<-(1-1e-4)} } mu<-mu/sum(mu) d<-matrix(0,c,c-1) for(k in 2:(c-1)){ d[k,k-1]<--dnorm(a[k]+zh) d[k,k]<-dnorm(a[k+1]+zh) } d[1,1]<-dnorm(a[2]+zh) d[c,c-1]<--dnorm(a[c]+zh) w<-diag(1/c(mu)) dwd<-solve(t(d)%*%w%*%d+diag(c-1)*1e-5) eta<-rbind(eta,(diag(c-1)%*%as.matrix(a[2:c])+matrix(1,c-1,1)%*%zh)) ps<-rbind(ps,dwd%*%t(d)%*%w%*%as.matrix(c(y[i,]-mu))) pp[[i]]<-dwd } rr<-as.matrix(bdiag(pp)) v<-Z%*%t(Z)*vg+rr vi<-solve(v) g<-solve(t(X)%*%vi%*%X)%*%t(X)%*%vi%*%as.matrix(ps+eta) h<-vg*t(Z)%*%vi%*%(ps+eta-X%*%g) a[2:c]<-g[1:(c-1)] iter<-iter+1 err<-sum((g-g0)^2)+sum((h-h0)^2) out<-rbind(out,cbind(iter,err,t(g))) g0<-g h0<-h } pe<-ps+eta pseudo<-list(pe,rr) return(pseudo) } #Generate fixed effects. Under the null model, fixed effects are the intercepts or threshold values fixed000<-function(par){ v<-Z%*%t(Z)*par+rr vi<-solve(v) vb<-solve(t(X)%*%vi%*%X) b<-vb%*%t(X)%*%vi%*%p return(list(b,vb)) } www<-NULL for(k in 1:m){ x<-X2%*%t(G[k,]) X<-cbind(X1,x) f<-ncol(X) err<-1e8 maxiter<-20 minerr<-1e-8 iter<-0 blup<-iblup000(par) #Output of the fixed effects (interecepts or thresholds) bb<-fixed000(par) effect<-bb[[1]][f] #fixed effects (thresholds or intercepts) stderr<-sqrt(bb[[2]][f,f]) #variable matrix of the fixed effects wald<-(effect/stderr)^2 pvalue<-1-pchisq(wald,1) www<-rbind(www,c(k,par,effect,stderr,wald,pvalue)) } www<-data.frame(www) names(www)<-c("SNP","Variance","Effect","Stderr","Wald","p") write.csv(x=www,file="Ordinal/approximate_c/scan-exact-trial.csv",row.names=F) DIend_time <- proc.time() - DIstart_time cat("\nDI_time : ", DIend_time ," seconds\n", file = "Ordinal/approximate_c/time.txt", append = TRUE) } getTime<- function(){ current_time <- Sys.time() year <- format(current_time, "%Y") month <- format(current_time, "%m") day <- format(current_time, "%d") hour <- format(current_time, "%H") minute <- format(current_time, "%M") second <- format(current_time, "%S") exec_time <<-paste(year,month,day,hour,minute,second,sep=""); }