# DGE example with DESeq2: # R version 4.1.2 (2021-11-01) BiocManager::install("DESeq2") library(DESeq2) # v 1.34.0 # setwd("path/to/downloadedData") counts_NK_Th<-read.csv("htseq_counts_NK_Th.csv", row.names = 1, header = T) counts_NK_Th<-counts_NK_Th[-c(which(rowSums(counts_NK_Th)==0)),] dim(counts_NK_Th) # [1] 38573 15 # build a sample metadata table: coldata<-as.data.frame(cbind(cell_type=c(rep("NK", 6), rep("Th", 9)), donor=sapply(strsplit(colnames(counts_NK_Th), "_"), '[',1), sample_id=colnames(counts_NK_Th))) coldata$cell_type<-as.factor(coldata$cell_type) coldata$cell_type<-factor(coldata$cell_type, levels=levels(coldata$cell_type)[c(2,1)]) head(coldata) # cell_type donor sample_id # 1 NK S15 S15_NK_CD56dim # 2 NK S15 S15_NK_CD56bright # 3 NK S16 S16_NK_CD56dim # 4 NK S16 S16_NK_CD56bright # 5 NK S17 S17_NK_CD56dim # 6 NK S17 S17_NK_CD56bright # Create DESeq object: dds <- DESeqDataSetFromMatrix(countData = counts_NK_Th, colData = coldata, design= ~ donor + cell_type) # Difference between cell types, accounting for the sample pairing dds <- DESeq(dds) resultsNames(dds) # lists the coefficients # [1] "Intercept" "donor_S16_vs_S15" "donor_S17_vs_S15" "cell_type_NK_vs_Th" deseq2_NK_vs_Th <- as.data.frame(results(dds, alpha=0.05, contrast=c("cell_type","NK","Th"), cooksCutoff=F)) # use cooksCutoff=F only if some genes of interest do not have a calculated p-value # author recomendation is to use default cooksCutoff=T head(deseq2_NK_vs_Th) # baseMean log2FoldChange lfcSE stat pvalue padj # TSPAN6 40.851111 -6.9583034 1.1044621 -6.30017417 2.973114e-10 8.742152e-09 # TNMD 0.104281 -0.1228379 3.1336380 -0.03919977 9.687311e-01 NA # DPM1 2566.964652 -0.1466129 0.2211112 -0.66307338 5.072836e-01 7.133950e-01 # SCYL3 571.791633 0.5728065 0.4039864 1.41788547 1.562242e-01 3.515869e-01 # C1orf112 201.504414 0.8758449 0.5938469 1.47486651 1.402484e-01 3.263445e-01 # FGR 8793.900467 8.5188295 1.2025099 7.08420757 1.398422e-12 5.868549e-11 deseq2_NK_vs_Th[grep("CPS1", rownames(deseq2_NK_vs_Th)),] # baseMean log2FoldChange lfcSE stat pvalue padj # CPS1 2.34916186 -3.56324252 1.855768 -1.92009033 0.05484649 0.1702518 # CPS1.IT1 0.09375824 -0.08381473 3.134376 -0.02674048 0.97866673 NA deseq2_NK_vs_Th[grep("GZMB", rownames(deseq2_NK_vs_Th)),] # baseMean log2FoldChange lfcSE stat pvalue padj # GZMB 27758.47 9.075387 0.8897603 10.19981 1.986563e-24 2.65665e-22