我试图以R标记显示KEGG路径,但它们都保存为png。我可以在报告中显示它们,但我必须添加每个文件的名称,这使得如果代码发生更改,更新文件变得更加困难。有人知道我如何保存由以下代码生成的png文件,以便每当代码或数据发生更改时,我都可以让rmarkdown文件更新图像吗?我已经添加了代码和找到它的网站以及下面的数据。
https://rpubs.com/barryus/class15
标题:“kegg”输出:html_documentdate:“2023-02-07”
knitr::opts_chunk$set(echo = TRUE)
#link to source code and data: https://rpubs.com/barryus/class15
#BiocManager::install("DESeq2")
library(DESeq2)
#BiocManager::install("AnnotationDbi")
library("AnnotationDbi")
#BiocManager::install("org.Hs.eg.db")
library("org.Hs.eg.db")
#BiocManager::install("pathview")
library(pathview)
#BiocManager::install("gage")
library(gage)
#BiocManager::install("gageData")
library(gageData)
#import source data
metaFile <- "GSE37704_metadata.csv"
countFile <- "GSE37704_featurecounts.csv"
# Import metadata and take a peak
colData = read.csv(metaFile, row.names=1)
head(colData)
# Import countdata
countData = read.csv(countFile, row.names=1)
head(countData)
# Note we need to remove the odd first $length col
#   need the countData and colData files to match up so we will need to
#   remove that odd first column in countData namely contData$length.
countData <- as.matrix(countData[,-1])
head(countData)
# Filter count data where you have 0 read count across all samples.
countData = countData[rowSums(countData)>1, ]
head(countData)
#setup the DESeqDataSet object required for the DESeq() function
tdds = DESeqDataSetFromMatrix(countData=countData,
                              colData=colData,
                              design=~condition)
#run the DESeq pipeline
tdds = DESeq(tdds)
tdds
#get results for the HoxA1 knockdown versus control siRNA
#   labeled these as “hoxa1_kd” and “control_sirna” in our original colData metaFile input to DESeq
tres = results(tdds, contrast=c("condition", "hoxa1_kd", "control_sirna"))
#check names
resultsNames(tdds)
#reorder these results by p-value and call summary() on the results object
#   gives you a sense of how many genes are up or down-regulated at the default FDR of 0.1
#   FDR: "false-discovery rate" is the fraction of positives that are false positives at a given p-value threshold
tres = tres[order(tres$pvalue),]
summary(tres)
#Since we mapped and counted against the Ensembl annotation, our results only have information about Ensembl gene IDs
#   However, our pathway analysis downstream will use KEGG pathways
#   genes in KEGG pathways are annotated with Entrez gene IDs
#   need to add Entrez IDs
columns(org.Hs.eg.db)
tres$symbol = mapIds(org.Hs.eg.db,
                     keys=row.names(tres),
                     column="SYMBOL",
                     keytype="ENSEMBL",
                     multiVals="first")
tres$entrez = mapIds(org.Hs.eg.db,
                     keys=row.names(tres),
                     column="ENTREZID",
                     keytype="ENSEMBL",
                     multiVals="first")
tres$name =   mapIds(org.Hs.eg.db,
                     keys=row.names(tres),
                     column="GENENAME",
                     keytype="ENSEMBL",
                     multiVals="first")
head(tres, 10)
#use the gage package for pathway analysis.
#Once we have a list of enriched pathways
#    use the pathview package to draw pathway diagrams
#    shading the molecules in the pathway by their degree of up/down-regulation.
# gageData package has pre-compiled databases mapping genes to KEGG pathways and GO terms for common organisms
#     kegg.sets.hs is a named list of 229 elements.
#     Each element is a character vector of member gene Entrez IDs for a single KEGG pathway. (See also go.sets.hs)
#     The sigmet.idx.hs is an index of numbers of signaling and metabolic pathways in kegg.set.gs
#     KEGG pathway include other types of pathway definitions which may be undesirable in pathway analysis
#         i.e: “Global Map” and “Human Diseases”
#setup the KEGG data-sets we need.
data(kegg.sets.hs)
data(sigmet.idx.hs)
kegg.sets.hs = kegg.sets.hs[sigmet.idx.hs]
head(kegg.sets.hs, 3)
#main gage() function requires a named vector of fold changes, where the names of the values are the Entrez gene IDs.
tfoldchanges = tres$log2FoldChange
names(tfoldchanges) = tres$entrez
head(tfoldchanges)
# run the pathway analysis
#     might want to try changing the value of same.dir.
#     This value determines whether to test for
#         a. changes in a gene set toward a single direction (all genes up or down regulated)
#         b. changes towards both directions simultaneously (i.e. any genes in the pathway dysregulated).
#     we’re using same.dir=TRUE, which will give us separate lists for pathways that are upregulated-
#     versus pathways that are down-regulated. Let’s look at the first few results from each.
#Get the results for the pathway analysis
keggtres = gage(tfoldchanges, gsets=kegg.sets.hs, same.dir=TRUE)
#look at the result object
#   It is a list with three elements (“greater”, “less” and “stats”).
attributes(keggtres)
#keggtres is a list object
class(keggtres) #[1] "list"
#use the dollar syntax to access a named element, e.g.
head(keggtres$greater)
head(keggtres$less)
#look at both up (greater), down (less), and statistics by calling head() with the lapply() function.
#   both keggtres$greater and keggtres$less are data matrices with gene sets as rows sorted by p-value.
lapply(keggtres, head)
#process the results to pull out the top 5 upregulated pathways
#   further process that just to get the IDs
#   use these KEGG pathway IDs downstream for plotting
## Sanity check displaying all pathways data
tpathways = data.frame(id=rownames(keggtres$greater), keggtres$greater)
head(tpathways)
#use the pathview() function to make a pathway plot with our result shown in color.
#   manually supply a pathway.id (namely the first part of the "hsa04110 Cell cycle")
#   this is visible from the print out above.
# code blw downloads the patway figure data from KEGG and adds our results to it
pathview(gene.data=tfoldchanges, pathway.id="hsa04110") #svrl gns r perturbed (the coloured ones)
# A different PDF based output of the same data
#there are df arrows on this one
pathview(gene.data=tfoldchanges, pathway.id="hsa04110", kegg.native=FALSE)
#process our results a bit more to automagicaly pull out the top 5 upregulated pathways
#   then further process that just to get the IDs needed by the pathview() function
#   We’ll use these KEGG pathway IDs for plotting below.
## Focus on top 5 upregulated pathways here for demo purposes only
keggtrespathways <- rownames(keggtres$greater)[1:5]
# Extract the IDs part of each string
keggtresids = substr(keggtrespathways, start=1, stop=8)
keggtresids
#pass these IDs in keggtresids to the pathview() function to draw plots for all the top 5 pathways.
pathview(gene.data=tfoldchanges, pathway.id=keggtresids, species="hsa")
图像
#may need to change image name 
knitr::include_graphics("hsa04060.pathview.png", error = FALSE)