1

我有一个gff文件和一个csv看起来像的文件:

# CSV dataframe
file.csv <- read.table(text = "Sample  Name    Estimate    Std.Err P.Adjust
Sample_1    B005300.2.1 0.345930183 0.05662846  1.58E-06
Sample_1    B005230.2.1 0.048159129 0.013862871 0.019181546
Sample_1    B006450.2.1 -0.263951161    0.079297432 0.027327576
Sample_2    B005230.2.1 39.04308043 11.23861018 0.019181546
Sample_2    B006260.1.1 0.003968994 0.00063087  6.12E-07
Sample_2    B006170.2.1 0.117171563 0.024018888 0.000272761
Sample_3    B006450.2.1 0.012033053 0.003670908 0.030632664
Sample_3    B006980.1-c2.1  -0.007653796    0.002047582 0.009944649
Sample_3    B006980.1.1 -0.011369481    0.002871014 0.00539717", header = TRUE)

# GFF GRanges, example data
#dput(head(GFF))
GFF <- new("GRanges", seqnames = new("Rle", values = structure(1L, .Label = c("Bch01", "Bch02", "Bch03", "Bch04", "Bch05"), class = "factor"), lengths = 6L, 
    elementMetadata = NULL, metadata = list()), ranges = new("IRanges", 
    start = c(21882L, 21882L, 21882L, 21882L, 22697L, 22697L), 
    width = c(126L, 126L, 126L, 126L, 60L, 60L), NAMES = NULL, 
    elementType = "ANY", elementMetadata = NULL, metadata = list()), 
    strand = new("Rle", values = structure(2L, .Label = c("+", 
    "-", "*"), class = "factor"), lengths = 6L, elementMetadata = NULL, 
        metadata = list()), seqinfo = new("Seqinfo", seqnames = c("Bch01", "Bch02", "Bch03", "Bch04", "Bch05"), seqlengths = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), is_circular = c(NA, NA, NA, NA, NA), genome = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_)), elementMetadata = new("DFrame", rownames = NULL, nrows = 6L, 
        listData = list(source = structure(c(1L, 1L, 1L, 1L, 
        1L, 1L), .Label = "maker", class = "factor"), type = structure(c(1L, 
        2L, 3L, 4L, 1L, 2L), .Label = c("CDS", "exon", "gene", 
        "mRNA", "three_prime_UTR", "five_prime_UTR"), class = "factor"), 
            score = c(NA, NA, NA, 126, NA, NA), phase = c(0L, 
            NA, NA, NA, 0L, NA), ID = c("B024400.1.1:cds", 
            "B024400.1.1:exon:2", "B024400.1", 
            "B024400.1.1", "B008910.1.1:cds", 
            "B008910.1.1:exon:4"), Parent = new("CompressedCharacterList", 
                elementType = "character", elementMetadata = NULL, 
                metadata = list(), unlistData = c("B024400.1.1", 
                "B024400.1.1", "B024400.1", 
                "B008910.1.1", "B008910.1.1"
                ), partitioning = new("PartitioningByEnd", end = c(1L, 
                2L, 2L, 3L, 4L, 5L), NAMES = NULL, elementType = "ANY", 
                  elementMetadata = NULL, metadata = list())), 
            Name = c(NA, NA, "B024400.1", "B024400.1.1", 
            NA, NA), Note = new("CompressedCharacterList", elementType = "character", 
                elementMetadata = NULL, metadata = list(), unlistData = c("Similar to B024400.1.1: LOW QUALITY:50S ribosomal protein L4, chloroplastic", "Similar to B024400.1.1: LOW QUALITY:50S ribosomal protein L4, chloroplastic"), partitioning = new("PartitioningByEnd", end = c(0L, 
                0L, 1L, 2L, 2L, 2L), NAMES = NULL, elementType = "ANY", 
                  elementMetadata = NULL, metadata = list())), 
            ref_id = c(NA, NA, "B024400.1.1", "B024400.1.1", 
            NA, NA), Dbxref = new("CompressedCharacterList", 
                elementType = "character", elementMetadata = NULL, 
                metadata = list(), unlistData = character(0), 
                partitioning = new("PartitioningByEnd", end = c(0L, 
                0L, 0L, 0L, 0L, 0L), NAMES = NULL, elementType = "ANY", 
                  elementMetadata = NULL, metadata = list())), 
            Ontology_term = new("CompressedCharacterList", elementType = "character", 
                elementMetadata = NULL, metadata = list(), unlistData = character(0), 
                partitioning = new("PartitioningByEnd", end = c(0L, 
                0L, 0L, 0L, 0L, 0L), NAMES = NULL, elementType = "ANY", 
                  elementMetadata = NULL, metadata = list()))), 
        elementType = "ANY", elementMetadata = NULL, metadata = list()), 
    elementType = "ANY", metadata = list())

我想按列合并这两个文件Name。我试过了:

GFF = rtracklayer::import("gene_models.gff")
merge_data<-merge(file.csv,GFF,by="Name")

但是在 csv 文件中,我Name对不同的值都有相同的值,Samples例如B005230.2.1两者都相同Sample1Sample2而它只在GFF文件中出现一次。因此,合并文件搞砸了。我将不胜感激任何帮助解决这个问题。谢谢!

4

0 回答 0