diff --git a/jsplice/lib/bio.py b/jsplice/lib/bio.py index 5b89a60dbfa45cccc77e0024918c276f2996a7b7..22868d619fe9ddb2ea61dabe2d7f4e503c9ec34b 100644 --- a/jsplice/lib/bio.py +++ b/jsplice/lib/bio.py @@ -119,18 +119,29 @@ class Region: def __lt__(self,reg): if self.chrom!=reg.chrom: - return self.chrom<reg.chrom + c1 = self.chrom + c2 = reg.chrom + + # remove chr if present + if c1[:3]=='chr': + c1 = c1[3:] + if c2[:3]=='chr': + c2 = c2[3:] + + try: #try a numerical comparison + c1n = int(c1) + c2n = int(c2) + return c1n < c2n + except ValueError: # if the chromosomes are not number, then revert to string comparison + return c1<c2 else: - if self.strand!=reg.strand: - return self.strand=='-' + if self.start<reg.start: + return True + elif self.start==reg.start: + return self.end<reg.end else: - if self.start<reg.start: - return True - elif self.start==reg.start: - return self.end<reg.end - else: - return False - + return False + #Gene class. The region of the gene is defined by its exons #A gene has to be instanciated by one single exon. Subsequent exons are to be added through the 'addExon' method class Gene(Region): @@ -186,14 +197,18 @@ class Genome: self.name = name self.genes = list() - #Saves genes and exon position in BED format. (Designed for jSplice to run coverageBed) + #Saves genes and exon position in BED format (sorted). (Designed for jSplice to run coverageBed) def saveBED(self, filename): - f = open(filename,'w') + # Get all regions + regs = dict() for gene in self.genes: - f.write(gene.chrom+'\t'+str(gene.start)+'\t'+str(gene.end)+'\t'+gene.id+'\t0\t'+gene.strand+'\n') + regs[gene.getRegion()] = gene.id+'\t0\t'+gene.strand for exn in gene.exons: - f.write(exn.chrom+'\t'+str(exn.start)+'\t'+str(exn.end)+'\t'+gene.id+'|EXN\t0\t'+exn.strand+'\n') - + regs[exn] = gene.id+'|EXN\t0\t'+exn.strand + + f = open(filename,'w') + for reg in sorted(regs.keys()): + f.write(reg.chrom + '\t' + str(reg.start) + '\t' + str(reg.end) + '\t' + regs[reg] + '\n') f.close() def saveGTF(self, filename): diff --git a/jsplice/lib/jsplicelib.py b/jsplice/lib/jsplicelib.py index 059ada39d2021b1f405b9311518a7474c348846c..f5962bd5b08359c67ac3b4479371b67a746baacf 100644 --- a/jsplice/lib/jsplicelib.py +++ b/jsplice/lib/jsplicelib.py @@ -37,7 +37,7 @@ def FDR(x): l = [l[k] if l[k] < 1.0 else 1.0 for k in ro] return l -#Run coverageBed +#Run coverageBed. Assumes that BAM files are sorted def runCovBed(q, bedFile, strand_arg): #get bedtools version cmd = "coverageBed -h 2>&1 >/dev/null | grep Version | awk '{ print $2 }'" @@ -58,9 +58,9 @@ def runCovBed(q, bedFile, strand_arg): cmd='coverageBed '+strand_arg+' -split -abam "'+exp.bamFile+'" -b "'+bedFile+'" > "'+exp.bedCountFile+'"' else: #starting on version 2.24, the a and b file are interverted... if strand_arg is None: - cmd='coverageBed -split -a "'+bedFile+'" -b "'+exp.bamFile+'" > "'+exp.bedCountFile+'"' + cmd='coverageBed -sorted -split -a "'+bedFile+'" -b "'+exp.bamFile+'" > "'+exp.bedCountFile+'"' else: - cmd='coverageBed '+strand_arg+' -split -a "'+bedFile+'" -b "'+exp.bamFile+'" > "'+exp.bedCountFile+'"' + cmd='coverageBed '+strand_arg+' -sorted -split -a "'+bedFile+'" -b "'+exp.bamFile+'" > "'+exp.bedCountFile+'"' print cmd p=subprocess.Popen(cmd,shell=True) #Not the safest solution but cannot find any other... :(