Sunday, April 7, 2013
Saturday, April 6, 2013
Python code to download a list of PDB files
#!/usr/bin/python
"""
Download PDB structure files from the RCSB Protein Data Bank.

Every PDB id given on the command line is fetched and saved in the
current directory as <pdbid>.pdb; the name of each saved file is printed.

usage: python getpdb.py <pdbids>
example: python getpdb.py 3ZBQ 4HXP 4HXM 4B8F
"""
from contextlib import closing
from sys import argv

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# Base address of the RCSB file download service; "<pdbid>.pdb" is appended.
# NOTE(review): this replaces the legacy
# www.rcsb.org/pdb/download/downloadFile.do endpoint, which is believed to
# be retired -- confirm against the RCSB file download documentation.
url = "https://files.rcsb.org/download/"


def download(pdbid):
    """Fetch one PDB entry and store it as ``<pdbid>.pdb``.

    The download completes before the output file is opened, so a failed
    request does not leave an empty file behind.

    Returns the name of the file that was written.
    """
    filename = str(pdbid) + ".pdb"
    # closing() is used because the Python 2 response object is not a
    # context manager on its own.
    with closing(urlopen(url + filename)) as response:
        data = response.read()
    # Binary mode: the payload is written exactly as received.
    with open(filename, "wb") as out:
        out.write(data)
    return filename


def main(pdbids):
    """Download every id in *pdbids*; print and return the saved file names."""
    saved = []
    for pdbid in pdbids:
        name = download(pdbid)
        print(name)
        saved.append(name)
    return saved


if __name__ == "__main__":
    main(argv[1:])
Python class for basic operations dealing with DNA sequence
class DNA:
    """DNA sequence held as a plain string of bases.

    The lookup tables below contain only upper-case ``A``, ``C``, ``G`` and
    ``T`` keys, so ``complement``, ``reversecomplement`` and ``translate``
    raise ``KeyError`` when the sequence contains any other character.
    """

    # Base pairing used by complement() and reversecomplement().
    basecomplement = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'}

    # Codon -> one-letter amino acid code; '*' marks TAA, TAG and TGA.
    codon2aa = {
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", "TCT": "S", "TCC": "S",
        "TCA": "S", "TCG": "S", "TAT": "Y", "TAC": "Y", "TAA": "*", "TAG": "*",
        "TGT": "C", "TGC": "C", "TGA": "*", "TGG": "W", "CTT": "L", "CTC": "L",
        "CTA": "L", "CTG": "L", "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", "CGT": "R", "CGC": "R",
        "CGA": "R", "CGG": "R", "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", "AAT": "N", "AAC": "N",
        "AAA": "K", "AAG": "K", "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", "GCT": "A", "GCC": "A",
        "GCA": "A", "GCG": "A", "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    }

    def __init__(self, s):
        """Create a DNA instance wrapping the sequence string *s*."""
        self.seq = s

    def transcribe(self):
        """Return the sequence as an RNA string (every 'T' becomes 'U')."""
        return self.seq.replace('T', 'U')

    def reverse(self):
        """Return the sequence in reverse order."""
        return self.seq[::-1]

    def complement(self):
        """Return the complementary DNA string, base by base."""
        pair = self.basecomplement
        return ''.join([pair[base] for base in self.seq])

    def reversecomplement(self):
        """Return the complement of the reversed sequence."""
        pair = self.basecomplement
        return ''.join([pair[base] for base in reversed(self.seq)])

    def gc(self):
        """Return the percentage of the sequence made up of 'G' and 'C'.

        An empty sequence yields 0.0 instead of raising ZeroDivisionError.
        """
        s = self.seq
        if not s:
            return 0.0
        return (s.count('G') + s.count('C')) * 100.0 / len(s)

    def codons(self):
        """Return the list of complete codons, read from position 0.

        One or two trailing bases that do not form a full codon are dropped.
        """
        s = self.seq
        usable = len(s) - len(s) % 3  # length rounded down to a multiple of 3
        return [s[i:i + 3] for i in range(0, usable, 3)]

    def translate(self):
        """Return the amino-acid string obtained by translating each codon."""
        table = self.codon2aa
        return ''.join([table[codon] for codon in self.codons()])
"""
How to use this code...
save this code in a file named bio.py
then open a Python terminal and use it as a module
for example...
>>> from bio import *
>>> S = DNA('ATGTGCGTGCTC')
>>> S
<bio.DNA instance at 0x02A09B48>
>>> S.translate()
'MCVL'
>>> S.transcribe()
'AUGUGCGUGCUC'
>>> S.reverse()
'CTCGTGCGTGTA'
>>> S.complement()
'TACACGCACGAG'
>>> S.codons()
['ATG', 'TGC', 'GTG', 'CTC']
>>>
"""
RNA to protein conversion (translation) using perl
# This is my first post starting with very basic code...
use strict;
use warnings;

# Codon table: RNA triplet => one-letter amino acid code ('_' marks a stop).
my %genetic_code = (
    'GGA' => 'G', 'GGC' => 'G', 'GGG' => 'G', 'GGU' => 'G',    # Glycine
    'GCA' => 'A', 'GCC' => 'A', 'GCG' => 'A', 'GCU' => 'A',    # Alanine
    'GUA' => 'V', 'GUC' => 'V', 'GUG' => 'V', 'GUU' => 'V',    # Valine
    'CCA' => 'P', 'CCC' => 'P', 'CCG' => 'P', 'CCU' => 'P',    # Proline
    'UCA' => 'S', 'UCC' => 'S', 'UCG' => 'S', 'UCU' => 'S',    # Serine
    'CUA' => 'L', 'CUC' => 'L', 'CUG' => 'L', 'CUU' => 'L',    # Leucine
    'CGA' => 'R', 'CGC' => 'R', 'CGG' => 'R', 'CGU' => 'R',    # Arginine
    'ACA' => 'T', 'ACC' => 'T', 'ACG' => 'T', 'ACU' => 'T',    # Threonine
    'AUA' => 'I', 'AUC' => 'I', 'AUU' => 'I',                  # Isoleucine
    'UAA' => '_', 'UAG' => '_', 'UGA' => '_',                  # Stop
    'AAA' => 'K', 'AAG' => 'K',                                # Lysine
    'AGC' => 'S', 'AGU' => 'S',                                # Serine
    'UUA' => 'L', 'UUG' => 'L',                                # Leucine
    'AGA' => 'R', 'AGG' => 'R',                                # Arginine
    'UAC' => 'Y', 'UAU' => 'Y',                                # Tyrosine
    'UGC' => 'C', 'UGU' => 'C',                                # Cysteine
    'CAA' => 'Q', 'CAG' => 'Q',                                # Glutamine
    'AAC' => 'N', 'AAU' => 'N',                                # Asparagine
    'UUC' => 'F', 'UUU' => 'F',                                # Phenylalanine
    'GAC' => 'D', 'GAU' => 'D',                                # Aspartic Acid
    'GAA' => 'E', 'GAG' => 'E',                                # Glutamic Acid
    'UGG' => 'W',                                              # Tryptophan
    'CAC' => 'H', 'CAU' => 'H',                                # Histidine
    'AUG' => 'M',                                              # Methionine
);

# Translate an RNA string into a one-letter protein string.
# Characters other than a/c/g/u (either case) are removed first, the rest is
# upper-cased, and codons are read from position 0 in steps of three.
# Trailing bases that do not form a complete codon are ignored.
# An undefined argument is treated as the empty string.
sub translate_rna {
    my ($rna) = @_;
    $rna = '' unless defined $rna;
    $rna =~ s/[^acgu]//ig;
    $rna = uc $rna;

    my $protein = '';
    for (my $i = 0; $i < length($rna) - 2; $i += 3) {
        my $codon = substr($rna, $i, 3);
        $protein .= $genetic_code{$codon};
    }
    return $protein;
}

# Prompt for a sequence on standard input and print its translation.
sub main {
    print "Enter the RNA sequence: ";
    my $rna = <>;
    chomp($rna) if defined $rna;    # <> returns undef at end of input
    my $protein = translate_rna($rna);
    print "Translated protein sequence is $protein";
}

# Run interactively only when executed as a script, not when loaded with
# require/do from another file.
main() unless caller;

# This program can also be used for six-frame translation by changing the
# starting offset of the three-character shift on the forward and reverse
# RNA sequence.
Subscribe to:
Posts (Atom)