Bioinformatics tutorials

Friday, June 28, 2013

Trajectory file conversion using wordom

wordom -conv -imol mymolecule.pdb -omol mymolecule.crd

wordom -conv -imol mymolecule.pdb -sele "/A/@(20-100)/*" -omol mymolecule.crd

wordom -conv -itrj mytrj.dcd -imol mymolecule.pdb -otrj mytrj.xtc

wordom -conv -itrj mytrj.xtc -otrj mytrj.dcd

Wednesday, June 12, 2013

MD simulation of LJ Particles (Ar) in C

Exported from Notepad++

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// input parameters

int np = 500;              // number of Ar particles
double density = 0.75;     // density of system
double temp = 1.0;         // desired temperature
double dt = 0.001;         // time step
double numstep = 1000;      // number of steps for MD

// constants and variables

double r[500][3];      // positions
double v[500][3];      // velocities
double f[500][3];          // forces

double L, t, temperature;  // box length
double ke, pe, total;      // kinetic energy, potential energy, total energy
int nc;                    // number of cells in each direction (cubic box)

// functions prototype

void initialize();
void calcforce();
void update();

int main () 
{
    int p, i;
    FILE *fh;
    fh = fopen("sim-box.xyz", "w");

    initialize();
    // writing initial configuration to file
    fprintf(fh, "%d\n\n", np);
    for (i = 0; i < np; i++) {
    fprintf(fh, "%s\t%f\t%f\t%f\n","Ar", r[i][0], r[i][1], r[i][2]);
    }

    calcforce();

    t = 0;
    for (t =0; t < numstep; t++) {
    update();
        // writing configuration at each step to file
        for (i = 0; i < np; i++) {
         fprintf(fh, "%s\t%f\t%f\t%f\n","Ar", r[i][0], r[i][1], r[i][2]);
        }
        ke = 0;
        for (p = 0; p < np; p++) {
        for (i = 0; i < 3; i++) {
            ke += 0.5 * v[p][i] * v[p][i];
        }
        }
        total = ke + pe;    
        printf("pe = %f ke = %f Total Energy = %f\n",pe, ke, total);
    }

    fclose(fh);
}

void initialize () {
    int i, j, k, m, n, p, nc, IX, IY, IZ, IREF, M;
    double L, CELL, CELL2, sumv2, sumv3, df, fs;
    double Sum[3]  = {0.0, 0.0, 0.0};

    // compute length of box
    L = powf( np / density, 1.0 / 3.0 );
    nc = powf( np / 4, 1.0 / 3.0 );

    /* CALCULATE THE SIDE OF THE UNIT CELL */
    CELL  = L / nc;
    CELL2 = 0.5 * CELL;
  
    /* BUILD THE UNIT CELL */
  
    /* SUBLATTICE A */
    r[0][0] =  0.0;
    r[0][1] =  0.0;
    r[0][2] =  0.0;
    /* SUBLATTICE B */
    r[1][0] =  0.0;
    r[1][1] =  CELL2;
    r[1][2] =  CELL2;
    /* SUBLATTICE C */
    r[2][0] =  CELL2;
    r[2][1] =  0.0;
    r[2][2] =  CELL2;
    /* SUBLATTICE D */
    r[3][0] =  CELL2;
    r[3][1] =  CELL2;
    r[3][2] =  0.0;
  
    /* CONSTRUCT THE LATTICE FROM THE UNIT CELL */
    M = 0;
    for( IZ=0; IZ<5; IZ++ )
        for( IY=0; IY<5; IY++ )
            for( IX=0; IX<5; IX++ ){
    
            for( IREF=0; IREF<4; IREF++ ){
      
                r[IREF+M][0] = r[IREF][0] + CELL * IX ;
                r[IREF+M][1] = r[IREF][1] + CELL * IY ;
                r[IREF+M][2] = r[IREF][2] + CELL * IZ ;
      
            }
        M = M + 4;
           }

    // random distributed initial velocities
    for (p = 0; p < np; p++)
    for (i = 0; i < 3; i++) {
        v[p][i] = (double)rand()/RAND_MAX - 0.5;
        Sum[i] += v[p][i];
    }

    // with zero total momentum
    for (p = 0; p < np; p++)
    for (i = 0; i < 3; i++)
        v[p][i] -= Sum[i] / np;

    // rescaled to desired temperature
    for (p = 0; p < np; p++)
    for (i = 0; i < 3; i++)
        sumv2 += v[p][i] * v[p][i];

    df = 3 * np - 3;
    fs = sqrt(temp * df / sumv2);

    printf("temperature before scaling= %f\n", sumv2/df);

    sumv3 = 0.0;
    for (p = 0; p < np; p++)
    for (i = 0; i < 3; i++) {
        v[p][i] *= fs;
            sumv3 += v[p][i] * v[p][i];
        }

    printf("temperature after scaling= %f\n", sumv3/df);

}

void calcforce () {
    int i, j, k, sign;
    double r2, rij[3], mag, L;
    double r2i, r6i, r12i, ff, fij[3];

    for (i = 0; i < np; i++)
    for (k = 0; k < 3; k++)
        f[i][k] = 0.0;

    pe = 0.0;
    L = powf(np / density, 1.0 / 3.0 );

    // loop over all pairs of particles
    for (i = 0; i < np - 1; i++) {
    for (j = i + 1; j < np; j++) {
        r2 = 0.0;
        for (k = 0; k < 3; k++) {
        rij[k] = r[i][k] - r[j][k];
        sign = rij[k] > 0.0 ? 1 : -1;
        mag = rij[k] * sign;
        if (mag > L / 2)         // use nearest image
            rij[k] -= L * sign;
        r2 += rij[k] * rij[k];
        }
        r2i = 1 / r2;
        r6i = r2i * r2i * r2i;
        r12i = r6i * r6i;
        ff = r2i * (48 * r12i - 24 * r6i);
        for (k = 0; k < 3; k++) {
        fij[k] = rij[k] * ff;
        f[i][k] += fij[k];
        f[j][k] -= fij[k];
        }
        pe += 4 * (r12i - r6i);
    }
    }
}

void update () {
    int p, k;

    // compute length of box
    L = powf( np / density, 1.0 / 3.0 );

    for (p = 0; p < np; p++)
    for (k = 0; k < 3; k++) {
        r[p][k] += v[p][k] * dt + 0.5 * f[p][k] * dt * dt;
        // impose periodic boundary conditions
        if (r[p][k] < 0.0)
        r[p][k] += L;
        if (r[p][k] >= L)
        r[p][k] -= L;
        v[p][k] += 0.5 * f[p][k] * dt;
    }
    calcforce();
    for (p = 0; p < np; p++)
    for (k = 0; k < 3; k++)
        v[p][k] += 0.5 * f[p][k] * dt;

}

Sunday, May 12, 2013

Some important linux commands, a bioinformatician must know

Exported from Notepad++

### Some important linux commands, a bioinformatician must know ###

head # to see first few lines of a file 
tail # to see last few lines of a file
grep # to search file(s) for lines that match a pattern
cut  # to get few character columns of a file
awk  # I use this to extract columns of a tabular file
sort # sort files with number or text
uniq # uniquify files
wc   # print byte, word, and line count
tr   # translate, sqeeze, and/or delete character 
fold # to make all lines with fixed width like fatsa format

### for more info use man command ### 

Sunday, April 7, 2013

Autodock Tutorial

Saturday, April 6, 2013

Python code to download list of pdb files

Exported from Notepad++

#!/usr/bin/python
"""
This simple script can be used to download
pdb files directly from internet
usage: python getpdb.py <pdbids>
example: python getpdb.py 3ZBQ 4HXP 4HXM 4B8F
"""
import urllib
url = "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=pdb&compression=NO&structureId="

from sys import argv
for i in argv[1:]:
    pdbid = url+str(i)
    open( i+".pdb", "w" ).write( urllib.urlopen(pdbid).read() )
    print i+".pdb"
    
    

Python class for basic operations dealing with DNA sequence

Exported from Notepad++

class DNA:
    """Class representing DNA as a string sequence."""
    
    basecomplement = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'}
    
    codon2aa = {"TTT":"F","TTC":"F","TTA":"L","TTG":"L","TCT":"S","TCC":"S",
                "TCA":"S","TCG":"S","TAT":"Y","TAC":"Y","TAA":"*","TAG":"*",
                "TGT":"C","TGC":"C","TGA":"*","TGG":"W","CTT":"L","CTC":"L",
                "CTA":"L","CTG":"L","CCT":"P","CCC":"P","CCA":"P","CCG":"P",
                "CAT":"H","CAC":"H","CAA":"Q","CAG":"Q","CGT":"R","CGC":"R",
                "CGA":"R","CGG":"R","ATT":"I","ATC":"I","ATA":"I","ATG":"M",
                "ACT":"T","ACC":"T","ACA":"T","ACG":"T","AAT":"N","AAC":"N",
                "AAA":"K","AAG":"K","AGT":"S","AGC":"S","AGA":"R","AGG":"R",
                "GTT":"V","GTC":"V","GTA":"V","GTG":"V","GCT":"A","GCC":"A",
                "GCA":"A","GCG":"A","GAT":"D","GAC":"D","GAA":"E","GAG":"E",
                "GGT":"G","GGC":"G","GGA":"G","GGG":"G"}
              
    def __init__(self, s):
        """Create DNA instance initialized to string s."""
        self.seq = s
        
    def transcribe(self):
        """Return as rna string."""
        return self.seq.replace('T', 'U')
        
    def reverse(self):
        """Return dna string in reverse order."""
        letters = list(self.seq)
        letters.reverse()
        return ''.join(letters)
        
    def complement(self):
        """Return the complementary dna string."""
        letters = list(self.seq)
        letters = [self.basecomplement[base] for base in letters]
        return ''.join(letters)
        
    def reversecomplement(self):
        """Return the reverse of complement of the dna string."""
        letters = list(self.seq)
        letters.reverse()
        letters = [self.basecomplement[base] for base in letters]
        return ''.join(letters)
        
    def gc(self):
        """Return the % of dna composed of G+C."""
        s = self.seq
        gc = s.count('G') + s.count('C')
        return gc * 100.0 / len(s)
        
    def codons(self):
        """Return list of codons for the dna string,"""
        s = self.seq
        end = len(s) - (len(s) % 3) - 1
        codons = [s[i:i+3] for i in range(0, end, 3)]
        return codons
        
    def translate(self):
        """Return amino acid sequence translating dna seq."""
        s = self.seq
        codons = self.codons()
        aa = [self.codon2aa[aa] for aa in codons]
        return ''.join(aa)
        
"""
How to use this code...
save this code in a file name as bio.py
then open python terminal and use this as a module 
for example...

>>> from bio import *
>>> S = DNA('ATGTGCGTGCTC')
>>> S
<bio.DNA instance at 0x02A09B48>
>>> S.translate()
'MCVL'
>>> S.transcribe()
'AUGUGCGUGCUC'
>>> S.reverse()
'CTCGTGCGTGTA'
>>> S.complement()
'TACACGCACGAG'
>>> S.codons()
['ATG', 'TGC', 'GTG', 'CTC']
>>> 

"""

RNA to protein conversion (translation) using perl

Exported from Notepad++

# This is my first post starting with very basic code...

print “Enter the RNA sequence: “;
$rna = <>;
chomp($rna);
$rna =~s/[^acgu]//ig;
my $rna = uc($rna);

my(%genetic_code) = (
‘GGA’ => ‘G’, ‘GGC’ => ‘G’, ‘GGG’ => ‘G’, ‘GGU’ => ‘G’,  # Glycine
‘GCA’ => ‘A’, ‘GCC’ => ‘A’, ‘GCG’ => ‘A’, ‘GCU’ => ‘A’, # Alanine
‘GUA’ => ‘V’, ‘GUC’ => ‘V’, ‘GUG’ => ‘V’, ‘GUU’ => ‘V’, # Valine
‘CCA’ => ‘P’, ‘CCC’ => ‘P’, ‘CCG’ => ‘P’, ‘CCU’ => ‘P’, # Proline
‘UCA’ => ‘S’, ‘UCC’ => ‘S’, ‘UCG’ => ‘S’, ‘UCU’ => ‘S’, # Serine
‘CUA’ => ‘L’, ‘CUC’ => ‘L’, ‘CUG’ => ‘L’, ‘CUU’ => ‘L’, # Leucine
‘CGA’ => ‘R’, ‘CGC’ => ‘R’, ‘CGG’ => ‘R’, ‘CGU’ => ‘R’, # Arginine
‘ACA’ => ‘T’, ‘ACC’ => ‘T’, ‘ACG’ => ‘T’, ‘ACU’ => ‘T’, # Threonine
‘AUA’ => ‘I’, ‘AUC’ => ‘I’, ‘AUU’ => ‘I’, # Isoleucine
‘UAA’ => ‘_’, ‘UAG’ => ‘_’, ‘UGA’ => ‘_’, # Stop
‘AAA’ => ‘K’, ‘AAG’ => ‘K’, # Lysine
‘AGC’ => ‘S’, ‘AGU’ => ‘S’, # Serine
‘UUA’ => ‘L’, ‘UUG’ => ‘L’, # Leucine
‘AGA’ => ‘R’, ‘AGG’ => ‘R’, # Arginine
‘UAC’ => ‘Y’, ‘UAU’ => ‘Y’, # Tyrosine
‘UGC’ => ‘C’, ‘UGU’ => ‘C’, # Cysteine
‘CAA’ => ‘Q’, ‘CAG’ => ‘Q’, # Glutamine
‘AAC’ => ‘N’, ‘AAU’ => ‘N’, # Asparagine
‘UUC’ => ‘F’, ‘UUU’ => ‘F’, # Phenylalanine
‘GAC’ => ‘D’, ‘GAU’ => ‘D’, # Aspartic Acid
‘GAA’ => ‘E’, ‘GAG’ => ‘E’, # Glutamic Acid
‘UGG’ => ‘W’, # Tryptophan
‘CAC’ => ‘H’, # Histidine
‘AUG’ => ‘M’, # Methionine
‘CAU’ => ‘H’, # Histidine
);

my ($protein) = “”;
for(my $i=0;$i<length($rna)-2;$i+=3)
{
$codon = substr($rna,$i,3);
$protein .= $genetic_code{$codon};
}
print “Translated protein sequence is $protein”;

#This program can also used for six reading frame, by changing the three 
#character shift in forward and reverse of the RNA sequence.