Friday, June 28, 2013

Trajectory file conversion using wordom


wordom -conv -imol mymolecule.pdb -omol mymolecule.crd

wordom -conv -imol mymolecule.pdb -sele "/A/@(20-100)/*" -omol mymolecule.crd

wordom -conv -itrj mytrj.dcd -imol mymolecule.pdb -otrj mytrj.xtc

wordom -conv -itrj mytrj.xtc -otrj mytrj.dcd

Wednesday, June 12, 2013

MD simulation of LJ Particles (Ar) in C

Exported from Notepad++
#include <stdio.h> #include <stdlib.h> #include <math.h> // input parameters int np = 500; // number of Ar particles double density = 0.75; // density of system double temp = 1.0; // desired temperature double dt = 0.001; // time step double numstep = 1000; // number of steps for MD // constants and variables double r[500][3]; // positions double v[500][3]; // velocities double f[500][3]; // forces double L, t, temperature; // box length double ke, pe, total; // kinetic energy, potential energy, total energy int nc; // number of cells in each direction (cubic box) // functions prototype void initialize(); void calcforce(); void update(); int main () { int p, i; FILE *fh; fh = fopen("sim-box.xyz", "w"); initialize(); // writing initial configuration to file fprintf(fh, "%d\n\n", np); for (i = 0; i < np; i++) { fprintf(fh, "%s\t%f\t%f\t%f\n","Ar", r[i][0], r[i][1], r[i][2]); } calcforce(); t = 0; for (t =0; t < numstep; t++) { update(); // writing configuration at each step to file for (i = 0; i < np; i++) { fprintf(fh, "%s\t%f\t%f\t%f\n","Ar", r[i][0], r[i][1], r[i][2]); } ke = 0; for (p = 0; p < np; p++) { for (i = 0; i < 3; i++) { ke += 0.5 * v[p][i] * v[p][i]; } } total = ke + pe; printf("pe = %f ke = %f Total Energy = %f\n",pe, ke, total); } fclose(fh); } void initialize () { int i, j, k, m, n, p, nc, IX, IY, IZ, IREF, M; double L, CELL, CELL2, sumv2, sumv3, df, fs; double Sum[3] = {0.0, 0.0, 0.0}; // compute length of box L = powf( np / density, 1.0 / 3.0 ); nc = powf( np / 4, 1.0 / 3.0 ); /* CALCULATE THE SIDE OF THE UNIT CELL */ CELL = L / nc; CELL2 = 0.5 * CELL; /* BUILD THE UNIT CELL */ /* SUBLATTICE A */ r[0][0] = 0.0; r[0][1] = 0.0; r[0][2] = 0.0; /* SUBLATTICE B */ r[1][0] = 0.0; r[1][1] = CELL2; r[1][2] = CELL2; /* SUBLATTICE C */ r[2][0] = CELL2; r[2][1] = 0.0; r[2][2] = CELL2; /* SUBLATTICE D */ r[3][0] = CELL2; r[3][1] = CELL2; r[3][2] = 0.0; /* CONSTRUCT THE LATTICE FROM THE UNIT CELL */ M = 0; for( IZ=0; IZ<5; IZ++ ) for( IY=0; IY<5; IY++ ) for( IX=0; IX<5; IX++ ){ for( IREF=0; IREF<4; IREF++ ){ r[IREF+M][0] = r[IREF][0] + CELL * IX ; r[IREF+M][1] = r[IREF][1] + CELL * IY ; r[IREF+M][2] = r[IREF][2] + CELL * IZ ; } M = M + 4; } // random distributed initial velocities for (p = 0; p < np; p++) for (i = 0; i < 3; i++) { v[p][i] = (double)rand()/RAND_MAX - 0.5; Sum[i] += v[p][i]; } // with zero total momentum for (p = 0; p < np; p++) for (i = 0; i < 3; i++) v[p][i] -= Sum[i] / np; // rescaled to desired temperature for (p = 0; p < np; p++) for (i = 0; i < 3; i++) sumv2 += v[p][i] * v[p][i]; df = 3 * np - 3; fs = sqrt(temp * df / sumv2); printf("temperature before scaling= %f\n", sumv2/df); sumv3 = 0.0; for (p = 0; p < np; p++) for (i = 0; i < 3; i++) { v[p][i] *= fs; sumv3 += v[p][i] * v[p][i]; } printf("temperature after scaling= %f\n", sumv3/df); } void calcforce () { int i, j, k, sign; double r2, rij[3], mag, L; double r2i, r6i, r12i, ff, fij[3]; for (i = 0; i < np; i++) for (k = 0; k < 3; k++) f[i][k] = 0.0; pe = 0.0; L = powf(np / density, 1.0 / 3.0 ); // loop over all pairs of particles for (i = 0; i < np - 1; i++) { for (j = i + 1; j < np; j++) { r2 = 0.0; for (k = 0; k < 3; k++) { rij[k] = r[i][k] - r[j][k]; sign = rij[k] > 0.0 ? 1 : -1; mag = rij[k] * sign; if (mag > L / 2) // use nearest image rij[k] -= L * sign; r2 += rij[k] * rij[k]; } r2i = 1 / r2; r6i = r2i * r2i * r2i; r12i = r6i * r6i; ff = r2i * (48 * r12i - 24 * r6i); for (k = 0; k < 3; k++) { fij[k] = rij[k] * ff; f[i][k] += fij[k]; f[j][k] -= fij[k]; } pe += 4 * (r12i - r6i); } } } void update () { int p, k; // compute length of box L = powf( np / density, 1.0 / 3.0 ); for (p = 0; p < np; p++) for (k = 0; k < 3; k++) { r[p][k] += v[p][k] * dt + 0.5 * f[p][k] * dt * dt; // impose periodic boundary conditions if (r[p][k] < 0.0) r[p][k] += L; if (r[p][k] >= L) r[p][k] -= L; v[p][k] += 0.5 * f[p][k] * dt; } calcforce(); for (p = 0; p < np; p++) for (k = 0; k < 3; k++) v[p][k] += 0.5 * f[p][k] * dt; }

Sunday, May 12, 2013

Some important linux commands, a bioinformatician must know

Exported from Notepad++
### Some important linux commands, a bioinformatician must know ### head # to see first few lines of a file tail # to see last few lines of a file grep # to search file(s) for lines that match a pattern cut # to get few character columns of a file awk # I use this to extract columns of a tabular file sort # sort files with number or text uniq # uniquify files wc # print byte, word, and line count tr # translate, sqeeze, and/or delete character fold # to make all lines with fixed width like fatsa format ### for more info use man command ###

Sunday, April 7, 2013

Saturday, April 6, 2013

Python code to download list of pdb files

Exported from Notepad++
#!/usr/bin/python """ This simple script can be used to download pdb files directly from internet usage: python getpdb.py <pdbids> example: python getpdb.py 3ZBQ 4HXP 4HXM 4B8F """ import urllib url = "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=pdb&compression=NO&structureId=" from sys import argv for i in argv[1:]: pdbid = url+str(i) open( i+".pdb", "w" ).write( urllib.urlopen(pdbid).read() ) print i+".pdb"

Python class for basic operations dealing with DNA sequence

Exported from Notepad++
class DNA: """Class representing DNA as a string sequence.""" basecomplement = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'} codon2aa = {"TTT":"F","TTC":"F","TTA":"L","TTG":"L","TCT":"S","TCC":"S", "TCA":"S","TCG":"S","TAT":"Y","TAC":"Y","TAA":"*","TAG":"*", "TGT":"C","TGC":"C","TGA":"*","TGG":"W","CTT":"L","CTC":"L", "CTA":"L","CTG":"L","CCT":"P","CCC":"P","CCA":"P","CCG":"P", "CAT":"H","CAC":"H","CAA":"Q","CAG":"Q","CGT":"R","CGC":"R", "CGA":"R","CGG":"R","ATT":"I","ATC":"I","ATA":"I","ATG":"M", "ACT":"T","ACC":"T","ACA":"T","ACG":"T","AAT":"N","AAC":"N", "AAA":"K","AAG":"K","AGT":"S","AGC":"S","AGA":"R","AGG":"R", "GTT":"V","GTC":"V","GTA":"V","GTG":"V","GCT":"A","GCC":"A", "GCA":"A","GCG":"A","GAT":"D","GAC":"D","GAA":"E","GAG":"E", "GGT":"G","GGC":"G","GGA":"G","GGG":"G"} def __init__(self, s): """Create DNA instance initialized to string s.""" self.seq = s def transcribe(self): """Return as rna string.""" return self.seq.replace('T', 'U') def reverse(self): """Return dna string in reverse order.""" letters = list(self.seq) letters.reverse() return ''.join(letters) def complement(self): """Return the complementary dna string.""" letters = list(self.seq) letters = [self.basecomplement[base] for base in letters] return ''.join(letters) def reversecomplement(self): """Return the reverse of complement of the dna string.""" letters = list(self.seq) letters.reverse() letters = [self.basecomplement[base] for base in letters] return ''.join(letters) def gc(self): """Return the % of dna composed of G+C.""" s = self.seq gc = s.count('G') + s.count('C') return gc * 100.0 / len(s) def codons(self): """Return list of codons for the dna string,""" s = self.seq end = len(s) - (len(s) % 3) - 1 codons = [s[i:i+3] for i in range(0, end, 3)] return codons def translate(self): """Return amino acid sequence translating dna seq.""" s = self.seq codons = self.codons() aa = [self.codon2aa[aa] for aa in codons] return ''.join(aa) """ How to use this code... save this code in a file name as bio.py then open python terminal and use this as a module for example... >>> from bio import * >>> S = DNA('ATGTGCGTGCTC') >>> S <bio.DNA instance at 0x02A09B48> >>> S.translate() 'MCVL' >>> S.transcribe() 'AUGUGCGUGCUC' >>> S.reverse() 'CTCGTGCGTGTA' >>> S.complement() 'TACACGCACGAG' >>> S.codons() ['ATG', 'TGC', 'GTG', 'CTC'] >>> """

RNA to protein conversion (translation) using perl

Exported from Notepad++
# This is my first post starting with very basic code... print “Enter the RNA sequence: ; $rna = <>; chomp($rna); $rna =~s/[^acgu]//ig; my $rna = uc($rna); my(%genetic_code) = ( ‘GGA’ => ‘G’, ‘GGC’ => ‘G’, ‘GGG’ => ‘G’, ‘GGU’ => ‘G’, # Glycine ‘GCA’ => ‘A’, ‘GCC’ => ‘A’, ‘GCG’ => ‘A’, ‘GCU’ => ‘A’, # Alanine ‘GUA’ => ‘V’, ‘GUC’ => ‘V’, ‘GUG’ => ‘V’, ‘GUU’ => ‘V’, # Valine ‘CCA’ => ‘P’, ‘CCC’ => ‘P’, ‘CCG’ => ‘P’, ‘CCU’ => ‘P’, # Proline ‘UCA’ => ‘S’, ‘UCC’ => ‘S’, ‘UCG’ => ‘S’, ‘UCU’ => ‘S’, # Serine ‘CUA’ => ‘L’, ‘CUC’ => ‘L’, ‘CUG’ => ‘L’, ‘CUU’ => ‘L’, # Leucine ‘CGA’ => ‘R’, ‘CGC’ => ‘R’, ‘CGG’ => ‘R’, ‘CGU’ => ‘R’, # Arginine ‘ACA’ => ‘T’, ‘ACC’ => ‘T’, ‘ACG’ => ‘T’, ‘ACU’ => ‘T’, # Threonine ‘AUA’ => ‘I’, ‘AUC’ => ‘I’, ‘AUU’ => ‘I’, # Isoleucine ‘UAA’ => ‘_’, ‘UAG’ => ‘_’, ‘UGA’ => ‘_’, # Stop ‘AAA’ => ‘K’, ‘AAG’ => ‘K’, # Lysine ‘AGC’ => ‘S’, ‘AGU’ => ‘S’, # Serine ‘UUA’ => ‘L’, ‘UUG’ => ‘L’, # Leucine ‘AGA’ => ‘R’, ‘AGG’ => ‘R’, # Arginine ‘UAC’ => ‘Y’, ‘UAU’ => ‘Y’, # Tyrosine ‘UGC’ => ‘C’, ‘UGU’ => ‘C’, # Cysteine ‘CAA’ => ‘Q’, ‘CAG’ => ‘Q’, # Glutamine ‘AAC’ => ‘N’, ‘AAU’ => ‘N’, # Asparagine ‘UUC’ => ‘F’, ‘UUU’ => ‘F’, # Phenylalanine ‘GAC’ => ‘D’, ‘GAU’ => ‘D’, # Aspartic Acid ‘GAA’ => ‘E’, ‘GAG’ => ‘E’, # Glutamic Acid ‘UGG’ => ‘W’, # Tryptophan ‘CAC’ => ‘H’, # Histidine ‘AUG’ => ‘M’, # Methionine ‘CAU’ => ‘H’, # Histidine ); my ($protein) = “”; for(my $i=0;$i<length($rna)-2;$i+=3) { $codon = substr($rna,$i,3); $protein .= $genetic_code{$codon}; } print “Translated protein sequence is $protein”; #This program can also used for six reading frame, by changing the three #character shift in forward and reverse of the RNA sequence.