BioPHP - Oligonucleotide Frequency
Original code submitted by josebaCode bellow is covered by GNU GPL v2 license.
Description
Last change: 2011/04/27 11:01 | Recent Changes | Original descriptionThis minitool will count number of ocurrences of each oligonucleotide with length between 1 and 8 within sequence. Counting may be performed in one or in blth strands.
Code
Last change: 2011/04/27 11:01 | Recent Changes | Download | Original code and<html> <head><title>Oligonucleotide frequencies</title> </head> <body bgcolor=FFFFFF> <center> <h2>Frequency of nucleotides and oligonucleotides in a sequence</h2> This demo is limited to oligos 1-3 bases long, and a maximum input sequence of 10000 bp. <pZ> <?php exec("renice +10 ".getmypid()); // reduce priority for this script in linux error_reporting(0); // to avoid errors // The method used bellow may not be very efficient, but probably it is easy to understand // so it may be easy to customized for other porposes // For suggestion or problems, http://www.in-silico.com/contact.php // Let us know the name of the script if (!$_POST){ print_form (); }else{ // get data from form $sequence=strtoupper($_POST["sequence"]); $oligo_len=$_POST["len"]; $strands=$_POST["strands"]; // remove useless from sequence (non-letters and digists are removed) $sequence=preg_replace("/\W|\d/","",$sequence); // removed // when length of query sequence is 0 => error if (strlen($sequence)==0){die("Error: query sequence not provided. Plase go back andtry again.");} if (strlen($sequence)>1000000){die("Error: sequence is too long. Download the script from biophp.org and used it localy.");} // when length of query sequence is bellow 4^oligo_len => error (to avoid a lot of 0 frequencies); if (strlen($sequence)<pow(4,$oligo_len)){die("Error: query sequence must be at least 4^(length of oligo) to proceed.");} // when frequencies at both strands are requested, place sequence and reverse complement of sequence in one line if ($strands==2){$sequence.=" ".RevComp($sequence); } // compute request and save data in an array $result=find_oligos($sequence,$oligo_len); // print the form print_form ($sequence); //print out results print "<p>Frequencie of oligos with length $oligo_len<br><textarea cols=60 rows=50>"; foreach($result as $oligo => $frequency){ print "$oligo\t$frequency\n"; } print "\n</textarea>\n"; } // ###################################################################################################### // ##################################### FUNCTIONS ################################### // ###################################################################################################### function print_form($sequence){ print "<table bgcolor=DDDDFF cellpadding=10><tr><td>\n"; print "<form method=post action=\"".$_SERVER["PHP_SELF"]."\">\n"; print "<b>Copy your sequence in the textarea below</b>\n"; print "<br><font size=-1>(non coding characters, as number, returns or spaces will be removed)</font>\n"; print "<br><textarea name=sequence cols=60 rows=5>".chunk_split($sequence, 60)."</textarea>\n"; print "<br>Select length of oligonucleotides\n"; print "<select name=len>\n"; print "<option>1\n"; print "<option>2\n"; print "<option>3\n"; print "<option>4\n"; print "<option>5\n"; print "<option>6\n"; print "<option>7\n"; print "<option>8\n"; print "</select>*\n"; print "<br>Compute frequencies in \n"; print "<select name=strands>\n"; print "<option value=1>one strand\n"; print "<option value=2>both strands"; print "</select>\n"; print "<br><input type=submit value=\"Find oligonucleotide frequencies\">\n"; print "<br><font size=-1>This version does not work with degenerated nucleotides</font>\n"; print "</form>\n"; print "<font size=-1>* when selected length is one, frequency of nucleotides A, C, G and T will be reported.</font>\n"; print "</td></tr></table>\n"; } function find_oligos($sequence,$oligo_len){ //for oligos 1 bases long if ($oligo_len==1){ $oligos["A"] = substr_count($sequence,"A"); $oligos["C"] = substr_count($sequence,"C"); $oligos["G"] = substr_count($sequence,"G"); $oligos["T"] = substr_count($sequence,"T"); return $oligos; } //for oligos with at least two bases 2 bases $i=0; $len=strlen($sequence)-$oligo_len+1; while ($i<$len){ $seq=substr($sequence,$i,$oligo_len); $oligos_1step[$seq]++; $i++; } $base_a=array("A","C","G","T"); $base_b=$base_a; $base_c=$base_a; $base_d=$base_a; $base_e=$base_a; $base_f=$base_a; $base_g=$base_a; $base_h=$base_a; //for oligos 2 bases long if ($oligo_len==2){ foreach($base_a as $key_a => $val_a){ foreach($base_b as $key_b => $val_b){ if ($oligos_1step[$val_a.$val_b]){ $oligos[$val_a.$val_b] = $oligos_1step[$val_a.$val_b]; }else{ $oligos[$val_a.$val_b] = 0; } }} } //for oligos 3 bases long if ($oligo_len==3){ foreach($base_a as $key_a => $val_a){ foreach($base_b as $key_b => $val_b){ foreach($base_c as $key_c => $val_c){ if ($oligos_1step[$val_a.$val_b.$val_c]){ $oligos[$val_a.$val_b.$val_c] = $oligos_1step[$val_a.$val_b.$val_c]; }else{ $oligos[$val_a.$val_b.$val_c] = 0; } }}} } //for oligos 4 bases long if ($oligo_len==4){ foreach($base_a as $key_a => $val_a){ foreach($base_b as $key_b => $val_b){ foreach($base_c as $key_c => $val_c){ foreach($base_d as $key_d => $val_d){ if ($oligos_1step[$val_a.$val_b.$val_c.$val_d]){ $oligos[$val_a.$val_b.$val_c.$val_d] = $oligos_1step[$val_a.$val_b.$val_c.$val_d]; }else{ $oligos[$val_a.$val_b.$val_c.$val_d] = 0; } }}}} } //for oligos 5 bases long if ($oligo_len==5){ foreach($base_a as $key_a => $val_a){ foreach($base_b as $key_b => $val_b){ foreach($base_c as $key_c => $val_c){ foreach($base_d as $key_d => $val_d){ foreach($base_e as $key_e => $val_e){ if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e]){ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e]; }else{ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e] = 0; } }}}}} } //for oligos 6 bases long if ($oligo_len==6){ foreach($base_a as $key_a => $val_a){ foreach($base_b as $key_b => $val_b){ foreach($base_c as $key_c => $val_c){ foreach($base_d as $key_d => $val_d){ foreach($base_e as $key_e => $val_e){ foreach($base_f as $key_f => $val_f){ if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f]){ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f]; }else{ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f] = 0; } }}}}}} } //for oligos 7 bases long if ($oligo_len==7){ foreach($base_a as $key_a => $val_a){ foreach($base_b as $key_b => $val_b){ foreach($base_c as $key_c => $val_c){ foreach($base_d as $key_d => $val_d){ foreach($base_e as $key_e => $val_e){ foreach($base_f as $key_f => $val_f){ foreach($base_g as $key_g => $val_g){ if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g]){ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g]; }else{ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g] = 0; } }}}}}}} } //for oligos 8 bases long if ($oligo_len==8){ foreach($base_a as $key_a => $val_a){ foreach($base_b as $key_b => $val_b){ foreach($base_c as $key_c => $val_c){ foreach($base_d as $key_d => $val_d){ foreach($base_e as $key_e => $val_e){ foreach($base_f as $key_f => $val_f){ foreach($base_g as $key_g => $val_g){ foreach($base_h as $key_h => $val_h){ if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h]){ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h]; }else{ $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h] = 0; } }}}}}}}} } return $oligos; } // computes the reverse complement of a sequence (only ACGT nucleotides are used) function RevComp($seq){ $seq=strrev($seq); $seq=str_replace("A", "t", $seq); $seq=str_replace("T", "a", $seq); $seq=str_replace("G", "c", $seq); $seq=str_replace("C", "g", $seq); $seq = strtoupper ($seq); return $seq; } ?> </center> </body> </html>