BioPHP - Sequence Manipulation and Data
Original code submitted by joseba. Code below is covered by the GNU GPL v2 license.
Description
Last change: 2011/04/27 11:01 | Recent Changes | Original description
The sequence is manipulated to remove non-coding characters, to get the reverse and complement strands, to obtain both strands, to calculate G+C content and nucleotide composition, or it may be converted to RNA.
Code
Last change: 2011/04/27 11:01 | Recent Changes | Download | Original code and
<?php
// author Joseba Bikandi
// license GNU GPL v2
// source code available at biophp.org
//
// The code at the top manipulates the input sequence,
// the form is located in the middle of the file,
// and the functions used by this script are at the bottom.

/**
 * Returns the trimmed value of a POST field.
 *
 * @param string $name Name of the form field.
 * @return string The submitted value, or "" when the field is missing or is
 *                not a plain string (for example an array sent as name[]).
 */
function posted_field($name)
{
    if (isset($_POST[$name]) && is_string($_POST[$name])) {
        return trim($_POST[$name]);
    }
    return "";
}

//############################################################################
//#################     let's manipulate the sequence        #################
//############################################################################

// Defaults, so the template below never reads an undefined variable
// (the page is also rendered on a plain GET request).
$seq = "";
$seqlen = 0;
$result = "";
// Never filled in by this script; the template only prints it when non-empty.
$other_results = "";

if (!empty($_POST)) {
    $action = posted_field("action");

    // remove non-coding characters (always done)
    $seq = remove_non_coding(posted_field("seq"));

    // if a subsequence is requested (1-based positions, both included)
    $start_field = posted_field("start");
    $end_field = posted_field("end");
    if ($start_field !== "" || $end_field !== "") {
        $full_length = strlen($seq);
        // A missing or non-numeric bound falls back to the sequence boundary;
        // numeric bounds are clamped to the valid range 1..length.
        $first = ctype_digit($start_field) ? max(1, (int) $start_field) : 1;
        $last = ctype_digit($end_field) ? min($full_length, (int) $end_field) : $full_length;
        // An inverted or out-of-range selection yields an empty sequence
        // instead of a negative offset/length being passed to substr().
        $seq = ($first <= $last) ? substr($seq, $first - 1, $last - $first + 1) : "";
    }

    // length of sequence (measured before it is wrapped for display)
    $seqlen = strlen($seq);

    switch ($action) {
        case "reverse":
            $seq = strrev($seq);
            break;
        case "complement":
            $seq = Complement($seq);
            break;
        case "reverse_and_complement":
            $seq = Complement(strrev($seq));
            break;
        case "display_both_strands":
            $result = Display_both_strands($seq);
            break;
        case "toRNA":
            $result = toRNA($seq);
            break;
    }

    // G+C content is a percentage of the length, so it is skipped for an
    // empty sequence (it would divide by zero).
    if ($seq !== "" && posted_field("GC") === "1") {
        $result .= GC_content($seq);
    }
    // nucleotide composition
    if (posted_field("ACGT") === "1") {
        $result .= ACGT_content($seq);
    }

    // 70 characters per line before output; an empty sequence stays empty
    // so the form does not show a stray blank line.
    if ($seq !== "") {
        $seq = chunk_split($seq, 70);
    }
}

//############################################################################
//################# we have already manipulated the sequence #################
//############################# below is the form ############################
//############################################################################
?>
<html>
<head>
<title>DNA sequence manipulation</title>
</head>
<body bgcolor=FFFFFF>
<center>
<H2>DNA sequence manipulation</H2>
<form method='post' action="<?php print htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES, "UTF-8"); ?>">
<table cellpadding=5 width=650 border=0 bgcolor=DDFFFF>
<tr><td>
<B>Sequence <?php if ($seq !== "") { print "($seqlen bp)"; } ?>:</B>
</td></tr>
<tr><td>
<textarea name='seq' rows='8' cols='80'><?php print htmlspecialchars($seq, ENT_QUOTES, "UTF-8"); ?></textarea>
</td></tr>
<tr><td>
<select name=action size=7>
<option value=remove_non_coding>Remove no coding characters
<option value=reverse>Reverse sequence
<option value=complement>Complement sequence
<option value=reverse_and_complement>Reverse and Complement of sequence
<option value=display_both_strands>Display Double-stranded Sequence
<option value=toRNA>Convert to RNA
</select>
<br>Select subsequence from position <input type=text name=start size=4> to <input type=text name=end size=4> (both included)
</td></tr>
<tr><td>
<input type=checkbox name=GC value=1> G+C content
<br><input type=checkbox name=ACGT value=1> Nucleotide composition
</td></tr>
<tr><td align=center>
<input type='submit' value='Submit'>
</td></tr>
<?php
if ($other_results != "") {
    print "<tr><td align=center>";
    print "<textarea rows=10 cols=80>" . htmlspecialchars($other_results, ENT_QUOTES, "UTF-8") . "</textarea>";
    print "</td></tr>";
}
?>
</table>
</form>
<table cellpadding=5 width=650 border=0>
<tr><td>
<pre><?php print htmlspecialchars($result, ENT_QUOTES, "UTF-8"); ?></pre>
</td></tr>
<tr><td>
<b>NOTES</b>:
<br>Non-coding characters will be removed by default, and X is replaced by N.
<br><a href=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=7957164&dopt=abstract>NC-UIBMB</a> codes are used as a reference.
<p>Source code is available at <a href=http://www.biophp.org/minitools/sequence_manipulation_and_data>BioPHP.org</a>
</td></tr>
</table>
</center>
</body>
</html>
<?php
//############################################################################
//################# Functions used in this script ############################
//############################################################################

/**
 * Returns the complementary strand of a sequence (not reversed).
 *
 * A<->T and G<->C are swapped, as are the ambiguity codes Y<->R, K<->M,
 * D<->H and V<->B. W and S map to themselves; any other character (such as
 * N) is passed through unchanged.
 *
 * @param string $seq Sequence in any letter case.
 * @return string Complementary sequence, in upper case.
 */
function Complement($seq)
{
    // strtr() translates every character in a single pass, so a base that
    // has already been swapped can never be swapped back.
    return strtr(strtoupper($seq), "ATGCYRKMDVHB", "TACGRYMKHBDV");
}

/**
 * Normalizes a raw sequence: upper-cases it, turns X into N and removes
 * every character that is not one of ATGCYRWSKMDVHBN (white space, digits,
 * punctuation and all other letters).
 *
 * @param string $seq Raw user-supplied sequence.
 * @return string Cleaned sequence.
 */
function remove_non_coding($seq)
{
    $seq = strtoupper($seq);
    // X must be converted BEFORE filtering: X is not in the allowed alphabet,
    // so filtering first would delete it instead of normalizing it to N.
    $seq = str_replace("X", "N", $seq);
    return preg_replace("/[^ATGCYRWSKMDVHBN]/", "", $seq);
}

/**
 * Formats a sequence and its complementary strand as paired rows of up to
 * 70 bases.
 *
 * Each row is followed by a tab and the 1-based position of the last base
 * in that row; each pair of rows is followed by an empty line.
 *
 * @param string $seq Sequence to display.
 * @return string Formatted text ("" for an empty sequence).
 */
function Display_both_strands($seq)
{
    $complement = Complement($seq);
    $length = strlen($seq);
    $result = "";
    for ($offset = 0; $offset < $length; $offset += 70) {
        // End position of this row, capped at the sequence length for the
        // final (possibly shorter) row.
        $row_end = min($offset + 70, $length);
        $result .= substr($seq, $offset, 70) . "\t$row_end\n";
        $result .= substr($complement, $offset, 70) . "\t$row_end\n";
        $result .= "\n"; // line break
    }
    return $result;
}

/**
 * Reports the G+C content of a sequence as a percentage of its length,
 * rounded to two decimals.
 *
 * Only the literal letters G and C are counted.
 *
 * @param string $seq Upper-case sequence.
 * @return string Line of the form "G+C %: <value>" followed by an empty
 *                line; the value is 0 for an empty sequence.
 */
function GC_content($seq)
{
    $length = strlen($seq);
    if ($length === 0) {
        // Avoid dividing by zero when there is nothing to measure.
        return "G+C %: 0\n\n";
    }
    $gc_count = substr_count($seq, "G") + substr_count($seq, "C");
    $gc_percentage = round(100 * $gc_count / $length, 2);
    return "G+C %: $gc_percentage\n\n";
}

/**
 * Converts a DNA sequence to RNA by replacing every T with U, and wraps the
 * result at 70 characters per line.
 *
 * @param string $seq Upper-case DNA sequence.
 * @return string RNA sequence split into lines by chunk_split().
 */
function toRNA($seq)
{
    return chunk_split(str_replace("T", "U", $seq), 70);
}

/**
 * Builds a nucleotide composition report.
 *
 * A, C, G and T are always listed; the remaining codes
 * (Y R W S K M D V H B N) are listed only when they occur in the sequence.
 *
 * @param string $seq Upper-case sequence.
 * @return string Report with one "<code>: <count>" line per listed code,
 *                followed by an empty line.
 */
function ACGT_content($seq)
{
    $result = "Nucleotide composition";
    foreach (array("A", "C", "G", "T") as $base) {
        $result .= "\n$base: " . substr_count($seq, $base);
    }
    foreach (array("Y", "R", "W", "S", "K", "M", "D", "V", "H", "B", "N") as $code) {
        $count = substr_count($seq, $code);
        if ($count > 0) {
            $result .= "\n$code: $count";
        }
    }
    $result .= "\n\n";
    return $result;
}
//############################################################################
//############################### End of functions ###########################
//############################################################################
?>