Info | Home

BioPHP - Oligonucleotide Frequency

Original code submitted by joseba
Code bellow is covered by GNU GPL v2 license.

Description

Last change: 2011/04/27 11:01 | Recent Changes | Original description
This minitool will count number of ocurrences of each oligonucleotide with
length between 1 and 8 within sequence. Counting may be performed in one
or in blth strands.  

Code

Last change: 2011/04/27 11:01 | Recent Changes | Download | Original code and demo
<html>
<head><title>Oligonucleotide frequencies</title>
</head>
<body bgcolor=FFFFFF>
<center>
<h2>Frequency of nucleotides and oligonucleotides in a sequence</h2>
This demo is limited to oligos 1-3 bases long, and a maximum input sequence of 10000 bp.
<pZ>
<?php
exec("renice +10 ".getmypid()); // reduce priority for this script in linux
error_reporting(0);             // to avoid errors

// The method used bellow may not be very efficient, but probably it is easy to understand
// so it may be easy to customized for other porposes
// For suggestion or problems, http://www.in-silico.com/contact.php
// Let us know the name of the script


if (!$_POST){
        print_form ();
}else{
        // get data from form
        $sequence=strtoupper($_POST["sequence"]);
        $oligo_len=$_POST["len"];
        $strands=$_POST["strands"];

        // remove useless from sequence (non-letters and digists are removed)
        $sequence=preg_replace("/\W|\d/","",$sequence);  // removed

        // when length of query sequence is 0 => error
        if (strlen($sequence)==0){die("Error: query sequence not provided. Plase go back andtry again.");}
        if (strlen($sequence)>1000000){die("Error: sequence is too long. Download the script from biophp.org and used it localy.");}

        // when length of query sequence is bellow 4^oligo_len => error (to avoid a lot of 0 frequencies);
       if (strlen($sequence)<pow(4,$oligo_len)){die("Error: query sequence must be at least 4^(length of oligo) to proceed.");}


        // when frequencies at both strands are requested, place sequence and reverse complement of sequence in one line
        if ($strands==2){$sequence.=" ".RevComp($sequence); }

        // compute request and save data in an array
        $result=find_oligos($sequence,$oligo_len);

        // print the form
        print_form ($sequence);

        //print out results
        print "<p>Frequencie of oligos with length $oligo_len<br><textarea cols=60 rows=50>";
        foreach($result as $oligo => $frequency){
                print "$oligo\t$frequency\n";
        }
        print "\n</textarea>\n";

}


// ######################################################################################################
// #####################################        FUNCTIONS             ###################################
// ######################################################################################################
function print_form($sequence){
        print "<table bgcolor=DDDDFF cellpadding=10><tr><td>\n";
        print "<form method=post action=\"".$_SERVER["PHP_SELF"]."\">\n";
        print "<b>Copy your sequence in the textarea below</b>\n";
        print "<br><font size=-1>(non coding characters, as number, returns or spaces will be removed)</font>\n";
        print "<br><textarea name=sequence cols=60 rows=5>".chunk_split($sequence, 60)."</textarea>\n";
        print "<br>Select length of oligonucleotides\n";
        print "<select name=len>\n";
        print "<option>1\n";
        print "<option>2\n";
        print "<option>3\n";
        print "<option>4\n";
        print "<option>5\n";
        print "<option>6\n";
        print "<option>7\n";
        print "<option>8\n";
        print "</select>*\n";
        print "<br>Compute frequencies in \n";
        print "<select name=strands>\n";
        print "<option value=1>one strand\n";
        print "<option value=2>both strands";
        print "</select>\n";
        print "<br><input type=submit value=\"Find oligonucleotide frequencies\">\n";
        print "<br><font size=-1>This version does not work with degenerated nucleotides</font>\n";
        print "</form>\n";
        print "<font size=-1>* when selected length is one, frequency of nucleotides A, C, G and T will be reported.</font>\n";
        print "</td></tr></table>\n";
}

function find_oligos($sequence,$oligo_len){
        //for oligos 1 bases long
        if ($oligo_len==1){
                $oligos["A"] = substr_count($sequence,"A");
                $oligos["C"] = substr_count($sequence,"C");
                $oligos["G"] = substr_count($sequence,"G");
                $oligos["T"] = substr_count($sequence,"T");
                return $oligos;
        }

        //for oligos with at least two bases 2 bases
        $i=0;
        $len=strlen($sequence)-$oligo_len+1;
        while ($i<$len){
            $seq=substr($sequence,$i,$oligo_len);
            $oligos_1step[$seq]++;
            $i++;
        }
        $base_a=array("A","C","G","T");
        $base_b=$base_a;
        $base_c=$base_a;
        $base_d=$base_a;
        $base_e=$base_a;
        $base_f=$base_a;
        $base_g=$base_a;
        $base_h=$base_a;

        //for oligos 2 bases long
        if ($oligo_len==2){
            foreach($base_a as $key_a => $val_a){
                        foreach($base_b as $key_b => $val_b){
                         if ($oligos_1step[$val_a.$val_b]){
                                $oligos[$val_a.$val_b] = $oligos_1step[$val_a.$val_b];
                        }else{
                                $oligos[$val_a.$val_b] = 0;
                        }
                        }}
        }
        //for oligos 3 bases long
        if ($oligo_len==3){
                        foreach($base_a as $key_a => $val_a){
                        foreach($base_b as $key_b => $val_b){
                        foreach($base_c as $key_c => $val_c){
                        if ($oligos_1step[$val_a.$val_b.$val_c]){
                                $oligos[$val_a.$val_b.$val_c] = $oligos_1step[$val_a.$val_b.$val_c];
                        }else{
                                $oligos[$val_a.$val_b.$val_c] = 0;
                        }
                        }}}
        }
        //for oligos 4 bases long
        if ($oligo_len==4){
                        foreach($base_a as $key_a => $val_a){
                        foreach($base_b as $key_b => $val_b){
                        foreach($base_c as $key_c => $val_c){
                        foreach($base_d as $key_d => $val_d){
                            if ($oligos_1step[$val_a.$val_b.$val_c.$val_d]){
                                $oligos[$val_a.$val_b.$val_c.$val_d] = $oligos_1step[$val_a.$val_b.$val_c.$val_d];
                            }else{
                                $oligos[$val_a.$val_b.$val_c.$val_d] = 0;
                            }
                        }}}}
        }
        //for oligos 5 bases long
        if ($oligo_len==5){
                        foreach($base_a as $key_a => $val_a){
                        foreach($base_b as $key_b => $val_b){
                        foreach($base_c as $key_c => $val_c){
                        foreach($base_d as $key_d => $val_d){
                        foreach($base_e as $key_e => $val_e){
                            if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e]){
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e];
                            }else{
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e] = 0;
                            }
                        }}}}}
        }
        //for oligos 6 bases long
        if ($oligo_len==6){
                        foreach($base_a as $key_a => $val_a){
                        foreach($base_b as $key_b => $val_b){
                        foreach($base_c as $key_c => $val_c){
                        foreach($base_d as $key_d => $val_d){
                        foreach($base_e as $key_e => $val_e){
                        foreach($base_f as $key_f => $val_f){
                            if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f]){
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f];
                            }else{
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f] = 0;
                            }
                        }}}}}}
        }
        //for oligos 7 bases long
        if ($oligo_len==7){
                        foreach($base_a as $key_a => $val_a){
                        foreach($base_b as $key_b => $val_b){
                        foreach($base_c as $key_c => $val_c){
                        foreach($base_d as $key_d => $val_d){
                        foreach($base_e as $key_e => $val_e){
                        foreach($base_f as $key_f => $val_f){
                        foreach($base_g as $key_g => $val_g){
                            if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g]){
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g];
                            }else{
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g] = 0;
                            }
                        }}}}}}}
        }
        //for oligos 8 bases long
        if ($oligo_len==8){
                        foreach($base_a as $key_a => $val_a){
                        foreach($base_b as $key_b => $val_b){
                        foreach($base_c as $key_c => $val_c){
                        foreach($base_d as $key_d => $val_d){
                        foreach($base_e as $key_e => $val_e){
                        foreach($base_f as $key_f => $val_f){
                        foreach($base_g as $key_g => $val_g){
                        foreach($base_h as $key_h => $val_h){
                            if ($oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h]){
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h] = $oligos_1step[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h];
                            }else{
                                $oligos[$val_a.$val_b.$val_c.$val_d.$val_e.$val_f.$val_g.$val_h] = 0;
                            }
                        }}}}}}}}
        }
        return $oligos;
}

// computes the reverse complement of a sequence (only ACGT nucleotides are used)
function RevComp($seq){
        $seq=strrev($seq);
        $seq=str_replace("A", "t", $seq);
        $seq=str_replace("T", "a", $seq);
        $seq=str_replace("G", "c", $seq);
        $seq=str_replace("C", "g", $seq);
        $seq = strtoupper ($seq);
return $seq;
}



?>
</center>
</body>
</html>