N $seqb=preg_replace("/X/", "N", $seqb); // substitute X -> N // LIMIT SEQUENCE LENGTH TO LIMIT MEMORY USAGE // THIS SCRIPT CREATES A BIG ARRAY THAT REQUIRES A HUGE AMOUNT OF MEMORY // DO NOT USED SEQUENCES LONGER THAN 700 BASES EACH (1400 BASES FOR BOTH SEQUENCES) // IN THIS DEMO, THE LIMIT HAS BEEN SET UP IN 300 BASES $limit=300; if ((strlen($seqa)+strlen($seqb))>$limit){die ("Error:
The maximum length of code accepted for both sequences is $limit nucleotides");} // CHECK WHETHER THEY ARE DNA OR PROTEIN, AND ALIGN SEQUENCES if ((substr_count($seqa,"A")+substr_count($seqa,"C")+substr_count($seqa,"G")+substr_count($seqa,"T"))>(strlen($seqa)/2)){ // if A+C+G+T is at least half of the sequence, it is a DNA $alignment=align_DNA($seqa,$seqb); }else{ // else is protein $alignment=align_proteins($seqa,$seqb); } // EXTRACT DATA FROM ALIGNMENT $align_seqa=$alignment["seqa"]; $align_seqb=$alignment["seqb"]; // COMPARE ALIGNMENTS $compare=compare_alignment($align_seqa,$align_seqb); // PRINT RESULTS print "
New alignment
\n"; print "

Alignment of two DNA sequences

\n"; print "

\n";
// Print the alignment in rows of 100 columns. Each row shows the aligned
// sequence A, the comparison line and the aligned sequence B; the number
// printed after each sequence is the end position of that row.
// NOTE(review): the loop condition and the row-end computation were lost when
// the "<...>" spans were stripped from this file; they are reconstructed here
// from the surviving fragment -- confirm against the upstream script.
$i=0;
while($i<strlen($align_seqa)){
        // End position of this row, capped at the alignment length so the
        // last (possibly shorter) row reports the real final position.
        $ii=$i+100;
        if($ii>strlen($align_seqa)){$ii=strlen($align_seqa);}
        print substr($align_seqa,$i,100)."  $ii\n";
        print substr($compare,$i,100)."\n";
        print substr($align_seqb,$i,100)."  $ii\n\n";
        $i+=100;
}
print "

\n"; print "
"; print "
"; print $_POST["id1"]."

"; print $_POST["id2"]."

"; print "


"; print "
\n"; print "

"; // END PRINT RESULTS // ######################################################################################## // ############################### Functions #################################### // ######################################################################################## function align_DNA($seqa,$seqb){ $match = 2; $mismatch = -1; $gap = -4; $a = preg_split('//', $seqa, -1, PREG_SPLIT_NO_EMPTY); $b = preg_split('//', $seqb, -1, PREG_SPLIT_NO_EMPTY); $maxa=sizeof($a); $maxb=sizeof($b); $lenn=max ($maxa,$maxb); // Creación de la matriz // He reducido el código para hacerlo mas simple y rapido, pero tan solo ahorra un 20% del tiempo // Con matrices muy grandes, PHP no sabe trabajar muy bien (es poco eficaz). $mx=0; for ($i=0;$i<$maxa;$i++){ for ($j=0;$j<$maxb;$j++){ if($b[$j]==$a[$i]){ $x=$matriz[$j-1][$i-1]+$match; }else{ $x=max (0,$matriz[$j-1][$i-1]-1,$matriz[$j][$i-1]-4,$matriz[$j-1][$i]-4); } $matriz[$j][$i]=$x; if ($mx<$x){$mx=$x; $mj=$j; $mi=$i;} } } // Matriz terminada $j=$mj; $i=$mi; $matrizz[$j][$i]=1; while ($i>0 or $j>0): $aa=$matriz[$j-1][$i-1]; $ab=$matriz[$j][$i-1]; $ac=$matriz[$j-1][$i]; if($aa<>'//' or $aa==0){ if($aa>=$ab and $aa>=$ac){ $j=$j-1; $i=$i-1; } if($ab>$aa){$i=$i-1;} if($ac>$aa){$j=$j-1;} }else{ if($ab<>'//'){$i=$i-1;} if($ac<>'//'){$j=$j-1;} } if($j<0){$j=0;} if($i<0){$i=0;} $matrizz[$j][$i]=1; endwhile; $j=$mj; $i=$mi; while ($i'//'){ if($aa>=$ab and $aa>=$ac){ $j=$j+1; $i=$i+1; } if($ab>$aa){$i=$i+1;} if($ac>$aa){$j=$j+1;} }else{ if($ab<>'//'){$i=$i+1;} if($ac<>'//'){$j=$j+1;} } if($j>$lenn){$j=$lenn;} if($i>$lenn){$i=$lenn;} $matrizz[$j][$i]=1; endwhile; $j=0; $i=0; $t=1; while ($i$lenn){ $lenn=$maxb; } $b=$arrayb; for ($i=0;$i<$maxa;$i++){ $matriz[0][$i]=$PAM250["$a[$i]$b[0]"]; } for ($i=0;$i<$maxb;$i++){ $matriz[$i][0]=$PAM250["$b[$i]$a[0]"]; } for ($i=1;$i<$maxa;$i++){ for ($j=1;$j<$maxb;$j++){ if($b[$j]==$a[$i]){ $x=$matriz[$j-1][$i-1]+$PAM250["$b[$j]$a[$i]"];//$x=$matriz[$j-1][$i-1]+$match; }else{ 
$x=$matriz[$j-1][$i-1]+$PAM250["$b[$j]$a[$i]"];//$x=$matriz[$j-1][$i-1]+$mismatch; $y=$matriz[$j][$i-1]+$gap; if($y>$x){$x=$y;} $y=$matriz[$j-1][$i]+$gap; if($y>$x){$x=$y;} if($x<0){$x=0;} } $matriz[$j][$i]=$x; $x=0; }//end for $j } $mx=0; for ($i=0;$i<$maxa;$i++){ for ($j=0;$j<$maxb;$j++){ if($mx<$matriz[$j][$i]){ $mx=$matriz[$j][$i]; $mj=$j; $mi=$i; } } } $j=$mj; $i=$mi; $matrizz[$j][$i]=1;//matrixx(n, m) = 1 while ($i>0 or $j>0): $aa=$matriz[$j-1][$i-1];//a = matrix(n - 1, m - 1) $ab=$matriz[$j][$i-1];//b = matrix(n, m - 1) $ac=$matriz[$j-1][$i];//c = matrix(n - 1, m) if($aa<>'//' or $aa==0){//If a <> "" Then if($aa>=$ab and $aa>=$ac){//If a >= b And a >= c Then $j=$j-1;// n = n - 1: m = m - 1 $i=$i-1; } if($ab>$aa){//If b > a Then m = m - 1 $i=$i-1; } if($ac>$aa){//If c > a Then n = n - 1 $j=$j-1; } }else{//If a = "" Then if($ab<>'//' or $ab==0){// If b <> "" Then m = m - 1 $i=$i-1; } if($ac<>'//' or $ac==0){// If c <> "" Then n = n - 1 $j=$j-1; } }//End If if($j<0){//If n = 0 Then n = 1 $j=0; } if($i<0){//If m = 0 Then m = 1 $i=0; } $matrizz[$j][$i]=1;//matrixx(n, m) = 1 endwhile; $j=$mj;//n = mn $i=$mi;//m = mm while ($i'//' or $aa==0){//If a <> "" Then if($aa>=$ab and $aa>=$ac){//If a >= b And a >= c Then $j=$j+1;// n = n - 1: m = m - 1 $i=$i+1; } if($ab>$aa){//If b > a Then m = m - 1 $i=$i+1; } if($ac>$aa){//If c > a Then n = n - 1 $j=$j+1; } }else{//If a = "" Then if($ab<>'//' or $ab==0){// If b <> "" Then m = m - 1 $i=$i+1; } if($ac<>'//' or $ac==0){// If c <> "" Then n = n - 1 $j=$j+1; } } if($j>$lenn){//If n > lenn Then n = lenn $j=$lenn; } if($i>$lenn){//If m > lenn Then m = lenn $i=$lenn; } $matrizz[$j][$i]=1;//matrixx(n, m) = 1 endwhile; $j=0; $i=0; $t=1; while ($i Alignment of two DNA or RNA sequences

">
Alignment of two DNA, RNA or protein sequences 

C

C

Freely downloadable PHP script at biophp.org
Alignment of two DNA or protein sequences

Alignment of two DNA sequences


This script has been adapted to the PHP scripting language from the original version, written in Visual Basic for Applications and available as an Excel page here.

The alignment method is the Smith-Waterman type (Smith, T. F., & M. S. Waterman. 1981. Identification of common molecular subsequences. Journal of Molecular Biology 147:195-197. PubMed). To run the program, paste the DNA or RNA sequences in the form and submit the data.

Alignment is shown on the response page, and sequences with gaps are at the bottom.

&nbsp;

Developed by:

Dr. Jose Manuel González Hernández
Departamento de Microbiología y Biología Celular
Facultad de Farmacia
Universidad de La Laguna
La Laguna, Tenerife
Spain

For suggestions or problems, contact us