// author Joseba Bikandi
// license GNU GPL v2
// Developers: consider adding new options to the form
// the code at the top manipulates the input protein sequence
// the form is located in the middle of the file
// the functions used in this script are located at the bottom
//############################################################################
//################# let's manipulate the sequence #################
//############################################################################
// Request handling. Three cases:
//   1. the query string is exactly "info": print the help text and stop;
//   2. a form was posted: clean the sequence, run every requested calculation
//      and accumulate the report text in $result;
//   3. otherwise: load the default sequence and pH.
// NOTE(review): the $_POST keys below are read without isset() checks, so any
// option missing from the submission raises an undefined index/array key
// notice -- confirm the form always sends them, or guard the reads.
// NOTE(review): $pH and $data_source come straight from $_POST and are
// interpolated into $result; the place where $result is output is not visible
// in this section -- confirm it is escaped there.
if ($_SERVER["QUERY_STRING"]=="info"){
print_information();die();
}elseif($_POST){
// raw form fields
$seq=$_POST["seq"];
$pH=$_POST["pH"];
$data_source=$_POST["data_source"];
$result="";
// strip non-coding characters (always applied);
// remove_non_coding_prot() is defined outside this section
$seq=remove_non_coding_prot($seq);
// keep a copy of the whole cleaned sequence, wrapped at 70 characters per
// line, because $seq may be replaced by a subsequence below
$original_seq = chunk_split($seq, 70);
// optional subsequence: "start" and "end" are 1-based positions, "end" inclusive
if ($_POST["start"] or $_POST["end"]){
// 1-based start -> 0-based offset; defaults to the beginning of the sequence
if($_POST["start"]!=""){$start=$_POST["start"]-1;}else{$start=0;}
// defaults to the last residue
if($_POST["end"]!=""){$end=$_POST["end"];}else{$end=strlen($seq);}
// the length is $end-$start because $start is already 0-based
$seq=substr($seq,$start,$end-$start);
$result.="Subsequence used for calculations:
".chunk_split($seq, 70);
}
// length of the (sub)sequence used for the calculations
$seqlen=strlen($seq);
// the amino acid counts feed composition, molecular weight, absorption
// coefficient and both charge calculations, so compute them once if any
// of those options is selected
if ($_POST["composition"]==1 or $_POST["molweight"]==1 or $_POST["abscoef"]==1 or $_POST["charge"]==1 or $_POST["charge2"]==1){
// count the occurrences of each amino acid
$aminoacid_content=aminoacid_content($seq);
// add the formatted composition table to the report only when requested
if ($_POST["composition"]==1){
$result.="
Aminoacid composition of protein:
".print_aminoacid_content($aminoacid_content);
}
}
// the molecular weight is also an input of the absorption coefficient below,
// so it is computed when either option is selected
if ($_POST["molweight"]==1 or $_POST["abscoef"]==1){
// calculate molecular weight of protein
// (protein_molecular_weight() is defined outside this section)
$molweight=protein_molecular_weight($seq,$aminoacid_content);
if ($_POST["molweight"]==1){
$result.="
Molecular weight:
$molweight Daltons";
}
}
// molar absorption coefficient, reported rounded to two decimals
// (molar_absorption_coefficient_of_prot() is defined outside this section)
if ($_POST["abscoef"]==1){
$abscoef=molar_absorption_coefficient_of_prot($seq,$aminoacid_content,$molweight);
$result.="
Molar Absorption Coefficient at 280 nm:
".round($abscoef,2);
}
if ($_POST["charge"]==1){
// get pK values for the charged groups from the selected data source
$pK=pK_values ($data_source);
// calculate isoelectric point of protein
$charge=protein_isoelectric_point($pK,$aminoacid_content);
$result.="
Isoelectric point of sequence ($data_source):
".round($charge,2);
}
if ($_POST["charge2"]==1){
// get pK values for the charged groups from the selected data source
$pK=pK_values ($data_source);
// calculate charge of protein at requested pH
$charge=protein_charge($pK,$aminoacid_content,$pH);
$result.="
Charge of sequence at pH = $pH ($data_source):
".round($charge,2);
}
// sequence rewritten with three-letter amino acid codes
if ($_POST["3letters"]==1){
// convert the one-letter sequence
// (seq_1letter_to_3letter() is defined outside this section)
$three_letter_code=seq_1letter_to_3letter($seq);
// add to result
$result.="
Sequence as three letters aminoacid code:
".$three_letter_code;
}
// wrap at 70 characters per line; from here on $seq contains the line
// breaks inserted by chunk_split(), and that wrapped text is what the
// colouring functions below receive
$seq = chunk_split($seq, 70);
// colored sequence: polar / non-polar / charged amino acids
if ($_POST["type1"]==1){
// get the colored sequence (html code)
$colored_seq=protein_aminoacid_nature1($seq);
// add to result
$result.="
Polar, Nonpolar or Charged aminoacids:
".$colored_seq;
}
// colored sequence: polar / small non-polar / hydrophobic / negatively or
// positively charged amino acids
if ($_POST["type2"]==1){
// get the colored sequence (html code)
$colored_seq=protein_aminoacid_nature2($seq);
// add to result
$result.="
Polar, small non-polar, hydrophobic, negatively or positively charged aminoacids:
".$colored_seq;
}
}else{
// nothing was posted: default sequence (one of each supported symbol) and pH
$seq="ARNDCEQGHILKMFPSTWYVX*";
$original_seq=$seq;
$seqlen=strlen($seq);
$pH=7.0;
}
//############################################################################
//################# we have already manipulated the sequence #################
//############################# below is the form ############################
//############################################################################
?>
Protein sequence information
$charge\t$pH";
// modify pH for next round
if ($charge > 0) {$pH = $pH + $delta;}else{$pH = $pH - $delta;}
// reduce value for $delta
$delta = $delta/2;
}
// return pH at which charge=0 (the isoelectric point) with two decimals
return round($pH,2);
}
// Returns r/(r+1) where r = 10^($val1-$val2); the result lies between 0 and 1.
// protein_charge() calls it as (pK, pH) for positively charged groups and as
// (pH, pK) for negatively charged ones.
function partial_charge($val1,$val2){
    // concentration ratio between the two forms of the group
    $ratio = pow(10, $val1 - $val2);
    // fraction of the group carrying the charge
    return $ratio / ($ratio + 1);
}
// Computes the net charge of a protein at the given pH.
//   $pK                pK table with keys "N_terminus", "C_terminus", K, R, H, D, E, C, Y
//   $aminoacid_content residue counts keyed by one-letter code
//   $pH                pH at which the charge is evaluated
// Positive contributions: N-terminus plus K, R and H side chains.
// Negative contributions: D, E, C and Y side chains plus the C-terminus.
function protein_charge($pK,$aminoacid_content,$pH){
    $positive_residues = array("K", "R", "H");
    $negative_residues = array("D", "E", "C", "Y");

    // a single N-terminus per chain
    $net = partial_charge($pK["N_terminus"], $pH);

    // each side chain contributes its partial charge times its count
    foreach ($positive_residues as $residue) {
        $net += partial_charge($pK[$residue], $pH) * $aminoacid_content[$residue];
    }
    foreach ($negative_residues as $residue) {
        $net -= partial_charge($pH, $pK[$residue]) * $aminoacid_content[$residue];
    }

    // a single C-terminus per chain
    $net -= partial_charge($pH, $pK["C_terminus"]);

    return $net;
}
// Returns the pK table of the requested data source.
//   $data_source  "EMBOSS", "DTASelect" or "Solomon" (case-sensitive)
// The table is keyed by "N_terminus", "C_terminus" and the one-letter codes of
// the ionisable residues (K, R, H, D, E, C, Y).
// Returns null when $data_source is not one of the known names.
function pK_values ($data_source){
    $tables = array(
        "EMBOSS" => array(
            "N_terminus"=>8.6,
            "K"=>10.8,
            "R"=>12.5,
            "H"=>6.5,
            "C_terminus"=>3.6,
            "D"=>3.9,
            "E"=>4.1,
            "C"=>8.5,
            "Y"=>10.1
        ),
        "DTASelect" => array(
            "N_terminus"=>8,
            "K"=>10,
            "R"=>12,
            "H"=>6.5,
            "C_terminus"=>3.1,
            "D"=>4.4,
            "E"=>4.4,
            "C"=>8.5,
            "Y"=>10
        ),
        "Solomon" => array(
            "N_terminus"=>9.6,
            "K"=>10.5,
            // arginine pK is 12.5; the previous value 125 had lost its decimal point
            "R"=>12.5,
            "H"=>6.0,
            "C_terminus"=>2.4,
            "D"=>3.9,
            "E"=>4.3,
            "C"=>8.3,
            "Y"=>10.1
        )
    );
    // unknown (or non-string) source: report "no table" explicitly instead of
    // returning an undefined variable
    if (!is_string($data_source) || !isset($tables[$data_source])) {
        return null;
    }
    return $tables[$data_source];
}
// Formats the amino acid counts as text, one line per residue:
// one-letter code, three-letter code and count, separated by tabs.
//   $aminoacid_content  counts keyed by one-letter code
// Returns the concatenated lines (empty string for an empty array).
function print_aminoacid_content($aminoacid_content) {
    $rows = array();
    foreach ($aminoacid_content as $letter => $occurrences) {
        $rows[] = $letter."\t".seq_1letter_to_3letter ($letter)."\t".$occurrences."\n";
    }
    return implode("", $rows);
}
// NOTE(review): this block does not parse as written. The loop header below
// ("for($i=0; $i0)...") is missing the text between "$i" and "0)", and the
// statements that follow append to $result and return it instead of using the
// counter array initialised here. It looks like the opening of
// aminoacid_content() has been fused with the tail of a different per-residue
// routine (presumably the one behind the "type1" option, since the residue
// lists match the "Type of aminoacid (1)" help text) -- restore both
// functions from the upstream source before relying on either.
function aminoacid_content($seq) {
// one counter per recognised symbol, all starting at zero
$array=array("A"=>0,"R"=>0,"N"=>0,"D"=>0,"C"=>0,"E"=>0,"Q"=>0,"G"=>0,"H"=>0,"I"=>0,"L"=>0,
"K"=>0,"M"=>0,"F"=>0,"P"=>0,"S"=>0,"T"=>0,"W"=>0,"Y"=>0,"V"=>0,"X"=>0,"*"=>0);
// truncated line, see the note above
for($i=0; $i0){$result.="".substr($seq,$i,1)."";continue;}
// polar aminoacids, magenta
// (the leading space in the list makes strpos() return at least 1 for any
// listed residue, so ">0" works as a "found" test; the strings wrapped around
// the residue are empty in this copy, so no colour is actually applied)
if (strpos(" SCTNQHYW",substr($seq,$i,1))>0){$result.="".substr($seq,$i,1)."";continue;}
// charged aminoacids, red
if (strpos(" DEKR",substr($seq,$i,1))>0){$result.="".substr($seq,$i,1)."";continue;}
}
return $result;
}
// Walks the sequence and appends each residue to $result according to the
// class it belongs to (hydrophobic, polar, negatively or positively charged).
// NOTE(review): the loop header below ("for($i=0; $i0)...") is missing the text
// between "$i" and "0)", so the function does not parse as written and the
// first residue class cannot be read from this copy; the strings wrapped
// around each residue are also empty here, so no colour is actually applied.
// Restore the function from the upstream source.
function protein_aminoacid_nature2($seq){
$result="";
// truncated line, see the note above
for($i=0; $i0){$result.="".substr($seq,$i,1)."";continue;}
// Small hydrophobic (green)
// (the leading space in each list makes strpos() return at least 1 for any
// listed residue, so ">0" works as a "found" test)
if (strpos(" CVILPFYMW",substr($seq,$i,1))>0){$result.="".substr($seq,$i,1)."";continue;}
// Polar
// NOTE(review): D (aspartate) is listed here as polar while N (asparagine) is
// listed below as negatively charged -- the two letters look swapped; confirm.
if (strpos(" DQH",substr($seq,$i,1))>0){$result.="".substr($seq,$i,1)."";continue;}
// Negatively charged
if (strpos(" NE",substr($seq,$i,1))>0){$result.="".substr($seq,$i,1)."";continue;}
// Positively charged
if (strpos(" KR",substr($seq,$i,1))>0){$result.="".substr($seq,$i,1)."";continue;}
}
return $result;
}
// Chemical group/aminoacids:
// L/GAVLI Amino Acids with Aliphatic R-Groups
// H/ST Non-Aromatic Amino Acids with Hydroxyl R-Groups
// M/NQ Amide Amino Acids
// R/FYW Amino Acids with Aromatic Rings
// S/CM Amino Acids with Sulfur-Containing R-Groups
// I/P Imino Acids
// A/DE Acidic Amino Acids
// C/KRH Basic Amino Acids
// */*
// X/X
// Translates a one-letter amino acid sequence into a string of chemical-group
// codes, one code per residue:
//   L = G,A,V,L,I   H = S,T   M = N,Q   R = F,Y,W   S = C,M
//   I = P           A = D,E   C = K,R,H   * = *      X = X
//   $amino_seq  sequence in upper-case one-letter codes
// Returns the encoded string (empty for an empty sequence).
// Terminates the script with die() on any symbol outside the table.
function protein_aminoacids_chemical_group($amino_seq){
    // group code => residues belonging to that group
    $members_by_group = array(
        "L" => "GAVLI",
        "H" => "ST",
        "M" => "NQ",
        "R" => "FYW",
        "S" => "CM",
        "I" => "P",
        "A" => "DE",
        "C" => "KRH",
        "*" => "*",
        "X" => "X"
    );

    // invert the table: residue letter => group code
    $group_of = array();
    foreach ($members_by_group as $code => $letters) {
        foreach (str_split($letters) as $letter) {
            $group_of[$letter] = $code;
        }
    }

    $encoded = "";
    $total = strlen($amino_seq);
    for ($pos = 0; $pos < $total; $pos++) {
        $residue = substr($amino_seq, $pos, 1);
        if (!isset($group_of[$residue])) {
            die("Invalid amino acid symbol in input sequence.");
        }
        $encoded .= $group_of[$residue];
    }
    return $encoded;
}
// Prints the static help page of the tool: the notes on input handling, the
// molecular weight formula, the literature references and the residue classes
// used by the two colouring options. Takes no arguments and returns nothing;
// the text below is emitted verbatim as page content.
function print_information (){
?>
NOTES:
Non-coding characters will be removed by default.
NC-IUBMB codes are used as a reference.
Computation:
Molecular Weight:
MW =(nA*71.07)+(nR*156.18)+(nN*114.08)+(nD*115.08)+(nC*103.10)+
+(nQ*128.13)+(nE*129.11)+(nG*57.05)+(nH*137.14)+(nI*113.15)+
+(nL*113.15)+(nK*128.17)+(nM*131.19)+(nF*147.17)+(nP*97.11)+
+(nS*87.07)+(nT*101.10)+(nW*186.20)+(nY*163.17)+(nV*99.13)+18.02
Molar absorption coefficient: Pace et al., 1995
Isoelectric point estimation: Tabb D., 2003
Type of aminoacid (1):
Polar: SCTNQHYW
Non-Polar: GAPVILFM
Charged: DEKR
Type of aminoacid (2):
Small non-polar: SCTNQHYW (Yellow)
Hydrophobic: CVILPFYMW (Green)
Polar: DQH (Magenta)
Negatively charged: NE (Red)
Positively charged: KR (Blue)
">Use the tool
|
}
//############################################################################
//############################### End of functions ###########################
//############################################################################
?>
Source code is available at BioPHP.org