Login | Register

Info | Home

BioPHP - Describe data

Original code submitted by joseba
Code below is covered by GNU GPL v2 license.

Description

Last change: 2010/10/18 21:09 | Recent Changes | Original description
Will compute Mean, Median, Mode, Minimum, Maximum, Range, Sum x, Sum x2,
Variance, Standard Deviation, Skewness and Kurtosis from input values.
Some of the statistics are described.

Code

Last change: 2010/10/18 21:09 | Recent Changes | Download | Original code and
<html>
<head>
<title>Describe data</title>
</head>
<body bgcolor=FFFFFF>
<center>
<h1>Describe data</h1>

<?php

// author    Joseba Bikandi
// license   GNU GPL v2
// biophp.org

// Silence PHP diagnostics so they are not mixed into the generated page.
error_reporting(0);

// if formula is requested, show it
// (isset() guards the lookup: "show" is absent on a normal page load)
if (isset($_GET["show"]) && $_GET["show"]==="formula"){print_formula();die;}

if (!$_POST){
        // when nothing is posted, this data is used as an example
        $values="1,5,3,2,4,3,4,2,3";
        $nums=array(1,5,3,2,4,3,4,2,3);
}else{
        // when data is posted, lets parse it to an array
        // get the data; a missing or non-string field is treated as empty input
        $values=(isset($_POST["values"]) && is_string($_POST["values"])) ? $_POST["values"] : "";
        // remove every whitespace character (spaces, tabs, line breaks)
        $values=preg_replace("/\s+/","",$values);

        // parse data to an array, keeping only tokens that really are numbers:
        // anything else cannot be used in the arithmetic done by the statistics
        // functions. array_values() restores consecutive 0..n-1 keys.
        $nums=array_values(array_filter(
                preg_split("/,/",$values,-1,PREG_SPLIT_NO_EMPTY),
                "is_numeric"
        ));
}
print_form($values);
if ($nums){
        print_results($nums);
}else{
        // every statistic is undefined for an empty data set, so say so
        // instead of asking for computations that would divide by zero
        print "<p>No numeric values were found in the input.</p>";
}


//########print form
// Prints the data entry form.
//   $values  text shown in the input box (the comma separated data)
// The form posts back to the current script ($_SERVER["PHP_SELF"]).
function print_form($values){
        // Both strings are placed inside double-quoted HTML attributes.
        // PHP_SELF may contain extra path info chosen by the visitor and
        // $values is raw user input, so both are escaped before printing.
        $action=htmlspecialchars((string)$_SERVER["PHP_SELF"],ENT_QUOTES|ENT_SUBSTITUTE,"UTF-8");
        $shown=htmlspecialchars(is_scalar($values) ? (string)$values : "",ENT_QUOTES|ENT_SUBSTITUTE,"UTF-8");
        // Full "<?php" tags are used: the short "<?" form is only parsed
        // when the short_open_tag ini setting is enabled.
?>
        <table width=600>
        <tr><td align=center bgcolor=DDDDFF>
                <form action="<?php print $action; ?>" method=post>
                  Add comma separated data: <p>
                  <input type=text name=values value="<?php print $shown; ?>" size=60><p>
                  <input type=submit value="Describe data">
                </form>
        </td></tr>
        </table>
<?php
}
//######## print results
// this function requests computation of variables
// Prints a table with the descriptive statistics of the data.
//   $nums  array of numbers (or numeric strings) to describe
// Each figure is obtained by calling the matching statistics function
// (mean, median, mode, sum, sum2, variance, sd, skewness, kurtosis).
function print_results($nums){
        // None of the statistics is defined for an empty data set (min()/max()
        // reject an empty array and the averages would divide by zero), so
        // only report N = 0 in that case.
        if (empty($nums)){
?>
        <table width=600>

        <tr><td>N:</td><td>0</td></tr>
        <tr><td colspan=2>No data to describe.</td></tr>

        </table>
<?php
                return;
        }
        // Full "<?php" tags are used: the short "<?" form is only parsed
        // when the short_open_tag ini setting is enabled.
?>
        <table width=600>

        <tr><td>N:</td><td><?php print sizeof($nums); ?></td></tr>

        <tr><td><a href=?show=formula>Mean:</a></td><td><?php print mean($nums); ?></td></tr>
        <tr><td><a href=?show=formula>Median:</a></td><td><?php print median($nums); ?></td></tr>
        <tr><td><a href=?show=formula>Mode:</a></td><td><?php print mode($nums); ?></td></tr>

        <tr><td>Minimum:</td><td><?php print min($nums); ?></td></tr>
        <tr><td>Maximum:</td><td><?php print max($nums); ?></td></tr>
        <tr><td>Range:</td><td><?php print max($nums)-min($nums); ?></td></tr>

        <tr><td>Sum x:</td><td><?php print sum($nums); ?></td></tr>
        <tr><td>Sum x<sup>2</sup>:</td><td><?php print sum2($nums); ?></td></tr>

        <tr><td><a href=?show=formula>Variance:</a></td><td><?php print variance($nums); ?></td></tr>
        <tr><td><a href=?show=formula>Standard Deviation:</a></td><td><?php print sd($nums); ?></td></tr>

        <tr><td><a href=?show=formula>Skewness</a>:</td><td><?php print skewness($nums); ?></td></tr>
        <tr><td><a href=?show=formula>Kurtosis</a>:</td><td><?php print kurtosis($nums); ?></td></tr>

        </table>
<?php
}

// #################### print formula descriptions
// Prints the help page: a short description (and, where given, the formula)
// of each statistic, followed by a link back to the tool itself.
function print_formula(){
        // PHP_SELF may contain extra path info chosen by the visitor; escape it
        // before placing it inside the double-quoted href attribute.
        $self=htmlspecialchars((string)$_SERVER["PHP_SELF"],ENT_QUOTES|ENT_SUBSTITUTE,"UTF-8");
        // Full "<?php" tags are used: the short "<?" form is only parsed
        // when the short_open_tag ini setting is enabled.
?>
        <table width=600>
        <tr><td>



                <b>Mean</b>
                <p>A type of average, calculated by dividing the sum of a set of values by
                the number of values.
                <p> mean = Sum(Xi)/N
                <hr>

                <b>Median</b>
                <p>A type of average, found by arranging the values in order and then selecting
                the one in the middle. If the total number of values in the sample is even, then
                the median is the mean of the two middle numbers.
                <hr>

                <b>Mode</b>
                <p>The most frequent value in a group of values.
                <hr>

                <b>Variance</b>
                <p>The average of the square of the distance of each data point from the mean
                <p> variance = Sum((Xi-mean)^2))/N
                <hr>

                <b>Standard Deviation</b>
                <p>The standard deviation is the square root of the variance.
                <p> sd = sqrt(variance)
                <hr>

                <b>Skewness</b>
                <p>Skewness is a measure of symmetry, or more precisely, the lack of symmetry.
                A distribution, or data set, is symmetric if it looks the same to the left
                and right of the center point.
                <p> skewness = (Sum((Xi-mean)^3))/((N-1)*SD^3)

                <p>Skewness of symmetric data is zero
                <hr>

                <b>Kurtosis</b>
                <p>Kurtosis is a measure of whether the data are peaked or flat relative
                to a normal distribution. That is, data sets with high kurtosis tend
                to have a distinct peak near the mean, decline rather rapidly, and have
                heavy tails. Data sets with low kurtosis tend to have a flat top near
                the mean rather than a sharp peak. A uniform distribution would be the
                extreme case.
                <p> kurtosis = (Sum((Xi-mean)^4))/((N-1)*SD^4) -3

                <p>The standard normal distribution has a kurtosis of zero. Positive kurtosis
                indicates a "peaked" distribution and negative kurtosis indicates a "flat"
                distribution.
                <p>
                <div align=right><a href="<?php print $self; ?>">Go back to this tool</a></div>
        </td></tr>
        </table>
<?php
}

// Adds up every element of $nums and returns the total.
// An empty array yields 0.
function sum ($nums) {
        $total = 0;
        foreach ($nums as $item) {
                $total = $total + $item;
        }
        return $total;
}

// Returns the sum of the squares of the elements of $nums.
// An empty array yields 0.
function sum2 ($nums) {
        $total = 0;
        foreach ($nums as $item) {
                $square = $item ** 2;
                $total = $total + $square;
        }
        return $total;
}


// Arithmetic mean: the sum of the values divided by how many there are.
//   $nums    array of numbers (or numeric strings)
//   returns  the mean, or NAN when $nums is empty
function mean ($nums) {
        $n = count($nums);
        // The mean of no values is undefined; report NAN rather than
        // dividing by zero.
        if ($n == 0) {
                return NAN;
        }
        $temp = 0;
        foreach ($nums as $val) {
                $temp += $val;
        }
        return $temp/$n;
}

// Median: the middle value of the sorted data, or the mean of the two
// middle values when the number of values is even.
//   $nums    array of numbers (or numeric strings); the caller's array is
//            not modified, only the local copy is sorted
//   returns  the median, or NAN when $nums is empty
function median ($nums) {
        $n = count($nums);
        // The median of no values is undefined.
        if ($n == 0) {
                return NAN;
        }
        // sort() also renumbers the keys 0..n-1, which the indexing below needs
        sort($nums);
        // Integer half of n. Using a shift keeps the index an int; the
        // fractional index ($n-1)/2 would be silently truncated for even n.
        $mid = $n >> 1;
        if ($n & 1) {
                return $nums[$mid];
        }
        return ($nums[$mid - 1] + $nums[$mid])/2;
}

// Mode: the value that occurs most often in $nums.
//   $nums    array of numbers (or numeric strings)
//   returns  the most frequent value (as an int, or as a string for
//            non-integer values), or the text "frequency for each data is 1"
//            when no value is repeated (this includes an empty array).
// When several values share the highest frequency, the one that appears
// first in $nums is returned.
function mode ($nums) {
        $counts = array();
        foreach ($nums as $val) {
                // Count under a string key. Using a float directly as an array
                // key would drop its fractional part (1.5 and 1.2 would both be
                // counted as 1). Adding 0 first makes "1", "1.0" and 1 share
                // one key.
                $key = is_numeric($val) ? (string)($val + 0) : (string)$val;
                if (!isset($counts[$key])) {
                        $counts[$key] = 0;
                }
                $counts[$key]++;
        }
        // as many distinct values as values: nothing is repeated
        if (count($nums)==count($counts)){
                return "frequency for each data is 1";
        }
        // Pick the highest count; the strict ">" keeps the earliest value on ties.
        $best = null;
        $bestCount = 0;
        foreach ($counts as $value => $count) {
                if ($count > $bestCount) {
                        $best = $value;
                        $bestCount = $count;
                }
        }
        return $best;
}


// Population variance: the average squared distance of the values from
// their mean (the sum of squares is divided by N, not N-1).
//   $nums    array of numbers (or numeric strings)
//   returns  the variance, or NAN when $nums is empty
function variance ($nums) {
        $n = count($nums);
        // Undefined for an empty data set; avoid dividing by zero.
        if ($n == 0) {
                return NAN;
        }
        $mean = mean($nums);
        // start the accumulator explicitly instead of relying on an
        // undefined variable being treated as 0
        $temp = 0;
        foreach ($nums as $val) {
                $temp += pow($val - $mean, 2);
        }
        return $temp/$n;
}

// Standard deviation of $nums: the square root of variance($nums).
function sd ($nums) {
        $spread = variance($nums);
        return sqrt($spread);
}


// Skewness: sum of the cubed deviations from the mean divided by
// (N-1)*SD^3, where SD is the value returned by sd().
//   $nums    array of numbers (or numeric strings)
//   returns  the skewness, or NAN when it is undefined: fewer than two
//            values, or all values equal (SD = 0)
function skewness ($nums) {
        $n = count($nums);
        // With fewer than two values the (N-1) factor is zero or negative
        // and there is no spread to measure.
        if ($n < 2) {
                return NAN;
        }
        $mean = mean($nums);
        $sd = sd($nums);
        // identical values: the denominator would be zero
        if ($sd == 0) {
                return NAN;
        }
        // start the accumulator explicitly instead of relying on an
        // undefined variable being treated as 0
        $temp = 0;
        foreach ($nums as $val) {
                $temp += pow(($val - $mean), 3);
        }
        return $temp/(($n - 1)*pow($sd,3));
}

// Excess kurtosis: sum of the fourth powers of the deviations from the mean
// divided by (N-1)*SD^4, minus 3, where SD is the value returned by sd().
//   $nums    array of numbers (or numeric strings)
//   returns  the kurtosis, or NAN when it is undefined: fewer than two
//            values, or all values equal (SD = 0)
function kurtosis ($nums) {
        $n = count($nums);
        // With fewer than two values the (N-1) factor is zero or negative
        // and there is no spread to measure.
        if ($n < 2) {
                return NAN;
        }
        $mean = mean($nums);
        $sd = sd($nums);
        // identical values: the denominator would be zero
        if ($sd == 0) {
                return NAN;
        }
        // start the accumulator explicitly instead of relying on an
        // undefined variable being treated as 0
        $temp = 0;
        foreach ($nums as $val) {
                $temp += pow(($val - $mean), 4);
        }
        return ($temp/(($n - 1)*pow($sd,4)))-3;
}

?>
<hr>
Source code is available at <a href=http://www.biophp.org/stats/describe_data/>biophp.org</a>
</center>
</body>
</html>