# C Program on statistical analysis

Statistics is a branch of mathematics that is used in many other fields. The basic tasks in statistical analysis include calculating the mean of a distribution, measuring its dispersion, and so on. I wrote this C program, which performs the basic operations of statistical analysis and also does curve fitting by the method of least squares.

## Basic Concepts of Statistics

Distribution : Collection of data i.e. some values is called a distribution. For example, 2, 4, 8 is a distribution with 3 observations.

Mean : The mean of a distribution is the average value of its observations. There are three types of mean: the Arithmetic Mean, the Geometric Mean and the Harmonic Mean. Consider the distribution 2, 4, 8 for example. Its Arithmetic, Geometric and Harmonic means are 4.67, 4 and 3.43 respectively and are calculated from the formulas:

$$\text{AM} = \frac{1}{n}\sum_{i=1}^{n} x_i, \qquad \text{GM} = \left(\prod_{i=1}^{n} x_i\right)^{1/n}, \qquad \text{HM} = \frac{n}{\sum_{i=1}^{n} \dfrac{1}{x_i}}$$

These means are also called measures of central tendency.

Range : The difference between maximum and minimum observations of the distribution is called the range of the distribution.
Standard Deviation : Standard Deviation is the measure of deviation of the distribution from the mean of the distribution.
Variance : The square of standard deviation of the distribution is called the variance of the distribution.
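The range, variance and standard deviation of the distribution 2, 4, 8 are 6, 6.22 and 2.49 respectively and are calculated from the formulas:

$$\text{Range} = x_{\max} - x_{\min}, \qquad \sigma^2 = \frac{1}{n}\sum_{i=1}^{n} x_i^2 - \left(\frac{1}{n}\sum_{i=1}^{n} x_i\right)^2, \qquad \sigma = \sqrt{\sigma^2}$$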

Range, Variance and Standard Deviation are also called measures of dispersion.

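Coefficient of Correlation : It is a measure of the strength of the linear relationship between two distributions. Its formula is:

$$r = \frac{\dfrac{1}{n}\sum_{i=1}^{n} x_i y_i - \bar{x}\,\bar{y}}{\sigma_x\,\sigma_y}$$

where $\bar{x}$, $\bar{y}$ are the arithmetic means and $\sigma_x$, $\sigma_y$ the standard deviations of the two distributions.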

All the above formulas have some common variables. They are:
1) 'n' for number of observations in a distribution.
2) 'x1', 'x2' up to 'xn' for 'n' observations in a distribution.
3) 'y1', 'y2' up to 'yn' for 'n' observations in another distribution.

Curve Fitting : Curve fitting refers to fitting a distribution to a straight line, a parabola, and so on. Put simply, curve fitting means finding a curve whose behavior is as close as possible to that of the distribution under consideration. This program does curve fitting using the method of least squares.

## Program

#include<stdio.h>
#include<conio.h>
#include<math.h>
/* Numeric type shared by the statistics helpers and main for observations and computed results. */
typedef long double val;
/*
 * Returns the largest of the first b observations stored in A.
 *
 * A : observations to scan.
 * b : number of observations; typed val to match the existing signature,
 *     but expected to be a whole number >= 1 (A[0] is read unconditionally).
 */
val maximumvalue_obs(val A[],val b)
{
    long count = (long)b;   /* array subscripts must be integers, not long double */
    long idx;
    val largest = A[0];

    for (idx = 1; idx < count; idx++)
    {
        if (A[idx] > largest)
            largest = A[idx];
    }
    return largest;
}
/*
 * Returns the smallest of the first b observations stored in A.
 *
 * A : observations to scan.
 * b : number of observations; typed val to match the existing signature,
 *     but expected to be a whole number >= 1 (A[0] is read unconditionally).
 */
val minimumvalue_obs(val A[],val b)
{
    long count = (long)b;   /* array subscripts must be integers, not long double */
    long idx;
    val smallest = A[0];

    for (idx = 1; idx < count; idx++)
    {
        if (A[idx] < smallest)
            smallest = A[idx];
    }
    return smallest;
}
val stddeviation_obs(val A[],val b)
{
val c,d,e,f;
for(c=0,d=0,e=0;c<b;c++)
{
d+=A[c]/b;
e+=(A[c]*A[c])/b;
}
f=sqrt(e-d*d);
return(f);
}
/*
 * Prints a separator: a row of 77 asterisks, then a newline and one space
 * (the space indents whatever is printed next).
 */
void starlinedisplay(void)
{
    const int line_width = 77;
    int column;   /* integer counter; a floating-point loop counter is not needed here */

    for (column = 0; column < line_width; column++)
        printf("*");
    printf("\n ");
}
/* Capacity of the two observation buffers owned by main. */
enum { MAX_OBS = 10 };

/*
 * Outcome of an input request.
 * INPUT_OK  : the value(s) were read.
 * INPUT_BAD : the request was abandoned and a message was already printed;
 *             the caller should simply return to the main menu.
 * INPUT_END : standard input is exhausted; the program should stop.
 */
enum { INPUT_END = -1, INPUT_BAD = 0, INPUT_OK = 1 };

/*
 * Reads one number from standard input into *out.
 * "%Lf" is the conversion that matches val (long double).
 * Malformed text is reported and the rest of that line is discarded so the
 * next read does not trip over the same characters again.
 */
static int read_number(val *out)
{
    int fields = scanf("%Lf", out);
    int ch;

    if (fields == 1)
        return INPUT_OK;
    if (fields == EOF)
        return INPUT_END;
    do
        ch = getchar();
    while (ch != '\n' && ch != EOF);
    printf("\n INVALID INPUT:TRY AGAIN \n\n ");
    return INPUT_BAD;
}

/* Prints the "<subject> IS UNDEFINED" message; lead is the leading newline run. */
static void report_undefined(const char *lead, const char *subject)
{
    printf("%s %s IS UNDEFINED:TRY AGAIN \n\n ", lead, subject);
}

/* Prints "<subject> is {value}" in the program's usual result layout. */
static void report_result(const char *subject, val value)
{
    printf("\n\n %s is {%Lf} \n\n ", subject, value);
}

/*
 * Nonzero when n is a positive whole number that fits the observation
 * buffers. A count above MAX_OBS gets an explanatory hint because it would
 * otherwise overrun the arrays.
 */
static int count_is_usable(val n)
{
    if (n != floorl(n) || n <= 0)
        return 0;
    if (n > MAX_OBS)
    {
        printf("\n At most %d observations are supported \n", (int)MAX_OBS);
        return 0;
    }
    return 1;
}

/* Prompts for and reads the observation count of data set `label`. */
static int ask_count(const char *label, val *count)
{
    printf("\n Enter {number} of observations of data (%s):", label);
    return read_number(count);
}

/* Prompts for and reads `count` observations of data set `label` into data. */
static int ask_observations(const char *label, val data[], long count)
{
    long i;
    int status = INPUT_OK;

    printf("\n Enter {observations} of data (%s):", label);
    for (i = 0; i < count && status == INPUT_OK; i++)
        status = read_number(&data[i]);
    return status;
}

/* Echoes data set `label`; lead is the leading newline run. */
static void show_data(const char *lead, const char *label, const val data[], long count)
{
    long i;

    printf("%s Data (%s) is: \n\n", lead, label);
    for (i = 0; i < count; i++)
        printf(" (%Lf) ", data[i]);
}

/* Echoes data set A followed by data set B. */
static void show_pair(const val A[], const val B[], long count)
{
    show_data("\n", "A", A, count);
    show_data("\n\n", "B", B, count);
}

/*
 * Reads the size and the observations of data set A.
 * On INPUT_OK, *count holds the number of observations stored in A.
 * An unusable size is reported as "<subject> IS UNDEFINED".
 */
static int load_single(const char *subject, val A[], long *count)
{
    val n;
    int status = ask_count("A", &n);

    if (status != INPUT_OK)
        return status;
    if (!count_is_usable(n))
    {
        report_undefined("\n", subject);
        return INPUT_BAD;
    }
    *count = (long)n;
    return ask_observations("A", A, *count);
}

/*
 * Reads the sizes and the observations of data sets A and B, which must
 * have the same usable size. On INPUT_OK, *count holds that common size.
 */
static int load_pair(const char *subject, val A[], val B[], long *count)
{
    val size_a, size_b;
    int status = ask_count("A", &size_a);

    if (status == INPUT_OK)
        status = ask_count("B", &size_b);
    if (status != INPUT_OK)
        return status;
    if (!count_is_usable(size_a) || !count_is_usable(size_b) || size_a != size_b)
    {
        report_undefined("\n", subject);
        return INPUT_BAD;
    }
    *count = (long)size_a;
    status = ask_observations("A", A, *count);
    if (status == INPUT_OK)
        status = ask_observations("B", B, *count);
    return status;
}

/*
 * Least-squares line ys = intercept + slope * xs through n points.
 * Returns 0 when the normal equations are singular (all xs equal),
 * 1 otherwise.
 */
static int solve_line(const val xs[], const val ys[], long n, val *intercept, val *slope)
{
    val sum_x = 0, sum_y = 0, sum_xx = 0, sum_xy = 0, det;
    long i;

    for (i = 0; i < n; i++)
    {
        sum_x += xs[i];
        sum_y += ys[i];
        sum_xx += xs[i] * xs[i];
        sum_xy += xs[i] * ys[i];
    }
    det = n * sum_xx - sum_x * sum_x;
    if (det == 0)
        return 0;
    *intercept = (sum_y * sum_xx - sum_xy * sum_x) / det;
    *slope = (n * sum_xy - sum_y * sum_x) / det;
    return 1;
}

/* Menu 1: arithmetic, geometric and harmonic mean of data set A. */
static int central_tendency_menu(val A[])
{
    const char *subject;
    val choice, sum, product, root;
    long n, i;
    int has_zero;
    int status;

    printf("\n MEASURES OF CENTRAL TENDENCY \n\n");
    printf(" 1.Arithmetic Mean ");
    printf(" 2.Geometric Mean ");
    printf(" 3.Harmonic Mean ");
    printf("\n\n Enter your choice:");
    status = read_number(&choice);
    if (status != INPUT_OK)
        return status;

    if (choice == 1)
    {
        subject = "Arithmetic Mean of Data (A)";
        status = load_single(subject, A, &n);
        if (status != INPUT_OK)
            return status;
        for (i = 0, sum = 0; i < n; i++)
            sum += A[i];
        show_data("\n", "A", A, n);
        report_result(subject, sum / n);
    }
    else if (choice == 2)
    {
        subject = "Geometric Mean of Data (A)";
        status = load_single(subject, A, &n);
        if (status != INPUT_OK)
            return status;
        for (i = 0, product = 1; i < n; i++)
            product *= A[i];
        show_data("\n", "A", A, n);
        if (product < 0 && n % 2 == 0)
            report_undefined("\n\n", subject);   /* no real even root of a negative product */
        else
        {
            /* The mean is the n-th root of the product, not the product itself;
               an odd count admits a negative real root. */
            root = (product < 0) ? -powl(-product, 1.0L / n) : powl(product, 1.0L / n);
            report_result(subject, root);
        }
    }
    else if (choice == 3)
    {
        subject = "Harmonic Mean of Data (A)";
        status = load_single(subject, A, &n);
        if (status != INPUT_OK)
            return status;
        for (i = 0, sum = 0, has_zero = 0; i < n; i++)
        {
            if (A[i] == 0)
                has_zero = 1;
            else
                sum += 1 / A[i];
        }
        /* Undefined when an observation is zero or the reciprocals cancel out. */
        if (has_zero || sum == 0)
            report_undefined("\n", subject);
        else
        {
            show_data("\n", "A", A, n);
            report_result(subject, n / sum);   /* n divided by the sum of reciprocals */
        }
    }
    else
        printf("\n {%Lf} IS NOT A VALID CHOICE:TRY AGAIN \n\n ", choice);
    return INPUT_OK;
}

/* Menu 2: range and standard deviation of data set A. */
static int dispersion_menu(val A[])
{
    const char *subject;
    val choice;
    long n;
    int status;

    printf("\n MEASURES OF DISPERSION \n\n");
    printf(" 1.Range ");
    printf(" 2.Standard Deviation ");
    printf("\n\n Enter your choice:");
    status = read_number(&choice);
    if (status != INPUT_OK)
        return status;

    if (choice == 1)
    {
        subject = "Range of Data (A)";
        status = load_single(subject, A, &n);
        if (status != INPUT_OK)
            return status;
        show_data("\n", "A", A, n);
        report_result(subject, maximumvalue_obs(A, n) - minimumvalue_obs(A, n));
    }
    else if (choice == 2)
    {
        subject = "Standard Deviation of Data (A)";
        status = load_single(subject, A, &n);
        if (status != INPUT_OK)
            return status;
        show_data("\n", "A", A, n);
        report_result(subject, stddeviation_obs(A, n));
    }
    else
        printf("\n {%Lf} IS NOT A VALID CHOICE:TRY AGAIN \n\n ", choice);
    return INPUT_OK;
}

/* Fit 1: straight line (B) = x + y(A). */
static int fit_straight_line(val A[], val B[])
{
    static const char subject[] = "Best Fit of Data (A) and Data (B) to a \n\n straight line";
    val x, y;
    long n;
    int status = load_pair(subject, A, B, &n);

    if (status != INPUT_OK)
        return status;
    show_pair(A, B, n);
    if (solve_line(A, B, n, &x, &y))
        printf("\n\n %s is (B)={%Lf}+{%Lf}(A) \n\n ", subject, x, y);
    else
        report_undefined("\n\n", subject);
    return INPUT_OK;
}

/*
 * Fit 2: parabola (B) = x + y(A) + z(A)^2.
 * The three normal equations are solved with Cramer's rule.
 */
static int fit_parabola(val A[], val B[])
{
    static const char subject[] = "Best Fit of Data (A) and Data (B) to a \n\n parabola";
    val s1 = 0, s2 = 0, s3 = 0, s4 = 0;   /* sums of A, A^2, A^3, A^4 */
    val t0 = 0, t1 = 0, t2 = 0;           /* sums of B, A*B, A^2*B    */
    val square, det, x, y, z;
    long n, i;
    int status = load_pair(subject, A, B, &n);

    if (status != INPUT_OK)
        return status;
    for (i = 0; i < n; i++)
    {
        square = A[i] * A[i];
        s1 += A[i];
        s2 += square;
        s3 += square * A[i];
        s4 += square * square;
        t0 += B[i];
        t1 += A[i] * B[i];
        t2 += square * B[i];
    }
    det = n * s2 * s4 + 2 * s1 * s2 * s3 - n * s3 * s3 - s1 * s1 * s4 - s2 * s2 * s2;
    show_pair(A, B, n);
    if (det != 0)
    {
        x = (t0 * s2 * s4 - t0 * s3 * s3 - s1 * t1 * s4 + s1 * s3 * t2 + s2 * t1 * s3 - s2 * s2 * t2) / det;
        y = (n * t1 * s4 - n * s3 * t2 - s1 * t0 * s4 + t0 * s2 * s3 + s1 * s2 * t2 - s2 * s2 * t1) / det;
        z = (n * s2 * t2 - n * t1 * s3 - s1 * s1 * t2 + s1 * s2 * t1 + s1 * t0 * s3 - t0 * s2 * s2) / det;
        printf("\n\n %s is (B)={%Lf}+{%Lf}(A)+{%Lf}pow[(A),2] \n\n ", subject, x, y, z);
    }
    else
        report_undefined("\n\n", subject);
    return INPUT_OK;
}

/*
 * Fits 3 and 4, both solved as a straight line after taking logarithms.
 * power_law != 0 : (B) = x * pow[(A), y]   (line through log A, log B)
 * power_law == 0 : (B) = x * pow[y, (A)]   (line through A, log B)
 * The fit is undefined when a value whose logarithm is needed is not
 * positive.
 */
static int fit_exponential(val A[], val B[], int power_law)
{
    static const char subject[] = "Best Fit of Data (A) and Data (B) to an \n\n exponential curve";
    val xs[MAX_OBS], ys[MAX_OBS];
    val log_scale, rate;
    long n, i;
    int usable = 1;
    int status = load_pair(subject, A, B, &n);

    if (status != INPUT_OK)
        return status;
    for (i = 0; i < n && usable; i++)
    {
        if (B[i] <= 0 || (power_law && A[i] <= 0))
            usable = 0;
        else
        {
            xs[i] = power_law ? logl(A[i]) : A[i];
            ys[i] = logl(B[i]);
        }
    }
    show_pair(A, B, n);
    if (!usable || !solve_line(xs, ys, n, &log_scale, &rate))
        report_undefined("\n\n", subject);
    else if (power_law)
        printf("\n\n %s is (B)={%Lf}pow[(A),{%Lf}] \n\n ", subject, expl(log_scale), rate);
    else
        printf("\n\n %s is (B)={%Lf}pow[{%Lf},(A)] \n\n ", subject, expl(log_scale), expl(rate));
    return INPUT_OK;
}

/* Menu 3: chooses one of the four least-squares fits. */
static int curve_fitting_menu(val A[], val B[])
{
    val choice;
    int status;

    printf("\n CURVE FITTING USING THE METHOD OF LEAST SQUARES \n\n");
    printf(" 1.(B)=x+y(A) ");
    printf(" 2.(B)=x+y(A)+zpow[(A),2] ");
    printf(" 3.(B)=xpow[(A),y] ");
    printf(" 4.(B)=xpow[y,(A)] ");
    printf("\n\n Enter your choice:");
    status = read_number(&choice);
    if (status != INPUT_OK)
        return status;

    if (choice == 1)
        return fit_straight_line(A, B);
    if (choice == 2)
        return fit_parabola(A, B);
    if (choice == 3)
        return fit_exponential(A, B, 1);
    if (choice == 4)
        return fit_exponential(A, B, 0);
    printf("\n {%Lf} IS NOT A VALID CHOICE:TRY AGAIN \n\n ", choice);
    return INPUT_OK;
}

/* Menu 4: coefficient of correlation of data sets A and B. */
static int correlation_task(val A[], val B[])
{
    static const char subject[] = "Coefficient of Correlation of Data (A) and Data (B)";
    val mean_a = 0, mean_b = 0, mean_ab = 0;
    val covariance, spread;
    long n, i;
    int status = load_pair(subject, A, B, &n);

    if (status != INPUT_OK)
        return status;
    for (i = 0; i < n; i++)
    {
        mean_a += A[i] / n;
        mean_b += B[i] / n;
        mean_ab += (A[i] * B[i]) / n;
    }
    covariance = mean_ab - mean_a * mean_b;
    spread = stddeviation_obs(A, n) * stddeviation_obs(B, n);
    show_pair(A, B, n);
    if (spread != 0)
        report_result(subject, covariance / spread);
    else
        report_undefined("\n\n", subject);   /* one data set is constant */
    return INPUT_OK;
}

/*
 * Entry point: shows the main menu until the user picks "5.Off" or standard
 * input ends. Each pass dispatches to one of the menu handlers above and
 * closes with a separator line.
 *
 * Declared int main(void) and left via return, so neither exit() nor the
 * non-portable getch() is needed.
 */
int main(void)
{
    val A[MAX_OBS], B[MAX_OBS];
    val choice;
    int status;

    for (;;)
    {
        printf("\n 1.Measures of Central Tendency ");
        printf(" 2.Measures of Dispersion ");
        printf(" 3.Curve Fitting \n\n by using the Method of Least Squares ");
        printf(" 4.Coefficient of Correlation ");
        printf(" 5.Off ");
        printf("\n\n Enter your choice:");
        status = read_number(&choice);
        if (status == INPUT_OK)
        {
            if (choice == 1)
                status = central_tendency_menu(A);
            else if (choice == 2)
                status = dispersion_menu(A);
            else if (choice == 3)
                status = curve_fitting_menu(A, B);
            else if (choice == 4)
                status = correlation_task(A, B);
            else if (choice == 5)
            {
                printf("\n ");
                starlinedisplay();
                return 0;
            }
            else
                printf("\n {%Lf} IS AN INVALID CHOICE:TRY AGAIN \n\n ", choice);
        }
        if (status == INPUT_END)
            break;   /* nothing more to read: stop instead of looping on stale values */
        starlinedisplay();
    }
    return 0;
}

## Explanation

This is a menu-driven C program. Two variables, 'choice1' and 'choice2', are used to select the type of operation and the specific operation respectively. The program is designed not to print erroneous results: when invalid input is given, it displays an error message instead.