// Compression and decompression of a file using a fixed 2-bit code per character.
#include"header.h"// Project header holding all shared declarations. An #include "..." file is looked up in the directory of the including file first and then in the standard include directories; an #include <...> file is looked up only in the standard include directories (the shell's $PATH is not involved).
// decompress - decode the 2-bit packed file "desti" and print every symbol.
//
// Each byte of "desti" carries four 2-bit indices, most significant pair
// first. Every index selects one entry of masterarray, which is printed as
// "ch=<char>".
//
// Returns 0 when the whole file was decoded, -1 when "desti" cannot be
// opened or a read fails.
//
// NOTE(review): the packed format stores no symbol count, so a last byte that
// was only partly filled with real input is still decoded as four symbols.
// NOTE(review): indices range over 0..3, so masterarray must hold at least
// four entries; that is not verified here.
int decompress()
{
    int i;
    unsigned char ch=0;

    printf("IN DECOMPRESS\n");
    fd=open("desti",O_RDONLY);
    printf("fd=%d\n",fd);
    if(fd<0)
        return -1;

    for(;;)
    {
        count=read(fd,&ch,1);
        // Stop on end of file (0) or error (<0) before touching ch, so a
        // stale byte is never decoded a second time.
        if(count<=0)
            break;
        printf("read\n");
        for(i=0;i<4;i++)
        {
            // Pair 0 lives in bits 7..6, pair 3 in bits 1..0.
            unsigned char idx=(ch>>(6-2*i))&3;
            printf("ch=%c\n",*(masterarray+idx));
        }
    }

    close(fd);
    return count<0?-1:0;
}
// indu - look up the position of buff inside masterarray.
//
// buff: the character whose index is wanted.
//
// A NUL character jumps straight to the OUT label. For any other character
// the search starts at index 0 and the while loop keeps running as long as
// the masterarray entry at i differs from buff.
//
// NOTE(review): the loop body, the OUT label and the return statement are not
// visible in this part of the file. According to the original author's note
// the index i at which the match occurs is the value returned - confirm
// against the complete source.
int indu(char buff)
{
int i=0;
// NUL is never looked up in the table.
if(buff == '\0')
goto OUT;
// Scan masterarray from the front until the entry equal to buff is reached.
while(buff != *(masterarray+i))
// Compression loop: reads "source" one character at a time, converts each
// character to its index via indu(), and packs four 2-bit indices into every
// byte written to "desti" (first character in the two most significant bits).
//
// NOTE(review): the enclosing function header and the closing braces are not
// visible here; main() calls compress(), so this is presumably its body -
// confirm against the complete source.
//
// byt collects the four 2-bit codes of the current group; ch is scratch space
// for one code. Both are unsigned so the shifts below never see a negative
// value.
// NOTE(review): byt is read inside its own initializer before it has a value;
// the intent is clearly "start at 0".
unsigned char byt=byt^byt,ch=0;
fd=open("source",O_RDONLY);
printf("fd=%d\n",fd);
// NOTE(review): O_CREAT is used without the mode argument that open() expects
// in that case.
wfd=open("desti",O_WRONLY|O_CREAT);
printf("wfd=%d\n",wfd);
count=1;
// Keep producing output bytes while the last read delivered one character.
while(count==1)
{
// One output byte = up to four input characters.
for(i=0;i<4;i++)
{
count=read(fd,&buff,1);
printf("buff=%c\n",buff);
// NOTE(review): read() leaves buff untouched at end of file, so this test
// only fires when the previously read character was NUL; otherwise the
// remaining slots of the group are filled from the stale buff - confirm intent.
if(count==0 && buff=='\0')
break;
// The index of the character in masterarray is its code.
ind=indu(buff);
ch=ind;
printf("ch=%d\n",ch);
// Move the low two bits of the index up to bits 7..6 ...
ch=ch<<6;
// ... then slide them right by 2*i so the i-th character of the group
// occupies its own bit pair.
ch=ch>>(2*i);
// Merge the pair into the output byte.
byt=byt|ch;
}
// Emit the packed group.
ret=write(wfd,&byt,1);
// Clear the accumulator for the next group of four characters.
byt=byt^byt;
// Only the number of DISTINCT characters in the file is limited to 4, not the
// length of the file.
// Example: in "ANU ANU ANU" every 'A' maps to the same index, so 'A' always
// receives the same 2-bit code.
// codelength - number of bits needed to give each of ndc symbols its own code.
//
// ndc: number of distinct characters.
//
// Returns the smallest j with 2^j >= ndc (ceil(log2(ndc))), for example 2 for
// ndc == 4 and 3 for ndc == 5. Returns 0 when ndc is 0, 1 or negative.
int codelength(int ndc)
{
    // Unsigned so that doubling beyond INT_MAX/2 is well defined; a signed
    // counter would overflow for ndc above 2^30.
    unsigned int span=1;
    int bits=0;

    if(ndc<=1)
        return 0;

    while(span<(unsigned int)ndc)
    {
        span*=2;
        bits++;
    }
    return bits;
}
// check - record buff in masterarray unless it is already present.
//
// buff: character just read from the input file.
//
// The loop compares buff with the first `size` entries of masterarray and
// jumps to the OUT label on a match. When no entry matches, i equals size
// after the loop and a non-NUL buff is stored at index i-1.
//
// NOTE(review): the OUT label, the return statements and the closing brace
// are not visible in this part of the file. The original author's notes say
// the OUT path returns -1, and create_masterarray() treats a return of 0 as
// "a new character was stored" - confirm against the complete source.
// NOTE(review): the slot at index size-1 is compared in the loop before it is
// ever written, and index i-1 is -1 if size is 0 - verify.
int check(char buff)
{
int i;
for(i=0;i<size;)
{
// Already known: nothing to store.
if(*(masterarray+i)==buff)
goto OUT;
else
i++;
}
// No match: append the character in the last allocated slot.
if(buff != '\0')
*(masterarray+i-1)=buff;
// Show the content of that slot.
printf("*(masterarray+i-1)=%c\n",*(masterarray+i-1));
// create_masterarray - make room in masterarray and register buff in it.
//
// buff: character just read from the input file.
//
// The first call (flag == 1) allocates masterarray with `size` bytes and
// clears flag; every later call resizes it to the current `size`. A non-NUL
// buff is passed to check(); when ret is 0 afterwards, size is incremented.
//
// Returns size-2.
//
// NOTE(review): the results of malloc() and realloc() are not checked, and
// realloc() overwrites masterarray directly, so the old buffer would be lost
// if it failed.
// NOTE(review): ret is not updated when buff is NUL, so the test below then
// sees the value left by an earlier call - confirm this is intended.
// NOTE(review): the closing brace of this function is not visible in this
// part of the file.
int create_masterarray(char buff)
{
// First call: create the table.
if(flag==1)
{
masterarray=malloc(sizeof(char)*size);
flag=0;
}
else
{
// Later calls: resize the existing table to the current size.
masterarray=realloc(masterarray,sizeof(char)*size);
}
// NUL characters are never registered.
if(buff != '\0')
ret=check(buff);
// ret == 0 is taken to mean that check() stored a new character, so one more
// slot is needed next time.
if(ret==0)
size++;
return size-2;
// main - drive the whole demo: scan the input file, compress, decompress.
//
// argc/argv: argv[1] is the name of the file to scan.
//
// Steps: every character of argv[1] is fed to create_masterarray(); the last
// value it returns is printed as "ndc" and passed to codelength(); then
// compress() and decompress() are run and a message is printed for each one
// that returns 0.
//
// NOTE(review): the end of this function is not visible in this part of the
// file.
int main(int argc,char* argv[])
{
char buff;
// NOTE(review): a missing file name only prints a message; execution then
// continues and uses argv[1] anyway.
if(argc <2)
{
printf("PLEASE GIVE ME FILENAME\n");
}
printf("argv[1]=%s\n",argv[1]);
fd=open(argv[1],O_RDONLY);
printf("fd=%d\n",fd);
// NOTE(review): count is not assigned in this function before the loop, so
// whether the loop is entered depends on its value at this point - verify.
// NOTE(review): create_masterarray() is also called after the read that
// reports end of file, with buff still holding the previous character.
while(count)
{
count=read(fd,&buff,1);
ret=create_masterarray(buff);
}
printf("ndc=%d\n",ret);
// Number of bits needed per character for that many distinct characters.
ret=codelength(ret);
printf("codelength=%d\n",ret);
ret=compress();
if(ret==0)
printf("COMPRESSED\n");
ret=decompress();
if(ret==0)
printf("DECOMPRESSED\n");