#include<stdio.h> #include<fcntl.h> #include<unistd.h> #include<stdlib.h> #include<string.h> int fd,wfd,count=1,size=1,flag=1,ret; int create_masterarray(char); char* masterarray; int indu(char); int codelength(int); int compress(); int decompress(); int check(char); ~
/* 5-bit compression and decompression. */
// With 5 bits per code we can represent at most 32 distinct characters from the file. The size of the compressed file depends not only on the number of distinct characters but also on the size of the file being compressed.
int decompress()//decomression of 5 bit is in reverse to compression
while(buff != *(masterarray+i))//WHOLE FILE TO BE COMPRESSED IS COMPAIRED TO MASTERARAY..AT WHAT VALUE OF i..character from file is matched to distinct character of masterarray..that value of i will be treated as identification of that character ...so that value of i is returned...
{
//printf("i=%d\n",i);
//if(buff != *(masterarray+i))
i++;
}
return i;
OUT:
return -1;
}
/IN COMPRESSION OF 5 BIT I HAVE TO DO THIS TYPE OF SHIFTING,,,,,in will have same character as ch // in=in<<6 in=in<<4 in=in<<7 in=in<<5 //ch=ch<<3 ch=ch<<3 ch=ch<<3 ch=ch<<3 ch=ch<<3 ch=ch<<3 ch=ch<<3 ch=ch<<3 //ch=ch>>0 ch=ch>>5 ch=ch>>2 ch=ch>>7 ch=ch>>4 ch=ch>>1 ch=ch>>6 ch=ch>>3 int compress()//compression of 5 bit is done.. { int i=0,j,b=5,ind,set; int p,x=7,y=6; unsigned char buff,in=in^in;//;//byt & ch is always of unsigned char not of char..bcz.char can have -ve value .& we have to convert it in addressing...while decompression ,addition of -ve value will give wrong address ...result no character will found...so address must always be +ve...
ch=ind;////index of distinct characters is distinct identinfication of distinct characters..so use this distinct identification for distinct no. of characters to find distinct addresses for distinct characters
// Put that index into a character-sized value: a char has 8 bits, and every character in the ASCII chart fits in at most 8 bits.
//
if(j%2==0)//i have taken the case of even & odd seperately...even no. of right shift will take place for even value of j....for every even no.//i=0 2 4 6 ..will be right shift ..to compress character
{
i=j;//for even value ..right shift of ch will be same as value of j at that time
//printf("i=%d\n",i);
if(j>2)//for j=4 & j=6...i.e...4 6 right shift of ch..//.this condition is held to make left shift for pending no. of bit ,..which will be further taken as another byt...
{
p=x;//at the same time when j=4 & j=6 in variable in which there are pending no. of bits will have left shift of 7 & 5 respectively //7 in 1st case of even no. of shift of ch//use 5 for j=6//..
x=x-2;//5//3 not used//x=x-2 will make it 5 for further time when j=6 ...
i=b;//5 in i in 1st case//7 in 2nd case of odd//for j=5 b=1//j=7 for b=3
b=b+2;//b=7 in 1st case//b=9 not going to use 9//after j=5 b=3
if(j<4)//for j=1 & j=3..i.e...5 7 right shift of ch
{
p=y;//y=6//y=4 ...2nd time in//
y=y-2;//y=4
}
}
if((j>2 && j%2 == 0) || (j<4 && j%2 != 0))//j=4 j=6 for 4 6 right shift of ch & 7 5 left shift of in || j=1 j=3 for 5 7 right shift of ch & 6 4 left shift of in
{
in=ch;
in=in<<(p);//6 in 1st case of odd for j=1//4 left shift 2nd case of odd//7 in 1st case of even when j=4 & 4 right shift of ch..last bit came at 1st position of other byt//
}
ch=ch<<3;//
ch=ch>>(i);//7 in 2nd case of odd//4 shift j=4//b=1 for j=5.//last b=3..j=7
byt=byt|ch;//after making shift..make it or with byt..
//printf("byt=%p\n",byt);
if(j!=0 && j!=2 && j!=5)//not write in file for 0 2nd & 5th bit
{
write(wfd,&byt,1);
byt=byt^byt;//only make byt 0 when u put char into file ...otherwise not
}
byt=byt|in;//after making byt=0 add it to in..i.e. pending no. of bytes
in=in^in;//after adding in to byt make it 0 for further use...
}
set++;//no use
}
return 0;
}
/*
 * Compute the code length for ndc values: the smallest number of bits j
 * such that 2^j >= ndc (for example 32 -> 5, 33 -> 6).
 *
 * ndc: the count to be represented.
 *
 * Returns 0 for ndc <= 1 (including negative input), otherwise the number
 * of doublings of 1 needed to reach or exceed ndc.
 */
int codelength(int ndc)
{
    unsigned int span = 1; /* 2^bits; unsigned so the doubling cannot overflow a signed int */
    int bits = 0;

    if (ndc <= 1)
        return 0;

    while (span < (unsigned int)ndc) {
        span <<= 1;
        bits++;
    }

    return bits;
}
int check(char buff)//character is passed further here in check function to find distinct no. of characters...
{
{ int i; //printf("masterarray=%p\n",masterarray); for(i=0;i<size;)//i=0 size=1 1st time ..so loop will operate only 1 time time in 1st iteration { if(*(masterarray+i)==buff)//1st time masterarray ppointer is empty that is malloced..so we will compare character in masterarray if there is any..to the character of file that has been read from the file ... goto OUT;//if we get that character into masterarray..then we will not put that character into masterarray further...& goto OUT...WILL return -1 to check function ...which will not tend to ++ment size..or will not realloc the masterarray else i++;//if we had put some distinct characters in masterarray then compare those distinct no. of characters in masterarray with character read from the file...if character read from file do not match 1st character in masterarray...then ++ment the value of i to find the right match of that character in masterarray... } if(buff != '\0')//again if character that is not matched with any of character in masterarray..is not NULL *(masterarray+i-1)=buff;//then ...after no match found for that character put that character into masterarray....*ptr=buff...means we are putting..value of buff into address of pointer //ptr=&buff means we are putting address of character into ptr...& we do not have to change address of masterarray..so do not follow this assignment of character... printf("*(masterarray+i-1)=%c\n",*(masterarray+i-1));//print distinct no. of characters return 0; OUT: return -1; }
/*
 * Record one character read from the input file in the table of distinct
 * characters (masterarray), making sure the table has `size` slots first.
 *
 * buff: the character just read. '\0' is not passed on to check().
 *
 * Globals used: flag (1 until the first allocation has been made, then 0),
 * size (number of slots allocated), masterarray, and ret (set to the result
 * of check(), or to -1 when check() is not called).
 *
 * Returns size-2. Terminates the program if the table cannot be allocated.
 *
 * NOTE(review): size starts at 1 and grows by one per stored character, so
 * size-2 is one less than the number of characters stored. Presumably this
 * is meant as the highest index in use rather than a count; confirm against
 * how the caller feeds it to codelength().
 */
int create_masterarray(char buff)
{
    char *grown;

    if (flag == 1) {
        /* First call: allocate the table. */
        grown = malloc(sizeof(char) * size);
        flag = 0;
    } else {
        /* Later calls: resize through a temporary so a failure cannot lose the table. */
        grown = realloc(masterarray, sizeof(char) * size);
    }
    if (grown == NULL) {
        perror("create_masterarray");
        exit(EXIT_FAILURE);
    }
    masterarray = grown;

    /* Reset ret so a value left over from an earlier call cannot grow the table. */
    ret = -1;
    if (buff != '\0')
        ret = check(buff);

    /* check() returned 0: a new character was stored, so reserve one more slot. */
    if (ret == 0)
        size++;

    return size - 2;
}
int main(int argc,char* argv[])//MAIN WILL TAKE ITS ARGUMENTS FROM COMMAND LINE..ARGC WILL KEEP COUNT OF THOSE ARGUMENTS..
{
char buff;
if(argc <2)//if arguments on command line are less than 2..it will demand for 2nd argument on command line..that u had not given here on command line..char* argv[] is pointer of array in which argv[0] ..argv[1]..will have name of arguments on command line...but when arguments come from command line ...on that time..ur compiler is not aware that this argument that is coming from command line ..could be a file...
{
printf("PLEASE GIVE ME FILENAME\n");
}
printf("argv[1]=%s\n",argv[1]);//print argument that we send from command line..with %s..bcz we have send name of file
fd=open(argv[1],O_RDONLY);//open that file ..by using argument which contains name of file...in RD_ONLY MODE..argv[0] will be the name of file ..in which we are..name of its ELF format
printf("fd=%d\n",fd);
while(count)
{
count=read(fd,&buff,1);//read characters from file one by one
//printf("buff=%c\n",buff);
ret=create_masterarray(buff);//send these characters to find distinct no..of characters in file ...on bases of those distinct no. of character ..we wil find codelength ..means those no. of distinct characters could be reprsented in how many bits...suppose here we 32 distinct characters..so we can represent these many no. of characters in 5 bits..
}
printf("ndc=%d\n",ret);
ret=codelength(ret);//make a logic to find codelength of distinct no. of characters...
printf("codelength=%d\n",ret);//printf that codelength..means distinct no. of characters could be represented in how many bits...
ret=compress();//now we have distinct no. of characters & codelen both..bcz...distinct no. characters are maximum 32 in our case ...in this file which is to be compressed ..so we wil...compress the file using 5 bit compression
if(ret==0)
printf("COMPRESSED\n");//if compression is succesful,. it will return 0
ret=decompress();//decompress the file again...in reverse order in which we have done compression of file.
if(ret==0)
printf("DECOMPRESSED\n");//if decompression is succesful...again returned value will be 0;