/* 7-bit compression and decompression; usable when the input contains at most 128 distinct characters. */
// Each character is stored as a 7-bit index, so every 8 characters occupy 7 bytes in the compressed file.
/*
 * decompress - unpack the file "desti" and print the decoded characters
 * on stdout.
 *
 * The compressed stream is a run of 7-bit indices into masterarray, stored
 * most-significant bit first and packed back to back, so 7 bytes of input
 * yield 8 characters.  Bytes are shifted into a small bit accumulator and a
 * character is emitted each time 7 bits are available; trailing bits that
 * do not make up a whole 7-bit code are padding and are ignored.
 *
 * Returns 0 on success, -1 if "desti" cannot be opened or a read fails.
 */
int decompress()
{
    unsigned char in = 0;   /* byte just read from the compressed file */
    unsigned int acc = 0;   /* bit accumulator; its low `nbits` bits are pending */
    int nbits = 0;          /* number of not-yet-decoded bits held in acc */
    int count;

    fd = open("desti", O_RDONLY);
    if (fd < 0)
        return -1;

    /* read() yields 0 at end of file and -1 on error; both end the loop. */
    while ((count = read(fd, &in, 1)) > 0) {
        acc = (acc << 8) | in;
        nbits += 8;

        /* At most 14 bits are pending here, i.e. one or two whole codes. */
        while (nbits >= 7) {
            unsigned char byt = (unsigned char)((acc >> (nbits - 7)) & 0x7Fu);

            /*
             * byt is the index recorded at compression time.
             * NOTE(review): byt may be as large as 127 and is not checked
             * against the number of entries actually held in masterarray.
             */
            printf("%c", *(masterarray + byt));
            nbits -= 7;
        }

        /* Keep only the bits that still belong to the next code. */
        acc &= (1u << nbits) - 1u;
    }

    close(fd);
    return count < 0 ? -1 : 0;
}
/*
 * compress - encode the file "source" into the file "desti".
 *
 * Every input byte is replaced by the low 7 bits of indu(byte) and the
 * codes are written most-significant bit first, packed back to back, so
 * 8 characters occupy 7 output bytes.  indu() is defined elsewhere;
 * presumably it returns the character's index in masterarray.
 *
 * NUL bytes in the input are skipped: create_masterarray() never hands a
 * NUL to check(), so a NUL has no index to encode.
 *
 * When the input length is not a multiple of 8, the last byte is padded
 * with zero bits.  NOTE(review): the format has no length field, so when
 * exactly 7 codes remain the padding is indistinguishable from one extra
 * code 0 on decode.
 *
 * Returns 0 on success, -1 if a file cannot be opened or an I/O call fails.
 */
int compress()
{
    unsigned char buff;     /* byte read from the source file */
    unsigned char byt;      /* packed byte about to be written */
    unsigned int acc = 0;   /* bit accumulator; its low `nbits` bits are pending */
    int nbits = 0;          /* number of pending bits in acc (0..14) */
    int status = 0;
    int count;
    int ind;

    fd = open("source", O_RDONLY);
    if (fd < 0)
        return -1;

    /*
     * O_CREAT requires the mode argument; O_TRUNC discards any longer,
     * stale content left in a pre-existing "desti".
     */
    wfd = open("desti", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (wfd < 0) {
        close(fd);
        return -1;
    }

    /* read() yields 0 at end of file and -1 on error; both end the loop. */
    while ((count = read(fd, &buff, 1)) > 0) {
        if (buff == '\0')
            continue;

        ind = indu(buff);
        acc = (acc << 7) | ((unsigned int)ind & 0x7Fu);
        nbits += 7;

        /* Adding 7 bits can complete at most one output byte. */
        if (nbits >= 8) {
            byt = (unsigned char)(acc >> (nbits - 8));
            if (write(wfd, &byt, 1) != 1) {
                status = -1;
                break;
            }
            nbits -= 8;
            acc &= (1u << nbits) - 1u;
        }
    }
    if (count < 0)
        status = -1;

    /* Flush the bits of an incomplete final group, left-aligned in the byte. */
    if (status == 0 && nbits > 0) {
        byt = (unsigned char)((acc << (8 - nbits)) & 0xFFu);
        if (write(wfd, &byt, 1) != 1)
            status = -1;
    }

    close(fd);
    if (close(wfd) != 0)
        status = -1;
    return status;
}
/*
 * codelength - number of bits needed to give each of `ndc` distinct
 * characters its own code, i.e. the smallest j such that 2^j >= ndc.
 *
 * Examples: 2 -> 1, 3 -> 2, 128 -> 7, 129 -> 8.
 * Returns 0 when ndc is 0 or 1 (and for negative ndc).
 */
int codelength(int ndc)
{
    unsigned int span = 1;  /* how many values `bits` bits can represent */
    int bits = 0;

    /* Unsigned arithmetic: doubling cannot overflow for any positive int. */
    while (ndc > 0 && span < (unsigned int)ndc) {
        span *= 2;
        bits++;
    }
    return bits;
}
/*
 * check - record `buff` in masterarray unless it is already there.
 *
 * create_masterarray() sizes masterarray to `size` characters before
 * calling this function, so slot size-1 is the newly added, still empty
 * slot and slots 0 .. size-2 hold the distinct characters seen so far.
 * (This assumes `size` starts at 1 -- TODO confirm where it is defined.)
 *
 * Returns 0 when buff was stored in slot size-1 (the caller then grows
 * `size`), or -1 when buff was already present, is NUL, or no slot exists.
 */
int check(char buff)
{
    int i;

    if (size < 1)
        return -1;      /* no slot available to store into */

    /* Compare against the filled slots only; slot size-1 is uninitialized. */
    for (i = 0; i < size - 1; i++) {
        if (*(masterarray + i) == buff)
            return -1;
    }

    if (buff == '\0')
        return -1;      /* NUL is never recorded as a distinct character */

    *(masterarray + size - 1) = buff;
    printf("*(masterarray+i-1)=%c\n", *(masterarray + size - 1)); /* trace the new entry */
    return 0;
}
/*
 * create_masterarray - feed one input character into the table of
 * distinct characters.
 *
 * masterarray is (re)sized to `size` characters, then check() stores
 * `buff` if it has not been seen before; when check() reports 0 (stored),
 * `size` is incremented so the next call has a free slot.  A NUL `buff`
 * is never passed to check().
 *
 * Returns size - 2, which the caller passes to codelength() as the count
 * of distinct characters, or -1 if memory cannot be allocated (masterarray
 * is left untouched in that case).
 * NOTE(review): size - 2 is the index of the last filled slot, i.e. one
 * less than the number of stored characters -- confirm this is intended.
 */
int create_masterarray(char buff)
{
    void *grown;
    int stored = -1;    /* result of check(); -1 means nothing was stored */

    if (flag == 1)      /* first call: nothing allocated yet */
        grown = malloc(sizeof(char) * size);
    else                /* later calls: resize the existing table */
        grown = realloc(masterarray, sizeof(char) * size);

    /* Assign only on success so a failed realloc does not lose the table. */
    if (grown == NULL)
        return -1;
    masterarray = grown;
    flag = 0;

    if (buff != '\0')
        stored = check(buff);

    /* Grow only when this call really added a character. */
    if (stored == 0)
        size++;

    return size - 2;
}
/*
 * main - program entry point.
 *
 * Reads the file named by argv[1] one byte at a time and passes each byte
 * to create_masterarray(), prints the value it last returned and the
 * codelength() of that value, then calls compress() followed by
 * decompress(), reporting each step that returns 0.
 *
 * fd, count and ret are not declared in this function.
 */
int main(int argc,char* argv[])// argc counts the command-line arguments; argv[1] is expected to be the input file name
{
char buff;
if(argc <2)// no file name was given on the command line
{
printf("PLEASE GIVE ME FILENAME\n");
}
// NOTE(review): execution continues after the message above, so argv[1] is still used below when it was not supplied -- an early return looks intended; confirm.
printf("argv[1]=%s\n",argv[1]);// echo the file name
fd=open(argv[1],O_RDONLY);// open the input file read-only; the result is only printed, not checked
printf("fd=%d\n",fd);
while(count)// runs until read() below returns 0
{
count=read(fd,&buff,1);// read the file one byte at a time
// NOTE(review): the call below also runs when read() returned 0, i.e. with whatever buff held before.
ret=create_masterarray(buff);// record the byte in the table of distinct characters
}
printf("ndc=%d\n",ret);// value returned by the last create_masterarray() call
ret=codelength(ret);// number of bits needed for that many distinct characters
printf("codelength=%d\n",ret);
ret=compress();// compress() takes no arguments; the code length above is only printed
if(ret==0)
printf("COMPRESSED\n");// compress() reported success
ret=decompress();// unpack the compressed file again
if(ret==0)
printf("DECOMPRESSED\n");// decompress() reported success