/* 4-bit compression: applicable when the input contains 16 or fewer distinct characters. */
/*
 * decompress - decode the 4-bit packed file "desti" and print every character.
 *
 * Each byte of "desti" carries two table indices: the high nibble indexes the
 * first character and the low nibble the second.  Both indices are looked up
 * in the global masterarray and printed.  The global fd is used for the input
 * descriptor.
 *
 * The file stores no character count, so the low nibble of the last byte is
 * always decoded, even if the encoder only filled the high nibble.
 *
 * Returns 0 when end of file is reached, -1 if "desti" cannot be opened or a
 * read fails.
 */
int decompress()
{
    int count;
    int status = 0;
    unsigned char ch, cp; /* unsigned: a nibble must never become a negative offset */

    fd = open("desti", O_RDONLY);
    printf("fd in decompress=%d\n", fd);
    printf("masterarray=%p\n", (void *)masterarray);
    if (fd < 0) {
        perror("desti");
        return -1;
    }

    for (;;) {
        count = read(fd, &ch, 1);
        if (count <= 0) {
            /* 0 is end of file; a negative value is an error and must also
             * stop the loop, otherwise the same stale byte is decoded forever. */
            if (count < 0) {
                perror("read");
                status = -1;
            }
            break;
        }
        sleep(1); /* one-second pause per byte, preserved from the existing behaviour */

        cp = ch & 0x0F; /* low nibble: index of the second character */
        ch = ch >> 4;   /* high nibble: index of the first character */
        printf("ch=%c\n", *(masterarray + ch));
        printf("cp=%c\n", *(masterarray + cp));
    }

    close(fd);
    return status;
}
/*
 * compress - pack the file "source" into "desti" using 4-bit codes.
 *
 * Every character read from "source" is replaced by its index in the global
 * masterarray.  Two indices share one output byte: the first goes into the
 * high nibble, the second into the low nibble.  If the input holds an odd
 * number of characters the last byte is written with a zero low nibble.
 *
 * Uses the globals fd (input), wfd (output) and count (last read() result).
 *
 * NOTE(review): the table bound is still the hard-coded ndc = 13 (indices
 * 0..13 are searched); masterarray must hold at least that many entries.
 *
 * Returns 0 on success, -1 if a file cannot be opened, an I/O call fails, or
 * a character is not present in the searched part of the table.
 */
int compress()
{
    unsigned char byt = 0; /* output byte under construction; unsigned so shifts never sign-extend */
    char buff;
    int j = 0, i, ndc;
    int status = -1;

    ndc = 13; /* highest table index that is searched */

    /* O_CREAT requires an explicit mode; O_TRUNC drops stale bytes of an older, longer file. */
    wfd = open("desti", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    printf("wfd=%d\n", wfd);
    if (wfd < 0) {
        perror("desti");
        return -1;
    }
    fd = open("source", O_RDONLY);
    if (fd < 0) {
        perror("source");
        close(wfd);
        return -1;
    }

    while ((count = read(fd, &buff, 1)) > 0) {
        /* The position of the character in masterarray is its 4-bit code. */
        for (i = 0; i <= ndc; i++) {
            if (buff == *(masterarray + i))
                break;
        }
        if (i > ndc) {
            /* No code exists for this character; writing one would corrupt the output. */
            fprintf(stderr, "compress: character 0x%02x is not in masterarray\n",
                    (unsigned char)buff);
            goto out;
        }

        j++;
        if (j % 2 != 0) {
            /* Odd character of the pair: occupies the high nibble. */
            byt = (unsigned char)(i << 4);
        } else {
            /* Even character: fills the low nibble, the byte is complete. */
            byt |= (unsigned char)i;
            if (write(wfd, &byt, 1) != 1) {
                perror("write");
                goto out;
            }
            byt = 0;
        }
    }
    if (count < 0) {
        perror("read");
        goto out;
    }

    /* Odd number of characters: flush the half-filled final byte. */
    if (j % 2 != 0 && write(wfd, &byt, 1) != 1) {
        perror("write");
        goto out;
    }
    status = 0;

out:
    close(fd);
    close(wfd);
    return status;
}
/*
 * codelength - number of bits needed to give ndc items distinct codes.
 *
 * Returns the smallest j such that 2^j >= ndc, e.g. 16 -> 4 and 17 -> 5.
 * Returns 0 when ndc is 1, 0 or negative.
 */
int codelength(int ndc)
{
    int bits = 0;
    unsigned int span = 1; /* how many codes `bits` bits can express */

    if (ndc <= 1)
        return 0;

    /* Unsigned arithmetic: doubling a signed int past 2^30 would overflow. */
    while (span < (unsigned int)ndc) {
        span <<= 1;
        bits++;
    }
    return bits;
}
/*
 * check - append buff to the global masterarray unless it is already there.
 *
 * Expected layout on entry (set up by the caller): masterarray has room for
 * `size` characters, slots 0..size-2 hold the distinct characters found so
 * far and slot size-1 is allocated but not yet written.
 *
 * Returns 0 when buff was stored in slot size-1 (the caller then grows size),
 * -1 when buff is already in the table, is '\0' (never stored), or the table
 * has no free slot.
 */
int check(char buff)
{
    int i;

    if (buff == '\0' || size < 1)
        return -1;

    /* Compare only against slots that have been written; slot size-1 is still
     * uninitialised and must not take part in the search. */
    for (i = 0; i < size - 1; i++) {
        if (*(masterarray + i) == buff)
            return -1;
    }

    /* Not found: i now equals size-1, the free slot. */
    *(masterarray + i) = buff;
    printf("*(masterarray+i-1)=%c\n", *(masterarray + i));
    return 0;
}
/*
 * create_masterarray - feed one input character into the table of distinct
 * characters (global masterarray).
 *
 * On the first call (flag == 1) the table is allocated with `size` bytes and
 * flag is cleared; on later calls it is resized to the current `size`.  A
 * non-NUL buff is then handed to check(); when check() reports that it stored
 * the character (returns 0), size is incremented so the next call allocates a
 * fresh free slot.
 *
 * Returns size - 2 as it stands after this call.  If the allocation fails the
 * table and size are left untouched and the same expression is returned.
 */
int create_masterarray(char buff)
{
    void *grown;
    int status = -1; /* local on purpose: a stale result must never grow the table */

    if (flag == 1) {
        grown = malloc(sizeof(char) * size);
        if (grown == NULL) {
            perror("malloc");
            return size - 2;
        }
        flag = 0;
    } else {
        /* Keep the old pointer until realloc succeeds so the table is not lost. */
        grown = realloc(masterarray, sizeof(char) * size);
        if (grown == NULL) {
            perror("realloc");
            return size - 2;
        }
    }
    masterarray = grown;

    /* NUL bytes are never entered into the table. */
    if (buff != '\0')
        status = check(buff);

    if (status == 0)
        size++;

    return size - 2;
}
/*
 * main - build the table of distinct characters from the file named in
 * argv[1], report the code length, then run compress() and decompress().
 *
 * NOTE(review): compress() opens the fixed path "source", not argv[1];
 * confirm both are meant to name the same file.
 *
 * Returns 0 on success, 1 when no file name is given, the file cannot be
 * read, or compress()/decompress() reports a failure.
 */
int main(int argc, char *argv[])
{
    char buff;
    int failed = 0;

    if (argc < 2) {
        printf("PLEASE GIVE ME FILENAME\n");
        return 1; /* argv[1] is NULL here and must not be used */
    }
    printf("argv[1]=%s\n", argv[1]);

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror(argv[1]);
        return 1;
    }

    /* Feed the file one character at a time into the distinct-character table. */
    while ((count = read(fd, &buff, 1)) > 0)
        ret = create_masterarray(buff);
    if (count < 0) {
        perror("read");
        close(fd);
        return 1;
    }
    close(fd);

    /* NOTE(review): ret is create_masterarray()'s size-2 value, which looks
     * like the highest table index rather than a count; confirm that this is
     * what codelength() should receive. */
    printf("ndc=%d\n", ret);
    ret = codelength(ret);
    printf("codelength=%d\n", ret);

    ret = compress();
    if (ret == 0)
        printf("COMPRESSED\n");
    else
        failed = 1;

    ret = decompress();
    if (ret == 0)
        printf("DECOMPRESSED\n");
    else
        failed = 1;

    return failed;
}