Generally, this question requires us to do a length encoding of an string. For example, after compression, the string “aaaabb” should become “a4b2”. If the compression could not shorten the string, the program will leave original string unchanged.
I solve this question differently from the solution on crack5e.info. The crack5e.info is unavailable now. But as far as I remember, in the solution of crack5e.info, you have no way to get the expected length of compressed string. Thus you have to allocate as much memory as possibly needed. But in my solution, we have a choice to get the length of compressed string before really allocate and fill memory. The source code is as following.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | // Author: Sheng Yu // Time: 03/02/2013 // // C solution for problem 1.5 in Cracking the Coding Interview 5th eidtion. // // Test environment: gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-52) // #include <stdio.h> #include <stdlib.h> #include <string.h> inline int intlen(int num){ // Estimate the number of digits for integer num // For example, the number of digits for integer 123 is 3. // // For positive integer, this function is equal to: // (int)log10(num)+1 // But this function is much much faster than the log10 solution. // int len = 1; // At least one char is needed if(num<0){ ++len; // An additional char is needed for sign - num = -num; } while(num/10 != 0){ ++len; num /= 10; } return len; } int compress(char *compressed, const char *original){ // Input: // compressed: the point to the memory to store the result string // could be NULL. // original: the point to the original c-style string. // Output: // when the compressed is NULL, return the size of memory to store // the final compressed string (including the ' '). // when the compressed is not NULL, return 0 if successfully. // in both cases, a negative return value means error happened. // Function: // Perform the basic string compression using the counts of // repeated characters. For example, the string aabcccccaaa should // be compressed into a2b1c5a3. // If the "compressed" string is not shorter than the original one, // fill the result with original string. char current_char; int current_count = 0; int total_count = 1; //At least there is a ' ' int index_original = 0; int original_len = 0; char *compressing = compressed; if( original == NULL ){ printf("Invalid parameter: original is NULL.n"); return -1; } current_char = original[0]; if (compressed ==NULL){ // Not really compress the string. But estimate how many bytes // are needed to store the compressed string. while(original[index_original] != ' '){ if(original[index_original] == current_char){ ++current_count; } else{ // 1 (one byte) is for the character itself total_count += (intlen(current_count)+1); current_char = original[index_original]; current_count = 1; } ++index_original; } if(current_count != 0){ // 1 (one byte) is for the character itself total_count += (intlen(current_count)+1); } ++index_original; // Now, index_original is the number of bytes // to store the original string (including // the null-character ' ') return total_count < index_original ? total_count : index_original; } else{ // Fill the compressed spaces with the compressed string if it's // shorter, otherwise the original string. original_len = strlen(original); compressing[0] = ' '; while(original[index_original] != ' '){ if(original[index_original] == current_char){ ++current_count; } else{ if(compressing - compressed + intlen(current_count) + 1 >= original_len){ // if the compressed string will not be shorter, // copy the original string instead of compression strcpy(compressed, original); return 0; } sprintf(compressing,"%c%d",current_char, current_count); compressing += strlen(compressing); current_char = original[index_original]; current_count = 1; } ++index_original; } if(current_count != 0){ if(compressing - compressed + intlen(current_count) + 1 >= original_len){ // if the compressed string will not be shorter, // copy the original string instead of compression strcpy(compressed, original); return 0; } sprintf(compressing,"%c%d",current_char, current_count); } return 0; } } int main(int argc, char *argv[]){ char *compressed = NULL; int compressed_size = 0; int compressed_result = 0; if(argc != 2){ printf("Invalid arguments.n"); return 1; } // Allocate the spaces for compressed string compressed_size = compress(NULL, argv[1]); // include NULL terminator if(compressed_size <= 0){ printf("Fail to estimate the size of compressed string.n"); return 2; } compressed = (char *) malloc(compressed_size); if(compressed == NULL){ printf("Fail to allocate enough memory.n"); return 3; } // Compress the string argv[1] compressed_result = compress(compressed, argv[1]); if(compressed_result != 0){ printf("Fail to compress the string: %sn",argv[1]); free(compressed); return 4; } printf("Original string(%d): %sn",(int) strlen(argv[1]),argv[1]); printf("Compressed string(%d): %sn",(int) strlen(compressed),compressed); free(compressed); return 0; } |
Thanks Sheng
Very helpful solution.
Am trying to run my unit tests over this.