Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

panda/src/express/patchfile.cxx

Go to the documentation of this file.
00001 // Filename: patchfile.cxx
00002 // Created by:  darren, mike (09Jan97)
00003 //
00004 ////////////////////////////////////////////////////////////////////
00005 //
00006 // PANDA 3D SOFTWARE
00007 // Copyright (c) 2001, Disney Enterprises, Inc.  All rights reserved
00008 //
00009 // All use of this software is subject to the terms of the Panda 3d
00010 // Software license.  You should have received a copy of this license
00011 // along with this source code; you will also find a current copy of
00012 // the license at http://www.panda3d.org/license.txt .
00013 //
00014 // To contact the maintainers of this program write to
00015 // panda3d@yahoogroups.com .
00016 //
00017 ////////////////////////////////////////////////////////////////////
00018 
00019 #include "config_express.h"
00020 #include "error_utils.h"
00021 #include "patchfile.h"
00022 #include "crypto_utils.h" // MD5 stuff
00023 #include "streamReader.h"
00024 #include "streamWriter.h"
00025 
00026 #include <stdio.h> // for tempnam
00027 
00028 // PROFILING ///////////////////////////////////////////////////////
00029 //#define PROFILE_PATCH_BUILD
00030 #ifdef PROFILE_PATCH_BUILD
00031 
00032 #include "clockObject.h"
00033 ClockObject *globalClock = ClockObject::get_global_clock();
00034 
00035 #define GET_PROFILE_TIME() globalClock->get_real_time()
00036 #define START_PROFILE(var) double var = GET_PROFILE_TIME()
00037 #define END_PROFILE(startTime, name) \
00038   cout << name << " took " << (GET_PROFILE_TIME() - (startTime)) << " seconds" << endl
00039 
00040 #else
00041 #define START_PROFILE(var)
00042 #define END_PROFILE(startTime, name)
00043 #endif
00044 ////////////////////////////////////////////////////////////////////
00045 
00046 // this actually slows things down...
00047 //#define USE_MD5_FOR_HASHTABLE_INDEX_VALUES
00048 
00049 // Patch File Format ///////////////////////////////////////////////
00050 ///// IF THIS CHANGES, UPDATE installerApplyPatch.cxx IN THE INSTALLER
00051 ////////////////////////////////////////////////////////////////////
00052 // [ HEADER ]
00053 //   4 bytes  0xfeebfaac ("magic number")
00054 //            (older patch files have a magic number 0xfeebfaab,
00055 //            indicating they are version number 0.)
00056 //   2 bytes  version number (if magic number == 0xfeebfaac)
00057 //   4 bytes  length of starting file (if version >= 1)
00058 //  16 bytes  MD5 of starting file    (if version >= 1)
00059 //   4 bytes  length of resulting patched file
00060 //  16 bytes  MD5 of resultant patched file
00061 
00062 const int _v0_header_length = 4 + 4 + 16;
00063 const int _v1_header_length = 4 + 2 + 4 + 16 + 4 + 16;
00064 //
00065 // [ ADD/COPY pairs; repeated N times ]
00066 //   2 bytes  AL = ADD length
00067 //  AL bytes  bytes to add
00068 //   2 bytes  CL = COPY length
00069 //   4 bytes  offset of data to copy from original file, if CL != 0.
00070 //            If version >= 2, offset is relative to end of previous
00071 //            copy block; if version < 2, offset is relative to
00072 //            beginning of file.
00073 //
00074 // [ TERMINATOR ]
00075 //   2 bytes  zero-length ADD
00076 //   2 bytes  zero-length COPY
00077 ////////////////////////////////////////////////////////////////////
00078 ////////////////////////////////////////////////////////////////////
00079 
00080 ////////////////////////////////////////////////////////////////////
00081 // Defines
00082 ////////////////////////////////////////////////////////////////////
00083 const PN_uint32 Patchfile::_v0_magic_number = 0xfeebfaab;
00084 const PN_uint32 Patchfile::_magic_number = 0xfeebfaac;
00085 
00086 // Created version 1 on 11/2/02 to store length and MD5 of original file.
00087 // To version 2 on 11/2/02 to store copy offsets as relative.
00088 const PN_uint16 Patchfile::_current_version = 2;
00089 
00090 const PN_uint32 Patchfile::_HASHTABLESIZE = PN_uint32(1) << 16;
00091 const PN_uint32 Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
00092 const PN_uint32 Patchfile::_NULL_VALUE = PN_uint32(0) - 1;
00093 const PN_uint32 Patchfile::_MAX_RUN_LENGTH = (PN_uint32(1) << 16) - 1;
00094 
00095 ////////////////////////////////////////////////////////////////////
00096 //     Function: Patchfile::Constructor
00097 //       Access: Public
00098 //  Description:
00099 ////////////////////////////////////////////////////////////////////
00100 Patchfile::
00101 Patchfile(void) {
00102   PT(Buffer) buffer = new Buffer(patchfile_buffer_size);
00103   init(buffer);
00104 }
00105 
00106 ////////////////////////////////////////////////////////////////////
00107 //     Function: Patchfile::Constructor
00108 //       Access: Public
00109 //  Description:
00110 ////////////////////////////////////////////////////////////////////
00111 Patchfile::
00112 Patchfile(PT(Buffer) buffer) {
00113   init(buffer);
00114 }
00115 
00116 ////////////////////////////////////////////////////////////////////
00117 //     Function: Patchfile::init
00118 //       Access: Private
00119 //  Description:
00120 ////////////////////////////////////////////////////////////////////
00121 void Patchfile::
00122 init(PT(Buffer) buffer) {
00123   _initiated = false;
00124   nassertv(!buffer.is_null());
00125   _buffer = buffer;
00126   
00127   _version_number = 0;
00128 
00129   reset_footprint_length();
00130 }
00131 
00132 ////////////////////////////////////////////////////////////////////
00133 //     Function: Patchfile::Destructor
00134 //       Access: Public
00135 //  Description:
00136 ////////////////////////////////////////////////////////////////////
00137 Patchfile::
00138 ~Patchfile(void) {
00139   if (true == _initiated)
00140     cleanup();
00141 }
00142 
00143 ////////////////////////////////////////////////////////////////////
00144 //     Function: Patchfile::cleanup
00145 //       Access: Private
00146 //  Description:
00147 ////////////////////////////////////////////////////////////////////
00148 void Patchfile::
00149 cleanup(void) {
00150   if (false == _initiated) {
00151     express_cat.error()
00152       << "Patchfile::cleanup() - Patching has not been initiated"
00153       << endl;
00154     return;
00155   }
00156 
00157   // close files
00158   _origfile_stream.close();
00159   _patch_stream.close();
00160   _write_stream.close();
00161 
00162   _initiated = false;
00163 }
00164 
00165 ////////////////////////////////////////////////////////////////////
00166 ///// PATCH FILE APPLY MEMBER FUNCTIONS
00167 /////
00168 ////////////////////
00169 ///// NOTE: this patch-application functionality unfortunately has to be
00170 /////       duplicated in the Installer. It is contained in the file
00171 /////       installerApplyPatch.cxx
00172 /////       PLEASE MAKE SURE THAT THAT FILE GETS UPDATED IF ANY OF THIS
00173 /////       LOGIC CHANGES! (i.e. if the patch file format changes)
00174 ////////////////////
00175 ////////////////////////////////////////////////////////////////////
00176 
00177 ////////////////////////////////////////////////////////////////////
00178 //     Function: Patchfile::initiate
00179 //       Access: Published
00180 //  Description: Set up to apply the patch to the file (original
00181 //     file and patch are destroyed in the process).
00182 ////////////////////////////////////////////////////////////////////
00183 int Patchfile::
00184 initiate(const Filename &patch_file, const Filename &file) {
00185   if (_initiated) {
00186     express_cat.error()
00187       << "Patchfile::initiate() - Patching has already been initiated"
00188       << endl;
00189     return EU_error_abort;
00190   }
00191 
00192   // Open the original file for read
00193   _orig_file = file;
00194   _orig_file.set_binary();
00195   if (!_orig_file.open_read(_origfile_stream)) {
00196     express_cat.error()
00197       << "Patchfile::initiate() - Failed to open file: " << _orig_file << endl;
00198     return get_write_error();
00199   }
00200 
00201   // Open the temp file for write
00202   {
00203     char *tempfilename = _tempnam(".", "pf");
00204     if (NULL == tempfilename) {
00205       express_cat.error()
00206         << "Patchfile::initiate() - Failed to create temp file name, using default" << endl;
00207       _temp_file = "patcher_temp_file";
00208     } else {
00209       _temp_file = tempfilename;
00210       free(tempfilename);
00211     }
00212   }
00213 
00214   _temp_file.set_binary();
00215   if (!_temp_file.open_write(_write_stream)) {
00216     express_cat.error()
00217       << "Patchfile::initiate() - Failed to open file: " << _temp_file << endl;
00218     return get_write_error();
00219   }
00220 
00221   if (express_cat.is_debug()) {
00222     express_cat.debug()
00223       << "Using temporary file " << _temp_file << "\n";
00224   }
00225 
00226   int result = internal_read_header(patch_file);
00227 
00228   _total_bytes_processed = 0;
00229 
00230   _initiated = true;
00231   return result;
00232 }
00233 
00234 ////////////////////////////////////////////////////////////////////
00235 //     Function: Patchfile::read_header
00236 //       Access: Published
00237 //  Description: Opens the patch file for reading, and gets the header
00238 //               information from the file but does not begin to do
00239 //               any real work.  This can be used to query the data
00240 //               stored in the patch.
00241 ////////////////////////////////////////////////////////////////////
00242 int Patchfile::
00243 read_header(const Filename &patch_file) {
00244   if (_initiated) {
00245     express_cat.error()
00246       << "Patchfile::initiate() - Patching has already been initiated"
00247       << endl;
00248     return EU_error_abort;
00249   }
00250 
00251   int result = internal_read_header(patch_file);
00252   _patch_stream.close();
00253   return result;
00254 }
00255 
00256 ////////////////////////////////////////////////////////////////////
00257 //     Function: Patchfile::run
00258 //       Access: Published
00259 //  Description: Perform one buffer's worth of patching
00260 ////////////////////////////////////////////////////////////////////
00261 int Patchfile::
00262 run(void) {
00263   // Now patch the file using the given buffer
00264   int buflen;
00265   int bytes_read;
00266   PN_uint16 ADD_length;
00267   PN_uint16 COPY_length;
00268   PN_int32 COPY_offset;
00269 
00270   if (_initiated == false) {
00271     express_cat.error()
00272       << "Patchfile::run() - Patching has not been initiated"
00273       << endl;
00274     return EU_error_abort;
00275   }
00276 
00277   StreamReader patch_reader(_patch_stream);
00278 
00279   buflen = _buffer->get_length();
00280   bytes_read = 0;
00281 
00282   while (bytes_read < buflen) {
00283     ///////////
00284     // read # of ADD bytes
00285     nassertr(_buffer->get_length() >= (int)sizeof(ADD_length), false);
00286     ADD_length = patch_reader.get_uint16();
00287 
00288     bytes_read += (int)ADD_length;
00289     _total_bytes_processed += (int)ADD_length;
00290 
00291     // if there are bytes to add, read them from patch file and write them to output
00292     if (express_cat.is_spam()) {
00293       express_cat.spam()
00294         << "ADD: " << ADD_length << " (to " 
00295         << _write_stream.tellp() << ")" << endl;
00296     }
00297 
00298     PN_uint32 bytes_left = (PN_uint32)ADD_length;
00299     while (bytes_left > 0) {
00300       PN_uint32 bytes_this_time = (PN_uint32) min(bytes_left, (PN_uint32) buflen);
00301       _patch_stream.read(_buffer->_buffer, bytes_this_time);
00302       _write_stream.write(_buffer->_buffer, bytes_this_time);
00303       bytes_left -= bytes_this_time;
00304     }
00305 
00306     ///////////
00307     // read # of COPY bytes
00308     nassertr(_buffer->get_length() >= (int)sizeof(COPY_length), false);
00309     COPY_length = patch_reader.get_uint16();
00310 
00311     bytes_read += (int)COPY_length;
00312     _total_bytes_processed += (int)COPY_length;
00313 
00314     // if there are bytes to copy, read them from original file and write them to output
00315     if (0 != COPY_length) {
00316       // read copy offset
00317       nassertr(_buffer->get_length() >= (int)sizeof(COPY_offset), false);
00318       COPY_offset = patch_reader.get_int32();
00319 
00320       // seek to the copy source pos
00321       if (_version_number < 2) {
00322         _origfile_stream.seekg(COPY_offset, ios::beg);
00323       } else {
00324         _origfile_stream.seekg(COPY_offset, ios::cur);
00325       }
00326 
00327       if (express_cat.is_spam()) {
00328         express_cat.spam()
00329           << "COPY: " << COPY_length << " bytes from offset "
00330           << COPY_offset << " (from " << _origfile_stream.tellg()
00331           << " to " << _write_stream.tellp() << ")" 
00332           << endl;
00333       }
00334 
00335       // read the copy bytes from original file and write them to output
00336       PN_uint32 bytes_left = (PN_uint32)COPY_length;
00337 
00338       while (bytes_left > 0) {
00339         PN_uint32 bytes_this_time = (PN_uint32) min(bytes_left, (PN_uint32) buflen);
00340         _origfile_stream.read(_buffer->_buffer, bytes_this_time);
00341         _write_stream.write(_buffer->_buffer, bytes_this_time);
00342         bytes_left -= bytes_this_time;
00343       }
00344     }
00345 
00346     // if we got a pair of zero-length ADD and COPY blocks, we're done
00347     if ((0 == ADD_length) && (0 == COPY_length)) {
00348       cleanup();
00349 
00350       if (express_cat.is_debug()) {
00351         express_cat.debug()
00352           << "result file = " << _result_file_length 
00353           << " total bytes = " << _total_bytes_processed << endl;
00354       }
00355 
00356       // check the MD5 from the patch file against the newly patched file
00357       {
00358         HashVal MD5_actual;
00359         md5_a_file(_temp_file, MD5_actual);
00360         if (_MD5_ofResult != MD5_actual) {
00361           // Whoops, patching screwed up somehow.
00362           _origfile_stream.close();
00363           _write_stream.close();
00364 
00365           express_cat.info()
00366             << "Patching produced incorrect checksum.  Got:\n"
00367             << "    " << MD5_actual
00368             << "\nExpected:\n"
00369             << "    " << _MD5_ofResult
00370             << "\n";
00371 
00372           // This is a fine time to double-check the starting
00373           // checksum.
00374           if (!has_source_hash()) {
00375             express_cat.info()
00376               << "No source hash in patch file to verify.\n";
00377           } else {
00378             HashVal MD5_orig;
00379             md5_a_file(_orig_file, MD5_orig);
00380             if (MD5_orig != get_source_hash()) {
00381               express_cat.info()
00382                 << "Started from incorrect source file.  Got:\n"
00383                 << "    " << MD5_orig
00384                 << "\nExpected:\n"
00385                 << "    " << get_source_hash()
00386                 << "\n";
00387             } else {
00388               express_cat.info()
00389                 << "Started from correct source file:\n"
00390                 << "    " << MD5_orig
00391                 << "\n";
00392             }
00393           }
00394 
00395           // delete the temp file and the patch file
00396           if (!keep_temporary_files) {
00397             _temp_file.unlink();
00398             _patch_file.unlink();
00399           }
00400           // return "invalid checksum"
00401           return EU_error_invalid_checksum;
00402         }
00403       }
00404 
00405       // delete the patch file and the original file
00406       if (!keep_temporary_files) {
00407         _patch_file.unlink();
00408         _orig_file.unlink();
00409 
00410       } else {
00411         // If we're keeping temporary files, rename the orig file to a
00412         // backup.
00413         Filename orig_backup = _orig_file.get_fullpath() + ".orig";
00414         orig_backup.unlink();
00415         _orig_file.rename_to(orig_backup);
00416       }
00417 
00418       // rename the temp file to the original file name
00419       if (!_temp_file.rename_to(_orig_file)) {
00420         express_cat.error()
00421           << "Patchfile::run() failed to rename temp file to: " << _orig_file
00422           << endl;
00423         return EU_error_write_file_rename;
00424       }
00425 
00426       return EU_success;
00427     }
00428   }
00429 
00430   return EU_ok;
00431 }
00432 
00433 ////////////////////////////////////////////////////////////////////
00434 //     Function: Patchfile::apply
00435 //       Access: Public
00436 //  Description:
00437 ////////////////////////////////////////////////////////////////////
00438 bool Patchfile::
00439 apply(Filename &patch_file, Filename &file) {
00440   int ret = initiate(patch_file, file);
00441   if (ret < 0)
00442     return false;
00443   for (;;) {
00444     ret = run();
00445     if (ret == EU_success)
00446       return true;
00447     if (ret < 0)
00448       return false;
00449   }
00450   return false;
00451 }
00452 
00453 ////////////////////////////////////////////////////////////////////
00454 //     Function: Patchfile::internal_read_header
00455 //       Access: Private
00456 //  Description: Reads the header and leaves the patch file open.
00457 ////////////////////////////////////////////////////////////////////
00458 int Patchfile::
00459 internal_read_header(const Filename &patch_file) {
00460   // Open the patch file for read
00461   _patch_file = patch_file;
00462   _patch_file.set_binary();
00463   if (!_patch_file.open_read(_patch_stream)) {
00464     express_cat.error()
00465       << "Patchfile::initiate() - Failed to open file: " << _patch_file << endl;
00466     return get_write_error();
00467   }
00468 
00469   /////////////
00470   // read header, make sure the patch file is valid
00471 
00472   StreamReader patch_reader(_patch_stream);
00473 
00474   // check the magic number
00475   nassertr(_buffer->get_length() >= _v0_header_length, false);
00476   PN_uint32 magic_number = patch_reader.get_uint32();
00477   if (magic_number != _magic_number && magic_number != _v0_magic_number) {
00478     express_cat.error()
00479       << "Invalid patch file: " << _patch_file << endl;
00480     return EU_error_file_invalid;
00481   }
00482 
00483   _version_number = 0;
00484   if (magic_number != _v0_magic_number) {
00485     _version_number = patch_reader.get_uint16();
00486   }
00487   if (_version_number > _current_version) {
00488     express_cat.error()
00489       << "Can't read version " << _version_number << " patch files: "
00490       << _patch_file << endl;
00491     return EU_error_file_invalid;
00492   }
00493 
00494   if (_version_number >= 1) {
00495     // Get the length of the source file.
00496     _source_file_length = patch_reader.get_uint32();
00497     
00498     // get the MD5 of the source file.
00499     _MD5_ofSource.set_value(0, patch_reader.get_uint32());
00500     _MD5_ofSource.set_value(1, patch_reader.get_uint32());
00501     _MD5_ofSource.set_value(2, patch_reader.get_uint32());
00502     _MD5_ofSource.set_value(3, patch_reader.get_uint32());
00503   }
00504 
00505   // get the length of the patched result file
00506   _result_file_length = patch_reader.get_uint32();
00507 
00508   // get the MD5 of the resultant patched file
00509   _MD5_ofResult.set_value(0, patch_reader.get_uint32());
00510   _MD5_ofResult.set_value(1, patch_reader.get_uint32());
00511   _MD5_ofResult.set_value(2, patch_reader.get_uint32());
00512   _MD5_ofResult.set_value(3, patch_reader.get_uint32());
00513 
00514   express_cat.debug()
00515     << "Patchfile::initiate() - valid patchfile" << endl;
00516 
00517   return EU_success;
00518 }
00519 
00520 ////////////////////////////////////////////////////////////////////
00521 ///// PATCH FILE BUILDING MEMBER FUNCTIONS
00522 ////////////////////////////////////////////////////////////////////
00523 
00524 ////////////////////////////////////////////////////////////////////
00525 //     Function: Patchfile::calc_hash
00526 //       Access: Private
00527 //  Description:
00528 ////////////////////////////////////////////////////////////////////
00529 PN_uint16 Patchfile::
00530 calc_hash(const char *buffer) {
00531 #ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
00532   HashVal hash;
00533 
00534   md5_a_buffer((const unsigned char*)buffer, (int)_footprint_length, hash);
00535 
00536   //cout << PN_uint16(hash.get_value(0)) << " ";
00537 
00538   return PN_uint16(hash.get_value(0));
00539 #else
00540   PN_uint16 hash_value = 0;
00541 
00542   for(int i = 0; i < (int)_footprint_length; i++) {
00543     // this is probably not such a good hash. to be replaced
00544     /// --> TRIED MD5, was not worth it for the execution-time hit on 800Mhz PC
00545     hash_value ^= (*buffer) << (i % 8);
00546     buffer++;
00547   }
00548 
00549   //cout << hash_value << " ";
00550 
00551   return hash_value;
00552 #endif
00553 }
00554 
00555 ////////////////////////////////////////////////////////////////////
00556 //     Function: Patchfile::build_hash_link_tables
00557 //       Access: Private
00558 //  Description:
00559 //               The hash and link tables allow for a quick, linear
00560 //               search of all locations in the file that begin with
00561 //               a particular sequence of bytes, or "footprint."
00562 //
00563 //               The hash table is a table of offsets into the file,
00564 //               with one entry for every possible footprint hash
00565 //               value. For a hash of a footprint, the entry at the
00566 //               offset of the hash value provides an initial location
00567 //               in the file that has a matching footprint.
00568 //
00569 //               The link table is a large linked list of file offsets,
00570 //               with one entry for every byte in the file. Each offset
00571 //               in the link table will point to another offset that
00572 //               has the same footprint at the corresponding offset in the
00573 //               actual file. Starting with an offset taken from the hash
00574 //               table, one can rapidly produce a list of offsets that
00575 //               all have the same footprint.
00576 ////////////////////////////////////////////////////////////////////
00577 void Patchfile::
00578 build_hash_link_tables(const char *buffer_orig, PN_uint32 length_orig,
00579   PN_uint32 *hash_table, PN_uint32 *link_table) {
00580 
00581   PN_uint32 i;
00582 
00583   START_PROFILE(clearTables);
00584 
00585   // clear hash table
00586   for(i = 0; i < _HASHTABLESIZE; i++) {
00587     hash_table[i] = _NULL_VALUE;
00588   }
00589 
00590   // clear link table
00591   for(i = 0; i < length_orig; i++) {
00592     link_table[i] = _NULL_VALUE;
00593   }
00594 
00595   END_PROFILE(clearTables, "clearing hash and link tables");
00596 
00597   if(length_orig < _footprint_length) return;
00598 
00599   START_PROFILE(hashingFootprints);
00600 #ifdef PROFILE_PATCH_BUILD
00601   double hashCalc = 0.0;
00602   double linkSearch = 0.0;
00603 #endif
00604 
00605   // run through original file and hash each footprint
00606   for(i = 0; i < (length_orig - _footprint_length); i++) {
00607 
00608 #ifdef PROFILE_PATCH_BUILD
00609     double t = GET_PROFILE_TIME();
00610 #endif
00611 
00612     PN_uint16 hash_value = calc_hash(&buffer_orig[i]);
00613 
00614 #ifdef PROFILE_PATCH_BUILD
00615     hashCalc += GET_PROFILE_TIME() - t;
00616 #endif
00617 
00618     // we must now store this file index in the hash table
00619     // at the offset of the hash value
00620 
00621     // to account for multiple file offsets with identical
00622     // hash values, there is a link table with an entry for
00623     // every footprint in the file. We create linked lists
00624     // of offsets in the link table.
00625 
00626     // first, set the value in the link table for the current
00627     // offset to whatever the current list head is (the
00628     // value in the hash table) (note that this only works
00629     // because the hash and link tables both use
00630     // _NULL_VALUE to indicate a null index)
00631     link_table[i] = hash_table[hash_value];
00632 
00633     // set the new list head; store the current offset in the
00634     // hash table at the offset of the footprint's hash value
00635     hash_table[hash_value] = i;
00636 
00637     /*
00638     if (_NULL_VALUE == hash_table[hash_value]) {
00639       // hash entry is empty, store this offset
00640       hash_table[hash_value] = i;
00641     } else {
00642       // hash entry is taken, go to the link table
00643       PN_uint32 link_offset = hash_table[hash_value];
00644 
00645 #ifdef PROFILE_PATCH_BUILD
00646       double t = GET_PROFILE_TIME();
00647 #endif
00648       while (_NULL_VALUE != link_table[link_offset]) {
00649         link_offset = link_table[link_offset];
00650       }
00651 #ifdef PROFILE_PATCH_BUILD
00652       linkSearch += GET_PROFILE_TIME() - t;
00653 #endif
00654 
00655       link_table[link_offset] = i;
00656     }
00657     */
00658   }
00659 
00660 #ifdef PROFILE_PATCH_BUILD
00661   cout << "calculating footprint hashes took " << hashCalc << " seconds" << endl;
00662   cout << "traversing the link table took " << linkSearch << " seconds" << endl;
00663 #endif
00664 
00665   END_PROFILE(hashingFootprints, "hashing footprints");
00666 }
00667 
00668 ////////////////////////////////////////////////////////////////////
00669 //     Function: Patchfile::calc_match_length
00670 //       Access: Private
00671 //  Description:
00672 //               This function calculates the length of a match between
00673 //               two strings of bytes
00674 ////////////////////////////////////////////////////////////////////
00675 PN_uint32 Patchfile::
00676 calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length) {
00677   PN_uint32 length = 0;
00678   while ((length < max_length) && (*buf1 == *buf2)) {
00679     buf1++, buf2++, length++;
00680   }
00681   return length;
00682 }
00683 
00684 ////////////////////////////////////////////////////////////////////
00685 //     Function: Patchfile::find_longest_match
00686 //       Access: Private
00687 //  Description:
00688 //               This function will find the longest string in the
00689 //               original file that matches a string in the new file.
00690 ////////////////////////////////////////////////////////////////////
00691 void Patchfile::
00692 find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_length,
00693   PN_uint32 *hash_table, PN_uint32 *link_table, const char* buffer_orig,
00694   PN_uint32 length_orig, const char* buffer_new, PN_uint32 length_new) {
00695 
00696   // set length to a safe value
00697   copy_length = 0;
00698 
00699   // get offset of matching string (in orig file) from hash table
00700   PN_uint16 hash_value = calc_hash(&buffer_new[new_pos]);
00701 
00702   // if no match, bail
00703   if (_NULL_VALUE == hash_table[hash_value])
00704     return;
00705 
00706   copy_pos = hash_table[hash_value];
00707 
00708   // calc match length
00709   copy_length = (PN_uint16)calc_match_length(&buffer_new[new_pos], &buffer_orig[copy_pos],
00710                   min(min((length_new - new_pos),(length_orig - copy_pos)), _MAX_RUN_LENGTH));
00711 
00712   // run through link table, see if we find any longer matches
00713   PN_uint32 match_offset;
00714   PN_uint16 match_length;
00715   match_offset = link_table[copy_pos];
00716 
00717   while (match_offset != _NULL_VALUE) {
00718     match_length = (PN_uint16)calc_match_length(&buffer_new[new_pos], &buffer_orig[match_offset],
00719                       min(min((length_new - new_pos),(length_orig - match_offset)), _MAX_RUN_LENGTH));
00720 
00721     // have we found a longer match?
00722     if (match_length > copy_length) {
00723       copy_pos = match_offset;
00724       copy_length = match_length;
00725     }
00726 
00727     // traverse the link table
00728     match_offset = link_table[match_offset];
00729   }
00730 }
00731 
00732 ////////////////////////////////////////////////////////////////////
00733 //     Function: Patchfile::emit_ADD
00734 //       Access: Private
00735 //  Description:
00736 ////////////////////////////////////////////////////////////////////
00737 void Patchfile::
00738 emit_ADD(ofstream &write_stream, PN_uint16 length, const char* buffer,
00739          PN_uint32 ADD_pos) {
00740   if (express_cat.is_spam()) {
00741     express_cat.spam()
00742       << "ADD: " << length << " (to " << ADD_pos << ")" << endl;
00743   }
00744 
00745   // write ADD length
00746   StreamWriter patch_writer(write_stream);
00747   patch_writer.add_uint16(length);
00748 
00749   // if there are bytes to add, add them
00750   if (length > 0) {
00751     patch_writer.append_data(buffer, length);
00752   }
00753 }
00754 
00755 ////////////////////////////////////////////////////////////////////
00756 //     Function: Patchfile::emit_COPY
00757 //       Access: Private
00758 //  Description:
00759 ////////////////////////////////////////////////////////////////////
00760 void Patchfile::
00761 emit_COPY(ofstream &write_stream, PN_uint16 length, PN_uint32 COPY_pos,
00762           PN_uint32 last_copy_pos, PN_uint32 ADD_pos) {
00763   PN_int32 offset = (int)COPY_pos - (int)last_copy_pos;
00764   if (express_cat.is_spam()) {
00765     express_cat.spam()
00766       << "COPY: " << length << " bytes from offset " << offset
00767       << " (from " << COPY_pos << " to " << ADD_pos << ")" << endl;
00768   }
00769 
00770   // write COPY length
00771   StreamWriter patch_writer(write_stream);
00772   patch_writer.add_uint16(length);
00773 
00774   if(length > 0) {
00775     // write COPY offset
00776     patch_writer.add_int32(offset);
00777   }
00778 }
00779 
00780 ////////////////////////////////////////////////////////////////////
00781 //     Function: Patchfile::build
00782 //       Access: Public
00783 //  Description:
00784 //               This implementation uses the "greedy differencing
00785 //               algorithm" described in the masters thesis
00786 //               "Differential Compression: A Generalized Solution
00787 //               for Binary Files" by Randal C. Burns (p.13).
00788 //               For an original file of size M and a new file of
00789 //               size N, this algorithm is O(M) in space and O(M*N)
00790 //               in time.
00791 ////////////////////////////////////////////////////////////////////
00792 bool Patchfile::
00793 build(Filename file_orig, Filename file_new, Filename patch_name) {
00794   patch_name.set_binary();
00795 
00796   START_PROFILE(overall);
00797 
00798   START_PROFILE(readFiles);
00799 
00800   // Open the original file for read
00801   ifstream stream_orig;
00802   file_orig.set_binary();
00803   if (!file_orig.open_read(stream_orig)) {
00804     express_cat.error()
00805       << "Patchfile::build() - Failed to open file: " << file_orig << endl;
00806     return false;
00807   }
00808 
00809   // Open the new file for read
00810   ifstream stream_new;
00811   file_new.set_binary();
00812   if (!file_new.open_read(stream_new)) {
00813     express_cat.error()
00814       << "Patchfile::build() - Failed to open file: " << file_new << endl;
00815     return false;
00816   }
00817 
00818   // Open patch file for write
00819   ofstream write_stream;
00820   if (!patch_name.open_write(write_stream)) {
00821     express_cat.error()
00822       << "Patchfile::build() - Failed to open file: " << patch_name << endl;
00823     return false;
00824   }
00825 
00826   // read in original file
00827   stream_orig.seekg(0, ios::end);
00828   _source_file_length = stream_orig.tellg();
00829   char *buffer_orig = new char[_source_file_length];
00830   stream_orig.seekg(0, ios::beg);
00831   stream_orig.read(buffer_orig, _source_file_length);
00832 
00833   // read in new file
00834   stream_new.seekg(0, ios::end);
00835   _result_file_length = stream_new.tellg();
00836   char *buffer_new = new char[_result_file_length];
00837   stream_new.seekg(0, ios::beg);
00838   stream_new.read(buffer_new, _result_file_length);
00839 
00840   // close the original and new files (we have em in memory)
00841   stream_orig.close();
00842   stream_new.close();
00843 
00844   END_PROFILE(readFiles, "reading files");
00845 
00846   START_PROFILE(allocTables);
00847 
00848   // allocate hash/link tables
00849   PN_uint32* hash_table = new PN_uint32[_HASHTABLESIZE];
00850   PN_uint32* link_table = new PN_uint32[_source_file_length];
00851 
00852   END_PROFILE(allocTables, "allocating hash and link tables");
00853 
00854   START_PROFILE(buildTables);
00855 
00856   // build hash and link tables for original file
00857   build_hash_link_tables(buffer_orig, _source_file_length, hash_table, link_table);
00858 
00859   END_PROFILE(buildTables, "building hash and link tables");
00860 
00861   // prepare to write the patch file header
00862 
00863   START_PROFILE(writeHeader);
00864 
00865   // write the patch file header
00866   StreamWriter patch_writer(write_stream);
00867   patch_writer.add_uint32(_magic_number);
00868   patch_writer.add_uint16(_current_version);
00869   patch_writer.add_uint32(_source_file_length);
00870   {
00871     // calc MD5 of original file
00872     md5_a_buffer((const unsigned char*)buffer_orig, (int)_source_file_length, _MD5_ofSource);
00873     // add it to the header
00874     patch_writer.add_uint32(_MD5_ofSource.get_value(0));
00875     patch_writer.add_uint32(_MD5_ofSource.get_value(1));
00876     patch_writer.add_uint32(_MD5_ofSource.get_value(2));
00877     patch_writer.add_uint32(_MD5_ofSource.get_value(3));
00878   }
00879   patch_writer.add_uint32(_result_file_length);
00880   {
00881     // calc MD5 of resultant patched file
00882     md5_a_buffer((const unsigned char*)buffer_new, (int)_result_file_length, _MD5_ofResult);
00883     // add it to the header
00884     patch_writer.add_uint32(_MD5_ofResult.get_value(0));
00885     patch_writer.add_uint32(_MD5_ofResult.get_value(1));
00886     patch_writer.add_uint32(_MD5_ofResult.get_value(2));
00887     patch_writer.add_uint32(_MD5_ofResult.get_value(3));
00888   }
00889 
00890   END_PROFILE(writeHeader, "writing patch file header");
00891 
00892   // run through new file
00893   START_PROFILE(buildPatchfile);
00894 
00895   PN_uint32 new_pos = 0;
00896   PN_uint32 ADD_pos = new_pos; // this is the position for the start of ADD operations
00897 
00898   PN_uint32 last_copy_pos = 0;
00899 
00900   if(((PN_uint32) _result_file_length) >= _footprint_length)
00901   {
00902     while (new_pos < (_result_file_length - _footprint_length)) {
00903 
00904       // find best match for current position
00905       PN_uint32 COPY_pos;
00906       PN_uint16 COPY_length;
00907 
00908       find_longest_match(new_pos, COPY_pos, COPY_length, hash_table, link_table,
00909         buffer_orig, _source_file_length, buffer_new, _result_file_length);
00910 
00911       // if no match or match not longer than footprint length, skip to next byte
00912       if (COPY_length < _footprint_length) {
00913         // go to next byte
00914         new_pos++;
00915       } else {
00916         // emit ADD for all skipped bytes
00917         int num_skipped = (int)new_pos - (int)ADD_pos;
00918         while (num_skipped != (PN_uint16)num_skipped) {
00919           // Overflow.  This chunk is too large to fit into a single
00920           // ADD block, so we have to write it as multiple ADDs.
00921           static const PN_uint16 max_write = 65535;
00922           emit_ADD(write_stream, max_write, &buffer_new[ADD_pos], ADD_pos);
00923           ADD_pos += max_write;
00924           num_skipped -= max_write;
00925           emit_COPY(write_stream, 0, COPY_pos, last_copy_pos, ADD_pos);
00926         }
00927         
00928         emit_ADD(write_stream, num_skipped, &buffer_new[ADD_pos], ADD_pos);
00929         ADD_pos += num_skipped;
00930         nassertr(ADD_pos == new_pos, false);
00931 
00932         // emit COPY for matching string
00933         emit_COPY(write_stream, COPY_length, COPY_pos, last_copy_pos, ADD_pos);
00934         last_copy_pos = COPY_pos + COPY_length;
00935 
00936         // skip past match in new_file
00937         new_pos += (PN_uint32)COPY_length;
00938         ADD_pos = new_pos;
00939       }
00940     }
00941   }
00942 
00943   // are there still more bytes left in the new file?
00944   if ((int)ADD_pos != _result_file_length) {
00945     // emit ADD for all remaining bytes
00946     emit_ADD(write_stream, _result_file_length - ADD_pos, &buffer_new[ADD_pos],
00947              ADD_pos);
00948 
00949     // write null COPY
00950     emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, _result_file_length);
00951   }
00952 
00953   END_PROFILE(buildPatchfile, "building patch file");
00954 
00955   // write terminator (null ADD, null COPY)
00956   emit_ADD(write_stream, 0, NULL, _result_file_length);
00957   emit_COPY(write_stream, 0, last_copy_pos, last_copy_pos, _result_file_length);
00958 
00959   END_PROFILE(overall, "total patch building operation");
00960 
00961   return true;
00962 }
00963 

Generated on Fri May 2 00:38:29 2003 for Panda by doxygen1.3