00001 // Filename: pkFontFile.cxx 00002 // Created by: drose (18Feb01) 00003 // 00004 //////////////////////////////////////////////////////////////////// 00005 // 00006 // PANDA 3D SOFTWARE 00007 // Copyright (c) 2001, Disney Enterprises, Inc. All rights reserved 00008 // 00009 // All use of this software is subject to the terms of the Panda 3d 00010 // Software license. You should have received a copy of this license 00011 // along with this source code; you will also find a current copy of 00012 // the license at http://www.panda3d.org/license.txt . 00013 // 00014 // To contact the maintainers of this program write to 00015 // panda3d@yahoogroups.com . 00016 // 00017 //////////////////////////////////////////////////////////////////// 00018 00019 #include "pkFontFile.h" 00020 #include "charBitmap.h" 00021 00022 /******************************************************************** 00023 00024 I found the following in a source file for pftype called pftype.web. 00025 It's some nutty TeX-based Pascal program, and the documentation is a 00026 little hard to read because of the embedded TeX formatting controls. 00027 But it describes quite thoroughly the format of the pk file. 00028 00029 ********************************************************************* 00030 00031 @* Packed file format. 00032 The packed file format is a compact representation of the data contained in a 00033 \.{GF} file. The information content is the same, but packed (\.{PK}) files 00034 are almost always less than half the size of their \.{GF} counterparts. They 00035 are also easier to convert into a raster representation because they do not 00036 have a profusion of \\{paint}, \\{skip}, and \\{new\_row} commands to be 00037 separately interpreted. In addition, the \.{PK} format expressedly forbids 00038 \&{special} commands within a character. The minimum bounding box for each 00039 character is explicit in the format, and does not need to be scanned for as in 00040 the \.{GF} format. 
Finally, the width and escapement values are combined with 00041 the raster information into character ``packets'', making it simpler in many 00042 cases to process a character. 00043 00044 A \.{PK} file is organized as a stream of 8-bit bytes. At times, these bytes 00045 might be split into 4-bit nybbles or single bits, or combined into multiple 00046 byte parameters. When bytes are split into smaller pieces, the `first' piece 00047 is always the most significant of the byte. For instance, the first bit of 00048 a byte is the bit with value 128; the first nybble can be found by dividing 00049 a byte by 16. Similarly, when bytes are combined into multiple byte 00050 parameters, the first byte is the most significant of the parameter. If the 00051 parameter is signed, it is represented by two's-complement notation. 00052 00053 The set of possible eight-bit values is separated into two sets, those that 00054 introduce a character definition, and those that do not. The values that 00055 introduce a character definition range from 0 to 239; byte values 00056 above 239 are interpreted as commands. Bytes that introduce character 00057 definitions are called flag bytes, and various fields within the byte indicate 00058 various things about how the character definition is encoded. Command bytes 00059 have zero or more parameters, and can never appear within a character 00060 definition or between parameters of another command, where they would be 00061 interpreted as data. 00062 00063 A \.{PK} file consists of a preamble, followed by a sequence of one or more 00064 character definitions, followed by a postamble. The preamble command must 00065 be the first byte in the file, followed immediately by its parameters. 00066 Any number of character definitions may follow, and any command but the 00067 preamble command and the postamble command may occur between character 00068 definitions. The very last command in the file must be the postamble.
00069 00070 @ The packed file format is intended to be easy to read and interpret by 00071 device drivers. The small size of the file reduces the input/output overhead 00072 each time a font is loaded. For those drivers that load and save each font 00073 file into memory, the small size also helps reduce the memory requirements. 00074 The length of each character packet is specified, allowing the character raster 00075 data to be loaded into memory by simply counting bytes, rather than 00076 interpreting each command; then, each character can be interpreted on a demand 00077 basis. This also makes it possible for a driver to skip a particular 00078 character quickly if it knows that the character is unused. 00079 00080 @ First, the command bytes will be presented; then the format of the 00081 character definitions will be defined. Eight of the possible sixteen 00082 commands (values 240 through 255) are currently defined; the others are 00083 reserved for future extensions. The commands are listed below. Each command 00084 is specified by its symbolic name (e.g., \\{pk\_no\_op}), its opcode byte, 00085 and any parameters. The parameters are followed by a bracketed number 00086 telling how many bytes they occupy, with the number preceded by a plus sign if 00087 it is a signed quantity. (Four byte quantities are always signed, however.) 00088 00089 \yskip\hang|pk_xxx1| 240 |k[1]| |x[k]|. This command is undefined in general; 00090 it functions as a $(k+2)$-byte \\{no\_op} unless special \.{PK}-reading 00091 programs are being used. \MF\ generates \\{xxx} commands when encountering 00092 a \&{special} string. It is recommended that |x| be a string having the form 00093 of a keyword followed by possible parameters relevant to that keyword. 00094 00095 \yskip\hang\\{pk\_xxx2} 241 |k[2]| |x[k]|. Like |pk_xxx1|, but |0<=k<65536|. 00096 00097 \yskip\hang\\{pk\_xxx3} 242 |k[3]| |x[k]|. Like |pk_xxx1|, but 00098 |0<=k<@t$2^{24}$@>|. 
\MF\ uses this when sending a \&{special} string whose 00099 length exceeds~255. 00100 00101 \yskip\hang\\{pk\_xxx4} 243 |k[4]| |x[k]|. Like |pk_xxx1|, but |k| can be 00102 ridiculously large; |k| mustn't be negative. 00103 00104 \yskip\hang|pk_yyy| 244 |y[4]|. This command is undefined in general; it 00105 functions as a five-byte \\{no\_op} unless special \.{PK} reading programs 00106 are being used. \MF\ puts |scaled| numbers into |yyy|'s, as a result of 00107 \&{numspecial} commands; the intent is to provide numeric parameters to 00108 \\{xxx} commands that immediately precede. 00109 00110 \yskip\hang|pk_post| 245. Beginning of the postamble. This command is 00111 followed by enough |pk_no_op| commands to make the file a multiple 00112 of four bytes long. Zero through three bytes are usual, but any number 00113 is allowed. 00114 This should make the file easy to read on machines that pack four bytes to 00115 a word. 00116 00117 \yskip\hang|pk_no_op| 246. No operation, do nothing. Any number of 00118 |pk_no_op|'s may appear between \.{PK} commands, but a |pk_no_op| cannot be 00119 inserted between a command and its parameters, between two parameters, or 00120 inside a character definition. 00121 00122 \yskip\hang|pk_pre| 247 |i[1]| |k[1]| |x[k]| |ds[4]| |cs[4]| |hppp[4]| 00123 |vppp[4]|. Preamble command. Here, |i| is the identification byte of the 00124 file, currently equal to 89. The string |x| is merely a comment, usually 00125 indicating the source of the \.{PK} file. The parameters |ds| and |cs| are 00126 the design size of the file in $1/2^{20}$ points, and the checksum of the 00127 file, respectively. The checksum should match the \.{TFM} file and the 00128 \.{GF} files for this font. Parameters |hppp| and |vppp| are the ratios 00129 of pixels per point, horizontally and vertically, multiplied by $2^{16}$; they 00130 can be used to correlate the font with specific device resolutions, 00131 magnifications, and ``at sizes''.
Usually, the name of the \.{PK} file is 00132 formed by concatenating the font name (e.g., cmr10) with the resolution at 00133 which the font is prepared in pixels per inch multiplied by the magnification 00134 factor, and the letters \.{pk}. For instance, cmr10 at 300 dots per inch 00135 should be named \.{cmr10.300pk}; at one thousand dots per inch and magstephalf, 00136 it should be named \.{cmr10.1095pk}. 00137 00138 @ We put a few of the above opcodes into definitions for symbolic use by 00139 this program. 00140 00141 @d pk_id = 89 {the version of \.{PK} file described} 00142 @d pk_xxx1 = 240 {\&{special} commands} 00143 @d pk_yyy = 244 {\&{numspecial} commands} 00144 @d pk_post = 245 {postamble} 00145 @d pk_no_op = 246 {no operation} 00146 @d pk_pre = 247 {preamble} 00147 @d pk_undefined == 248, 249, 250, 251, 252, 253, 254, 255 00148 00149 @ The \.{PK} format has two conflicting goals: to pack character raster and 00150 size information as compactly as possible, while retaining ease of translation 00151 into raster and other forms. A suitable compromise was found in the use of 00152 run-encoding of the raster information. Instead of packing the individual 00153 bits of the character, we instead count the number of consecutive `black' or 00154 `white' pixels in a horizontal raster row, and then encode this number. Run 00155 counts are found for each row from left to right, traversing rows from the 00156 top to bottom. This is essentially the way the \.{GF} format works. 00157 Instead of presenting each row individually, however, we concatenate all 00158 of the horizontal raster rows into one long string of pixels, and encode this 00159 row. With knowledge of the width of the bit-map, the original character glyph 00160 can easily be reconstructed. In addition, we do not need special commands to 00161 mark the end of one row and the beginning of the next. 
00162 00163 Next, we place the burden of finding the minimum bounding box on the part 00164 of the font generator, since the characters will usually be used much more 00165 often than they are generated. The minimum bounding box is the smallest 00166 rectangle that encloses all `black' pixels of a character. We also 00167 eliminate the need for a special end of character marker, by supplying 00168 exactly as many bits as are required to fill the minimum bounding box, from 00169 which the end of the character is implicit. 00170 00171 Let us next consider the distribution of the run counts. Analysis of several 00172 dozen pixel files at 300 dots per inch yields a distribution peaking at four, 00173 falling off slowly until ten, then a bit more steeply until twenty, and then 00174 asymptotically approaching the horizontal. Thus, the great majority of our 00175 run counts will fit in a four-bit nybble. The eight-bit byte is attractive for 00176 our run-counts, as it is the standard on many systems; however, the wasted four 00177 bits in the majority of cases seem a high price to pay. Another possibility 00178 is to use a Huffman-type encoding scheme with a variable number of bits for 00179 each run-count; this was rejected because of the overhead in fetching and 00180 examining individual bits in the file. Thus, the character raster definitions 00181 in the \.{PK} file format are based on the four-bit nybble. 00182 00183 @ An analysis of typical pixel files yielded another interesting statistic: 00184 Fully 37\char`\%\ 00185 of the raster rows were duplicates of the previous row. Thus, the \.{PK} 00186 format allows the specification of repeat counts, which indicate how many times 00187 a horizontal raster row is to be repeated. These repeated rows are taken out 00188 of the character glyph before individual rows are concatenated into the long 00189 string of pixels. 00190 00191 For elegance, we disallow a run count of zero. 
The case of a null raster 00192 description should be gleaned from the character width and height being equal 00193 to zero, and no raster data should be read. No other zero counts are ever 00194 necessary. Also, in the absence of repeat counts, the repeat value is set to 00195 be zero (only the original row is sent.) If a repeat count is seen, it takes 00196 effect on the current row. The current row is defined as the row on which the 00197 first pixel of the next run count will lie. The repeat count is set back to 00198 zero when the last pixel in the current row is seen, and the row is sent out. 00199 00200 This poses a problem for entirely black and entirely white rows, however. Let 00201 us say that the current row ends with four white pixels, and then we have five 00202 entirely empty rows, followed by a black pixel at the beginning of the next 00203 row, and the character width is ten pixels. We would like to use a repeat 00204 count, but there is no legal place to put it. If we put it before the white 00205 run count, it will apply to the current row. If we put it after, it applies 00206 to the row with the black pixel at the beginning. Thus, entirely white or 00207 entirely black repeated rows are always packed as large run counts (in this 00208 case, a white run count of 54) rather than repeat counts. 00209 00210 @ Now we turn our attention to the actual packing of the run counts and 00211 repeat counts into nybbles. There are only sixteen possible nybble values. 00212 We need to indicate run counts and repeat counts. Since the run counts are 00213 much more common, we will devote the majority of the nybble values to them. 00214 We therefore indicate a repeat count by a nybble of 14 followed by a packed 00215 number, where a packed number will be explained later. Since the repeat 00216 count value of one is so common, we indicate a repeat one command by a single 00217 nybble of 15. 
A 14 followed by the packed number 1 is still legal for a 00218 repeat one count. The run counts are coded directly as packed 00219 numbers. 00220 00221 For packed numbers, therefore, we have the nybble values 0 through 13. We 00222 need to represent the positive integers up to, say, $2^{31}-1$. We would 00223 like the more common smaller numbers to take only one or two nybbles, and 00224 the infrequent large numbers to take three or more. We could therefore 00225 allocate one nybble value to indicate a large run count taking three or more 00226 nybbles. We do this with the value 0. 00227 00228 @ We are left with the values 1 through 13. We can allocate some of these, say 00229 |dyn_f|, to be one-nybble run counts. 00230 These will work for the run counts |1..dyn_f|. For subsequent run 00231 counts, we will use a nybble greater than |dyn_f|, followed by a second nybble, 00232 whose value can run from 0 through 15. Thus, the two-nybble values will 00233 run from |dyn_f+1..(13-dyn_f)*16+dyn_f|. We have our definition of large run 00234 count values now, being all counts greater than |(13-dyn_f)*16+dyn_f|. 00235 00236 We can analyze our several dozen pixel files and determine an optimal value of 00237 |dyn_f|, and use this value for all of the characters. Unfortunately, values 00238 of |dyn_f| that pack small characters well tend to pack the large characters 00239 poorly, and values that pack large characters well are not efficient for the 00240 smaller characters. Thus, we choose the optimal |dyn_f| on a character basis, 00241 picking the value that will pack each individual character in the smallest 00242 number of nybbles. Legal values of |dyn_f| run from 0 (with no one-nybble run 00243 counts) to 13 (with no two-nybble run counts). 00244 00245 @ Our only remaining task in the coding of packed numbers is the large run 00246 counts. 
We use a scheme suggested by D.~E.~Knuth 00247 @^Knuth, Donald Ervin@> 00248 that simply and elegantly represents arbitrarily large values. The 00249 general scheme to represent an integer |i| is to write its hexadecimal 00250 representation, with leading zeros removed. Then we count the number of 00251 digits, and prepend one less than that many zeros before the hexadecimal 00252 representation. Thus, the values from one to fifteen occupy one nybble; 00253 the values sixteen through 255 occupy three, the values 256 through 4095 00254 require five, etc. 00255 00256 For our purposes, however, we have already represented the numbers one 00257 through |(13-dyn_f)*16+dyn_f|. In addition, the one-nybble values have 00258 already been taken by our other commands, which means that only the values 00259 from sixteen up are available to us for long run counts. Thus, we simply 00260 normalize our long run counts, by subtracting |(13-dyn_f)*16+dyn_f+1| and 00261 adding 16, and then we represent the result according to the scheme above. 00262 00263 @ The final algorithm for decoding the run counts based on the above scheme 00264 looks like this, assuming that a procedure called \\{pk\_nyb} is available 00265 to get the next nybble from the file, and assuming that the global 00266 |repeat_count| indicates whether a row needs to be repeated. Note that this 00267 routine is recursive, but since a repeat count can never directly follow 00268 another repeat count, it can only be recursive to one level. 
00269 00270 @<Packed number procedure@>= 00271 function pk_packed_num : integer ; 00272 var i, @!j : integer ; 00273 begin 00274 i := get_nyb ; 00275 if i = 0 then begin 00276 repeat j := get_nyb ; incr(i) ; until j <> 0 ; 00277 while i > 0 do begin j := j * 16 + get_nyb ; decr(i) ; end ; 00278 pk_packed_num := j - 15 + (13-dyn_f)*16 + dyn_f ; 00279 end else if i <= dyn_f then 00280 pk_packed_num := i 00281 else if i < 14 then 00282 pk_packed_num := (i-dyn_f-1)*16+get_nyb+dyn_f+1 00283 else begin 00284 if repeat_count <> 0 then abort('Second repeat count for this row!') ; 00285 @.Second repeat count...@> 00286 repeat_count := 1; {prevent recursion more than one level} 00287 if i = 14 then repeat_count := pk_packed_num; 00288 send_out(true, repeat_count) ; 00289 pk_packed_num := pk_packed_num ; 00290 end ; 00291 end ; 00292 00293 @ For low resolution fonts, or characters with `gray' areas, run encoding can 00294 often make the character many times larger. Therefore, for those characters 00295 that cannot be encoded efficiently with run counts, the \.{PK} format allows 00296 bit-mapping of the characters. This is indicated by a |dyn_f| value of 00297 14. The bits are packed tightly, by concatenating all of the horizontal raster 00298 rows into one long string, and then packing this string eight bits to a byte. 00299 The number of bytes required can be calculated by |(width*height+7) div 8|. 00300 This format should only be used when packing the character by run counts takes 00301 more bytes than this, although, of course, it is legal for any character. 00302 Any extra bits in the last byte should be set to zero. 00303 00304 @ At this point, we are ready to introduce the format for a character 00305 descriptor. It consists of three parts: a flag byte, a character preamble, 00306 and the raster data. The most significant four bits of the flag byte 00307 yield the |dyn_f| value for that character. 
(Notice that only values of 00308 0 through 14 are legal for |dyn_f|, with 14 indicating a bit mapped character; 00309 thus, the flag bytes do not conflict with the command bytes, whose upper nybble 00310 is always 15.) The next bit (with weight 8) indicates whether the first run 00311 count is a black count or a white count, with a one indicating a black count. 00312 For bit-mapped characters, this bit should be set to a zero. The next bit 00313 (with weight 4) indicates whether certain later parameters (referred to as size 00314 parameters) are given in one-byte or two-byte quantities, with a one indicating 00315 that they are in two-byte quantities. The last two bits are concatenated on to 00316 the beginning of the packet-length parameter in the character preamble, 00317 which will be explained below. 00318 00319 However, if the last three bits of the flag byte are all set (normally 00320 indicating that the size parameters are two-byte values and that a 3 should be 00321 prepended to the length parameter), then a long format of the character 00322 preamble should be used instead of one of the short forms. 00323 00324 Therefore, there are three formats for the character preamble; the one that 00325 is used depends on the least significant three bits of the flag byte. If the 00326 least significant three bits are in the range zero through three, the short 00327 format is used. If they are in the range four through six, the extended short 00328 format is used. Otherwise, if the least significant bits are all set, then 00329 the long form of the character preamble is used. The preamble formats are 00330 explained below. 00331 00332 \yskip\hang Short form: |flag[1]| |pl[1]| |cc[1]| |tfm[3]| |dm[1]| |w[1]| 00333 |h[1]| |hoff[+1]| |voff[+1]|. 00334 If this format of the character preamble is used, the above 00335 parameters must all fit in the indicated number of bytes, signed or unsigned 00336 as indicated. 
Almost all of the standard \TeX\ font characters fit; the few 00337 exceptions are fonts such as \.{cminch}. 00338 00339 \yskip\hang Extended short form: |flag[1]| |pl[2]| |cc[1]| |tfm[3]| |dm[2]| 00340 |w[2]| |h[2]| |hoff[+2]| |voff[+2]|. Larger characters use this extended 00341 format. 00342 00343 \yskip\hang Long form: |flag[1]| |pl[4]| |cc[4]| |tfm[4]| |dx[4]| |dy[4]| 00344 |w[4]| |h[4]| |hoff[4]| |voff[4]|. This is the general format that 00345 allows all of the 00346 parameters of the \.{GF} file format, including vertical escapement. 00347 \vskip\baselineskip 00348 The |flag| parameter is the flag byte. The parameter |pl| (packet length) 00349 contains the offset 00350 of the byte following this character descriptor, with respect to the beginning 00351 of the |tfm| width parameter. This is given so a \.{PK} reading program can, 00352 once it has read the flag byte, packet length, and character code (|cc|), skip 00353 over the character by simply reading this many more bytes. For the two short 00354 forms of the character preamble, the last two bits of the flag byte should be 00355 considered the two most-significant bits of the packet length. For the short 00356 format, the true packet length might be calculated as |(flag mod 4)*256+pl|; 00357 for the short extended format, it might be calculated as 00358 |(flag mod 4)*65536+pl|. 00359 00360 The |w| parameter is the width and the |h| parameter is the height in pixels 00361 of the minimum bounding box. The |dx| and |dy| parameters are the horizontal 00362 and vertical escapements, respectively. In the short formats, |dy| is assumed 00363 to be zero and |dm| is |dx| but in pixels; 00364 in the long format, |dx| and |dy| are both 00365 in pixels multiplied by $2^{16}$. The |hoff| is the horizontal offset from the 00366 upper left pixel to the reference pixel; the |voff| is the vertical offset. 00367 They are both given in pixels, with right and down being positive. 
The 00368 reference pixel is the pixel that occupies the unit square in \MF; the 00369 \MF\ reference point is the lower left hand corner of this pixel. (See the 00370 example below.) 00371 00372 @ \TeX\ requires all characters that have the same character codes 00373 modulo 256 to have also the same |tfm| widths and escapement values. The \.{PK} 00374 format does not itself make this a requirement, but in order for the font to 00375 work correctly with the \TeX\ software, this constraint should be observed. 00376 (The standard version of \TeX\ cannot output character codes greater 00377 than 255, but extended versions do exist.) 00378 00379 Following the character preamble is the raster information for the 00380 character, packed by run counts or by bits, as indicated by the flag byte. 00381 If the character is packed by run counts and the required number of nybbles 00382 is odd, then the last byte of the raster description should have a zero 00383 for its least significant nybble. 00384 00385 @ As an illustration of the \.{PK} format, the character \char4\ from the font 00386 amr10 at 300 dots per inch will be encoded. This character was chosen 00387 because it illustrates some 00388 of the borderline cases. The raster for the character looks like this (the 00389 row numbers are chosen for convenience, and are not \MF's row numbers.) 
00390 00391 \vskip\baselineskip 00392 {\def\smbox{\vrule height 7pt width 7pt depth 0pt \hskip 3pt}% 00393 \catcode`\*=\active \let*=\smbox 00394 \centerline{\vbox{\baselineskip=10pt 00395 \halign{\hfil#\quad&&\hfil#\hfil\cr 00396 0& & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00397 1& & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00398 2& & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00399 3& & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00400 4& & &*&*& & & & & & & & & & & & & & & & &*&*\cr 00401 5& & &*&*& & & & & & & & & & & & & & & & &*&*\cr 00402 6& & &*&*& & & & & & & & & & & & & & & & &*&*\cr 00403 7\cr 00404 8\cr 00405 9& & & & &*&*& & & & & & & & & & & & &*&*& & \cr 00406 10& & & & &*&*& & & & & & & & & & & & &*&*& & \cr 00407 11& & & & &*&*& & & & & & & & & & & & &*&*& & \cr 00408 12& & & & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*& & \cr 00409 13& & & & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*& & \cr 00410 14& & & & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*& & \cr 00411 15& & & & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*& & \cr 00412 16& & & & &*&*& & & & & & & & & & & & &*&*& & \cr 00413 17& & & & &*&*& & & & & & & & & & & & &*&*& & \cr 00414 18& & & & &*&*& & & & & & & & & & & & &*&*& & \cr 00415 19\cr 00416 20\cr 00417 21\cr 00418 22& & &*&*& & & & & & & & & & & & & & & & &*&*\cr 00419 23& & &*&*& & & & & & & & & & & & & & & & &*&*\cr 00420 24& & &*&*& & & & & & & & & & & & & & & & &*&*\cr 00421 25& & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00422 26& & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00423 27& & &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00424 28&+& &*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*&*\cr 00425 &\hphantom{*}&\hphantom{*}\cr 00426 }}}} 00427 The width of the minimum bounding box for this character is 20; its height 00428 is 29. The `+' represents the reference pixel; notice how it lies outside the 00429 minimum bounding box. The |hoff| value is $-2$, and the |voff| is~28. 00430 00431 The first task is to calculate the run counts and repeat counts. 
The repeat 00432 counts are placed at the first transition (black to white or white to black) 00433 in a row, and are enclosed in brackets. White counts are enclosed in 00434 parentheses. It is relatively easy to generate the counts list: 00435 \vskip\baselineskip 00436 \centerline{82 [2] (16) 2 (42) [2] 2 (12) 2 (4) [3]} 00437 \centerline{16 (4) [2] 2 (12) 2 (62) [2] 2 (16) 82} 00438 \vskip\baselineskip 00439 Note that any duplicated rows that are not all white or all black are removed 00440 before the run counts are calculated. The rows thus removed are rows 5, 6, 00441 10, 11, 13, 14, 15, 17, 18, 23, and 24. 00442 00443 @ The next step in the encoding of this character is to calculate the optimal 00444 value of |dyn_f|. The details of how this calculation is done are not 00445 important here; suffice it to say that there is a simple algorithm that can 00446 determine the best value of |dyn_f| in one pass over the count list. For this 00447 character, the optimal value turns out to be 8 (atypically low). Thus, all 00448 count values less than or equal to 8 are packed in one nybble; those from 00449 nine to $(13-8)*16+8$ or 88 are packed in two nybbles. The run encoded values 00450 now become (in hex, separated according to the above list): 00451 \vskip\baselineskip 00452 \centerline{\tt D9 E2 97 2 B1 E2 2 93 2 4 E3} 00453 \centerline{\tt 97 4 E2 2 93 2 C5 E2 2 97 D9} 00454 \vskip\baselineskip\noindent 00455 which comes to 36 nybbles, or 18 bytes. This is shorter than the 73 bytes 00456 required for the bit map, so we use the run count packing. 00457 00458 @ The short form of the character preamble is used because all of the 00459 parameters fit in their respective lengths. The packet length is therefore 00460 18 bytes for the raster, plus 00461 eight bytes for the character preamble parameters following the character 00462 code, or 26. The |tfm| width for this character is 640796, or {\tt 9C71C} in 00463 hexadecimal. The horizontal escapement is 25 pixels. 
The flag byte is 00464 88 hex, indicating the short preamble, the black first count, and the 00465 |dyn_f| value of 8. The final total character packet, in hexadecimal, is: 00466 \vskip\baselineskip 00467 $$\vbox{\halign{\hfil #\quad&&{\tt #\ }\cr 00468 Flag byte&88\cr 00469 Packet length&1A\cr 00470 Character code&04\cr 00471 |tfm| width&09&C7&1C\cr 00472 Horizontal escapement (pixels)&19\cr 00473 Width of bit map&14\cr 00474 Height of bit map&1D\cr 00475 Horizontal offset (signed)&FE\cr 00476 Vertical offset&1C\cr 00477 Raster data&D9&E2&97\cr 00478 &2B&1E&22\cr 00479 &93&24&E3\cr 00480 &97&4E&22\cr 00481 &93&2C&5E\cr 00482 &22&97&D9\cr}}$$ 00483 ********************************************************************/ 00484 00485 #define PK_XXX1 240 00486 #define PK_XXX2 241 00487 #define PK_XXX3 242 00488 #define PK_XXX4 243 00489 #define PK_YYY 244 00490 #define PK_POST 245 00491 #define PK_NO_OP 246 00492 #define PK_PRE 247 00493 00494 00495 //////////////////////////////////////////////////////////////////// 00496 // Function: PkFontFile::Constructor 00497 // Access: Public 00498 // Description: 00499 //////////////////////////////////////////////////////////////////// 00500 PkFontFile:: 00501 PkFontFile() { 00502 } 00503 00504 //////////////////////////////////////////////////////////////////// 00505 // Function: PkFontFile::read 00506 // Access: Public, Virtual 00507 // Description: Attempts to read the font from the indicated file. 00508 // Returns true if successful, false if there is an 00509 // error. 
00510 //////////////////////////////////////////////////////////////////// 00511 bool PkFontFile:: 00512 read(const Filename &filename, bool extract_all, const string &extract_only) { 00513 Filename input_filename = filename; 00514 00515 input_filename.set_binary(); 00516 ifstream pk_file; 00517 if (!input_filename.open_read(pk_file)) { 00518 return false; 00519 } 00520 00521 // First, read the whole thing into a memory buffer, so we can 00522 // easily access bytes at random locations in the file. 00523 unsigned char c = pk_file.get(); 00524 while (pk_file && !pk_file.eof()) { 00525 _pk.push_back(c); 00526 c = pk_file.get(); 00527 } 00528 00529 _p = 0; 00530 _high = true; 00531 _post = false; 00532 _post_warning = false; 00533 00534 _extract_all = extract_all; 00535 _extract_only = extract_only; 00536 00537 return read_pk(); 00538 } 00539 00540 //////////////////////////////////////////////////////////////////// 00541 // Function: PkFontFile::fetch_nibble 00542 // Access: Private 00543 // Description: Returns the next 4-bit nibble from the pk stream. 00544 //////////////////////////////////////////////////////////////////// 00545 unsigned int PkFontFile:: 00546 fetch_nibble() { 00547 assert(_p < (int)_pk.size()); 00548 if (_high) { 00549 _high = false; 00550 return _pk[_p] >> 4; 00551 } else { 00552 _high = true; 00553 return _pk[_p++] & 0xf; 00554 } 00555 } 00556 00557 00558 //////////////////////////////////////////////////////////////////// 00559 // Function: PkFontFile::fetch_packed_int 00560 // Access: Private 00561 // Description: Returns the next packed integer from the pk stream. 
00562 //////////////////////////////////////////////////////////////////// 00563 unsigned int PkFontFile:: 00564 fetch_packed_int() { 00565 int i = fetch_nibble(); 00566 if (i == 0) { 00567 int j; 00568 do { 00569 j = fetch_nibble(); 00570 i++; 00571 } while (j == 0); 00572 while (i > 0) { 00573 j = (j << 4) | fetch_nibble(); 00574 i--; 00575 } 00576 return j - 15 + (13 - _dyn_f)*16 + _dyn_f; 00577 00578 } else if (i <= _dyn_f) { 00579 return i; 00580 00581 } else if (i < 14) { 00582 return (i - _dyn_f - 1)*16 + fetch_nibble() + _dyn_f + 1; 00583 00584 } else { 00585 _repeat_count = 1; 00586 if (i == 14) { 00587 _repeat_count = fetch_packed_int(); 00588 } 00589 // nout << "[" << _repeat_count << "]"; 00590 return fetch_packed_int(); 00591 } 00592 } 00593 00594 //////////////////////////////////////////////////////////////////// 00595 // Function: PkFontFile::fetch_byte 00596 // Access: Private 00597 // Description: Returns the next 8-bit unsigned byte from the pk 00598 // stream. 00599 //////////////////////////////////////////////////////////////////// 00600 unsigned int PkFontFile:: 00601 fetch_byte() { 00602 assert(_high); 00603 assert(_p < (int)_pk.size()); 00604 return _pk[_p++]; 00605 } 00606 00607 //////////////////////////////////////////////////////////////////// 00608 // Function: PkFontFile::fetch_int 00609 // Access: Private 00610 // Description: Returns the next n-byte unsigned int from 00611 // the pk stream. 
00612 //////////////////////////////////////////////////////////////////// 00613 unsigned int PkFontFile:: 00614 fetch_int(int n) { 00615 assert(_high); 00616 00617 unsigned int result = 0; 00618 for (int i = 0; i < n; i++) { 00619 assert(_p < (int)_pk.size()); 00620 result = (result << 8) | _pk[_p]; 00621 _p++; 00622 } 00623 00624 return result; 00625 } 00626 00627 //////////////////////////////////////////////////////////////////// 00628 // Function: PkFontFile::fetch_signed_int 00629 // Access: Private 00630 // Description: Returns the next n-byte signed int from 00631 // the pk stream. 00632 //////////////////////////////////////////////////////////////////// 00633 int PkFontFile:: 00634 fetch_signed_int(int n) { 00635 assert(_high); 00636 00637 assert(_p < (int)_pk.size()); 00638 int result = (signed char)_pk[_p]; 00639 _p++; 00640 for (int i = 1; i < n; i++) { 00641 assert(_p < (int)_pk.size()); 00642 result = (result << 8) | _pk[_p]; 00643 _p++; 00644 } 00645 00646 return result; 00647 } 00648 00649 00650 //////////////////////////////////////////////////////////////////// 00651 // Function: PkFontFile::do_character 00652 // Access: Private 00653 // Description: Reads a single character from the pk file and 00654 // processes it. Returns true if successful, false if 00655 // something bad happened. 00656 //////////////////////////////////////////////////////////////////// 00657 bool PkFontFile:: 00658 do_character(int flag_byte) { 00659 // int start_p = _p - 1; 00660 _dyn_f = (flag_byte >> 4); 00661 bool first_black = ((flag_byte & 0x8) != 0); 00662 int bsize = (flag_byte & 0x4) ? 
2 : 1; 00663 int prepend_length = (flag_byte & 0x3); 00664 00665 bool use_long_form = ((flag_byte & 0x7) == 0x7); 00666 00667 unsigned int pl, cc, itfm, w, h; 00668 int hoff, voff; 00669 unsigned int idx = 0; 00670 unsigned int idy = 0; 00671 int next_p; 00672 00673 if (use_long_form) { 00674 pl = fetch_int(); 00675 cc = fetch_int(); 00676 next_p = _p + pl; 00677 itfm = fetch_int(); 00678 idx = fetch_int(); 00679 idy = fetch_int(); 00680 w = fetch_int(); 00681 h = fetch_int(); 00682 hoff = fetch_signed_int(); 00683 voff = fetch_signed_int(); 00684 } else { 00685 pl = fetch_int(bsize) | (prepend_length << bsize*8); 00686 cc = fetch_byte(); 00687 next_p = _p + pl; 00688 itfm = fetch_int(3); 00689 idx = fetch_int(bsize) << 16; 00690 w = fetch_int(bsize); 00691 h = fetch_int(bsize); 00692 hoff = fetch_signed_int(bsize); 00693 voff = fetch_signed_int(bsize); 00694 } 00695 00696 // double tfm = (double)itfm / (double)(1 << 24); 00697 double dx = (double)idx / (double)(1 << 16); 00698 double dy = (double)idy / (double)(1 << 16); 00699 // double di_width = tfm * _ppu * _hppp / _vppp; 00700 00701 if (_extract_all || 00702 ((cc >= 33 && cc <= 127) && 00703 (_extract_only.empty() || _extract_only.find((char)cc) != string::npos))) { 00704 nout << " " << cc; 00705 00706 CharBitmap *bm = new CharBitmap(cc, w, h, hoff, voff, dx, dy); 00707 00708 if (_dyn_f == 14) { 00709 // A bitmapped character: this character has the actual w x h 00710 // bits stored directly in the pk file. This kind of character 00711 // is quite rare, but it's come up at least once, so the code 00712 // has been seen to work. 00713 unsigned int bit = 0; 00714 unsigned int byte = 0; 00715 for (unsigned int y = 0; y < h; y++) { 00716 for (unsigned int x = 0; x < w; x++) { 00717 if (bit == 0) { 00718 bit = 0x80; 00719 byte = fetch_byte(); 00720 } 00721 bm->_block[y][x] = ((byte & bit)!=0); 00722 bit >>= 1; 00723 } 00724 } 00725 00726 } else { 00727 // A normal, rle character. 
This character has sequences of 00728 // black and white runs stored in the pk file. Most characters 00729 // will be stored this way. 00730 bool black = first_black; 00731 _repeat_count = 0; 00732 00733 int count = fetch_packed_int(); 00734 while (bm->paint(black, count, _repeat_count)) { 00735 /* 00736 if (black) { 00737 nout << count; 00738 } else { 00739 nout << "(" << count << ")"; 00740 } 00741 */ 00742 black = !black; 00743 count = fetch_packed_int(); 00744 } 00745 // nout << "\n"; 00746 } 00747 00748 _chars.push_back(bm); 00749 00750 /* 00751 for (int y = 0; y < h; y++) { 00752 for (int x = 0; x < w; x++) { 00753 nout << (bm->_block[y][x] ? ' ' : '*'); 00754 } 00755 nout << "\n"; 00756 } 00757 */ 00758 00759 if (!_high) { 00760 _p++; 00761 _high = true; 00762 } 00763 00764 if (_p != next_p) { 00765 nout << "Expected p == " << next_p << " got " << _p << "\n"; 00766 } 00767 00768 } else { 00769 nout << " (" << cc << ")"; 00770 } 00771 00772 _p = next_p; 00773 return true; 00774 } 00775 00776 00777 //////////////////////////////////////////////////////////////////// 00778 // Function: PkFontFile::do_xxx 00779 // Access: Private 00780 // Description: The xxx1 .. xxx4 series of commands specify an 00781 // embedded comment or some such silliness in the pk 00782 // file that must be skipped. 00783 //////////////////////////////////////////////////////////////////// 00784 void PkFontFile:: 00785 do_xxx(int num_bytes) { 00786 _p += fetch_int(num_bytes); 00787 } 00788 00789 //////////////////////////////////////////////////////////////////// 00790 // Function: PkFontFile::do_yyy 00791 // Access: Private 00792 // Description: The yyy command is an encoded number which might have 00793 // meaning to a preceding xxx block, but means nothing 00794 // to us. 
00795 //////////////////////////////////////////////////////////////////// 00796 void PkFontFile:: 00797 do_yyy() { 00798 _p += 4; 00799 } 00800 00801 //////////////////////////////////////////////////////////////////// 00802 // Function: PkFontFile::do_post 00803 // Access: Private 00804 // Description: The beginning of the postamble. 00805 //////////////////////////////////////////////////////////////////// 00806 void PkFontFile:: 00807 do_post() { 00808 _post = true; 00809 } 00810 00811 //////////////////////////////////////////////////////////////////// 00812 // Function: PkFontFile::do_pre 00813 // Access: Private 00814 // Description: The preamble. 00815 //////////////////////////////////////////////////////////////////// 00816 void PkFontFile:: 00817 do_pre() { 00818 int id = fetch_byte(); 00819 if (id != 89) { 00820 nout << "Warning: PK file had an unexpected ID, " << id << "\n"; 00821 } 00822 00823 int comment_len = fetch_byte(); 00824 00825 assert(_p + comment_len <= (int)_pk.size()); 00826 nout.write((const char *)&_pk[_p], comment_len); 00827 nout << "\n"; 00828 _p += comment_len; 00829 00830 int ds = fetch_int(); 00831 fetch_int(); // cs 00832 int hppp = fetch_int(); 00833 int vppp = fetch_int(); 00834 00835 _ds = (double)ds / (double)(1 << 20); 00836 _hppp = (double)hppp / (double)(1 << 16); 00837 _vppp = (double)vppp / (double)(1 << 16); 00838 00839 nout << "Font size is " << get_ds() << " points, rasterized at " 00840 << get_dpi() << " DPI.\n"; 00841 } 00842 00843 00844 //////////////////////////////////////////////////////////////////// 00845 // Function: PkFontFile::read_pk 00846 // Access: Private 00847 // Description: 00848 //////////////////////////////////////////////////////////////////// 00849 bool PkFontFile:: 00850 read_pk() { 00851 if (_p >= (int)_pk.size()) { 00852 nout << "PK file is empty.\n"; 00853 return false; 00854 } 00855 unsigned int cmd = fetch_byte(); 00856 if (cmd != PK_PRE) { 00857 nout << "Not a PK file.\n"; 00858 return 
false; 00859 } 00860 do_pre(); 00861 00862 nout << "Characters:"; 00863 00864 while (_p < (int)_pk.size()) { 00865 unsigned int cmd = fetch_byte(); 00866 if (_post && !_post_warning && cmd != PK_NO_OP) { 00867 _post_warning = true; 00868 nout << "\nWarning: postamble was not the last command.\n"; 00869 } 00870 if (cmd < 240) { 00871 if (!do_character(cmd)) { 00872 return true; 00873 } 00874 } else { 00875 switch (cmd) { 00876 case PK_XXX1: 00877 do_xxx(1); 00878 break; 00879 00880 case PK_XXX2: 00881 do_xxx(2); 00882 break; 00883 00884 case PK_XXX3: 00885 do_xxx(3); 00886 break; 00887 00888 case PK_XXX4: 00889 do_xxx(4); 00890 break; 00891 00892 case PK_YYY: 00893 do_yyy(); 00894 break; 00895 00896 case PK_POST: 00897 do_post(); 00898 break; 00899 00900 case PK_NO_OP: 00901 break; 00902 00903 default: 00904 nout << "\nUnexpected command " << cmd << " encountered in PK file\n"; 00905 return false; 00906 } 00907 } 00908 } 00909 nout << "\n"; 00910 00911 if (!_post) { 00912 nout << "Warning: did not encounter postamble.\n"; 00913 } 00914 00915 return true; 00916 }