1 /// Compression and decompression streaming algorithms. 2 /// 3 /// Each compression or decompression algorithm is represented by a struct 4 /// that contains parameters for compression/decompression. 5 /// Besides the parameters they carry, algorithms have no state. Each 6 /// algorithm instance can be used for an unlimited number of parallel jobs. 7 /// 8 /// The algorithms create a stream, which carry the state and allocated 9 /// resources of the ongoing compression. 10 /// 11 /// The compression/decompression jobs are run by the `squiz` function, 12 /// or one of the related helpers built upon it (e.g. deflate, deflateGz, inflate, ...). 13 /// 14 /// `squiz` and related functions take and InputRange of ubyte[] and return an InputRange of ubyte[]. 15 /// This allows streaming in the most natural way for a D program and provide 16 /// the greatest versatility. 17 /// It is possible to read the data from any source (file, network, memory), 18 /// process the data, and write to any kind of destination. 19 /// This also allows to process gigabytes of data with little memory usage. 20 /// 21 /// Compression often wraps the compressed data with header and trailer 22 /// that give the decompression algorithm useful information, especially 23 /// to check the integrity of the data after decompression. 24 /// This is called the format. 25 /// Some compressions algorithms offer different formats, and sometimes 26 /// the possibility to not wrap the data at all (raw format), in which 27 /// case integrity check is not performed. This is usually used when 28 /// an external integrity check is done, for example when archiving 29 /// compressed stream in Zip or 7z archives. 30 module squiz_box.squiz; 31 32 import squiz_box.c.zlib; 33 import squiz_box.priv; 34 35 import std.datetime.systime; 36 import std.exception; 37 import std.range; 38 import std.typecons; 39 40 version (HaveSquizBzip2) 41 { 42 import squiz_box.c.bzip2; 43 } 44 version (HaveSquizLzma) 45 { 46 import squiz_box.c.lzma; 47 } 48 version (HaveSquizZstandard) 49 { 50 import squiz_box.c.zstd; 51 } 52 53 /// default chunk size for data exchanges and I/O operations 54 enum defaultChunkSize = 8192; 55 56 /// definition of a byte chunk, which is the unit of data 57 /// exchanged during I/O and data transformation operations 58 alias ByteChunk = const(ubyte)[]; 59 60 /// A dynamic type of input range of chunks of bytes 61 alias ByteRange = InputRange!ByteChunk; 62 63 /// Static check that a type is a byte range. 64 template isByteRange(BR) 65 { 66 import std.traits : isArray, Unqual; 67 import std.range : ElementType, isInputRange; 68 69 alias Arr = ElementType!BR; 70 alias El = ElementType!Arr; 71 72 enum isByteRange = isInputRange!BR && is(Unqual!El == ubyte); 73 } 74 75 static assert(isByteRange!ByteRange); 76 77 /// Exception thrown when inconsistent data is given to 78 /// a decompression algorithm. 79 /// I.e. the data was not compressed with the corresponding algorithm 80 /// or the wrapping format is not the one expected. 81 @safe class DataException : Exception 82 { 83 mixin basicExceptionCtors!(); 84 } 85 86 /// Check whether a type is a proper squiz algorithm. 87 template isSquizAlgo(A) 88 { 89 enum isSquizAlgo = is(typeof((A algo) { 90 auto stream = algo.initialize(); 91 Flag!"streamEnded" ended = algo.process(stream, Yes.lastChunk); 92 algo.reset(stream); 93 algo.end(stream); 94 static assert(is(typeof(stream) : SquizStream)); 95 })); 96 } 97 98 /// Get the type of a SquizStream for the Squiz algorithm 99 template StreamType(A) if (isSquizAlgo!A) 100 { 101 import std.traits : ReturnType; 102 103 alias StreamType = ReturnType!(A.initialize); 104 } 105 106 /// A squiz algorithm whom type is erased behind an interface. 107 /// This helps to choose algorithm at run time. 108 interface SquizAlgo 109 { 110 /// Initialize a new stream for processing data 111 /// with this algorithm. 112 SquizStream initialize() @safe; 113 114 /// Processes the input stream data to produce output stream data. 115 /// lastChunk indicates that the input chunk in stream is the last one. 116 /// This is an indication to the algorithm that it can start to finish 117 /// the work. 118 /// Returned value indicates that there won't be more output generated 119 /// than the one in stream.output 120 Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe; 121 122 /// Reset the state of this stream, yet reusing the same 123 /// allocating resources, in order to start processing 124 /// another data stream. 125 void reset(SquizStream stream) @safe; 126 127 /// Release the resources used by this stream. 128 /// Most of the memory (if not all) used by algorithm 129 /// is allocating with the garbage collector, so not 130 /// calling this function has little consequence (if not none). 131 void end(SquizStream stream) @safe; 132 } 133 134 static assert(isSquizAlgo!SquizAlgo); 135 136 /// Get a runtime type for the provided algorithm 137 SquizAlgo squizAlgo(A)(A algo) @safe if (isSquizAlgo!A) 138 { 139 return new CSquizAlgo!A(algo); 140 } 141 142 /// 143 @("squizAlgo") 144 unittest 145 { 146 import test.util; 147 import std.array : join; 148 149 auto ctAlgo = Deflate.init; 150 auto rtAlgo = squizAlgo(Deflate.init); 151 152 const len = 10_000; 153 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 154 const input = generateRepetitiveData(len, phrase).join(); 155 156 const ctSquized = only(input).squiz(ctAlgo).join(); 157 const rtSquized = only(input).squiz(rtAlgo).join(); 158 159 assert(ctSquized == rtSquized); 160 } 161 162 private class CSquizAlgo(A) : SquizAlgo 163 { 164 alias Stream = StreamType!A; 165 166 A algo; 167 168 private this(A algo) @safe 169 { 170 this.algo = algo; 171 } 172 173 private Stream checkStream(SquizStream stream) 174 { 175 auto s = cast(Stream) stream; 176 assert(s, "provided stream is not produced by this algorithm"); 177 return s; 178 } 179 180 SquizStream initialize() @safe 181 { 182 return algo.initialize(); 183 } 184 185 Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe 186 { 187 return algo.process(checkStream(stream), lastChunk); 188 } 189 190 void reset(SquizStream stream) @safe 191 { 192 return algo.reset(checkStream(stream)); 193 } 194 195 void end(SquizStream stream) @safe 196 { 197 return algo.end(checkStream(stream)); 198 } 199 } 200 201 /// A state carrying, processing stream for squiz algorithms. 202 /// The stream does not carry any buffer, only slices to external buffer. 203 /// One may normally not use this directly as everything is handled 204 /// by the `squiz` function. 205 interface SquizStream 206 { 207 /// Input data for the algorithm 208 /// The slice is reduced by its begining as the processing moves on. 209 /// Must be refilled when empty before calling the algorithm `process` method. 210 @property const(ubyte)[] input() const @safe; 211 /// Ditto 212 @property void input(const(ubyte)[] inp) @safe; 213 214 /// How many bytes read since the start of the stream processing. 215 @property size_t totalInput() const @safe; 216 217 /// Output buffer for the algorithm to write to. 218 /// This is NOT the data ready after process, but where the 219 /// algorithm must write next. 220 /// after a call to process, the slice is reduced by its beginning, 221 /// and the data written is therefore the one before the slice. 222 @property inout(ubyte)[] output() inout @safe; 223 @property void output(ubyte[] outp) @safe; 224 225 /// How many bytes written since the start of the stream processing. 226 @property size_t totalOutput() const @safe; 227 } 228 229 private template isZlibLikeStream(S) 230 { 231 enum isZlibLikeStream = is(typeof((S stream) { 232 stream.next_in = cast(const(ubyte)*) null; 233 stream.avail_in = 0; 234 stream.next_out = cast(ubyte*) null; 235 stream.avail_out = 0; 236 })); 237 } 238 239 private mixin template ZlibLikeStreamImpl(S) if (isZlibLikeStream!S) 240 { 241 private S strm; 242 243 @property const(ubyte)[] input() const @trusted 244 { 245 return strm.next_in[0 .. strm.avail_in]; 246 } 247 248 @property void input(const(ubyte)[] inp) @trusted 249 { 250 strm.next_in = inp.ptr; 251 strm.avail_in = cast(typeof(strm.avail_in)) inp.length; 252 } 253 254 @property inout(ubyte)[] output() inout @trusted 255 { 256 return strm.next_out[0 .. strm.avail_out]; 257 } 258 259 @property void output(ubyte[] outp) @trusted 260 { 261 strm.next_out = outp.ptr; 262 strm.avail_out = cast(typeof(strm.avail_out)) outp.length; 263 } 264 } 265 266 mixin template ZlibLikeTotalInOutImpl() 267 { 268 @property size_t totalInput() const 269 { 270 return cast(size_t) strm.total_in; 271 } 272 273 @property size_t totalOutput() const 274 { 275 return cast(size_t) strm.total_out; 276 } 277 } 278 279 /// Returns an InputRange containing the input data processed through the supplied algorithm. 280 auto squiz(I, A)(I input, A algo, size_t chunkSize = defaultChunkSize) 281 if (isByteRange!I && isSquizAlgo!A) 282 { 283 return squiz(input, algo, new ubyte[chunkSize]); 284 } 285 286 /// ditto 287 auto squiz(I, A)(I input, A algo, ubyte[] chunkBuffer) 288 if (isByteRange!I && isSquizAlgo!A) 289 { 290 auto stream = algo.initialize(); 291 return Squiz!(I, A, Yes.endStream)(input, algo, stream, chunkBuffer, ulong.max); 292 } 293 294 /// Returns an InputRange containing the input data processed through the supplied algorithm. 295 /// To the difference of `squiz`, `squizReuse` will not manage the state (aka stream) of the algorithm, 296 /// which allows to reuse it (and its allocated resources) for several jobs. 297 /// squizReuse will drive the algorithm and move the stream forward until processing is over. 298 /// The stream must be either freshly initialized or freshly reset before being passed 299 /// to this function. 300 auto squizReuse(I, A, S)(I input, A algo, S stream, ubyte[] chunkBuffer) 301 if (isByteRange!I && isSquizAlgo!A) 302 { 303 static assert(is(StreamType!A == S), S.strinof ~ " is not the stream produced by " ~ A.stringof); 304 return Squiz!(I, A, No.endStream)(input, algo, stream, chunkBuffer, ulong.max); 305 } 306 307 /// Same as squiz, but will stop encoding/decoding after len bytes has been written out 308 /// Useful to decode some raw encoded streams where the uncompressed size is known 309 /// and the algorithm not always report Yes.streamEnded. 310 auto squizMaxOut(I, A)(I input, A algo, ulong maxOut, size_t chunkSize = defaultChunkSize) 311 { 312 import std.algorithm : min; 313 314 const sz = cast(size_t) min(maxOut, chunkSize); 315 auto chunkBuffer = new ubyte[sz]; 316 auto stream = algo.initialize(); 317 return Squiz!(I, A, Yes.endStream)(input, algo, stream, chunkBuffer, maxOut); 318 } 319 320 // Common transformation range for all compression/decompression functions. 321 // I is a byte input range 322 // A is a squiz algorithm 323 // if Yes.end, the stream is ended when data is done processing 324 private struct Squiz(I, A, Flag!"endStream" endStream) 325 { 326 private alias Stream = StreamType!A; 327 328 // Byte input range (by chunks) 329 private I input; 330 331 // The algorithm 332 private A algo; 333 334 // Processed stream stream 335 private Stream stream; 336 337 // Buffer used to store the front chunk 338 private ubyte[] chunkBuffer; 339 // Slice of the buffer that is valid for read out 340 private ByteChunk chunk; 341 342 // maximum number of bytes to write out 343 private ulong maxLen; 344 345 /// Whether the end of stream was reported by the Policy 346 private bool ended; 347 348 private this(I input, A algo, Stream stream, ubyte[] chunkBuffer, ulong maxLen) 349 { 350 this.input = input; 351 this.algo = algo; 352 this.stream = stream; 353 this.chunkBuffer = chunkBuffer; 354 this.maxLen = maxLen; 355 prime(); 356 } 357 358 @property bool empty() 359 { 360 return chunk.length == 0; 361 } 362 363 @property ByteChunk front() 364 { 365 return chunk; 366 } 367 368 void popFront() 369 { 370 chunk = null; 371 if (!ended) 372 prime(); 373 } 374 375 private void prime() 376 { 377 import std.algorithm : min; 378 379 while (chunk.length < chunkBuffer.length) 380 { 381 if (stream.input.length == 0 && !input.empty) 382 stream.input = input.front; 383 384 const len = min(chunkBuffer.length - chunk.length, maxLen); 385 stream.output = chunkBuffer[chunk.length .. chunk.length + len]; 386 387 const streamEnded = algo.process(stream, cast(Flag!"lastChunk") input.empty); 388 389 chunk = chunkBuffer[0 .. $ - stream.output.length]; 390 maxLen -= len; 391 392 // popFront must be called at the end because it invalidates inChunk 393 if (stream.input.length == 0 && !input.empty) 394 input.popFront(); 395 396 if (streamEnded || maxLen == 0) 397 { 398 ended = true; 399 static if (endStream) 400 algo.end(stream); 401 break; 402 } 403 } 404 } 405 } 406 407 version (HaveSquizLzma) 408 { 409 @("squizMaxOut") 410 unittest 411 { 412 // encoded header of test/data/archive.7z 413 const(ubyte)[] dataIn = [ 414 0x00, 0x00, 0x81, 0x33, 0x07, 0xae, 0x0f, 0xd1, 0xf2, 0xfb, 0xfd, 0x40, 415 0xc0, 0x90, 0xd2, 0xff, 0x7d, 0x69, 0x4d, 0x90, 0xd3, 0x2c, 0x42, 0x66, 416 0xb0, 0xc6, 0xcc, 0xeb, 0xcf, 0x59, 0xcc, 0x96, 0x23, 0xf9, 0x91, 0xc8, 417 0x75, 0x49, 0xe9, 0x9d, 0x1a, 0xa8, 0xa5, 0x9d, 0xf7, 0x75, 0x29, 0x1a, 418 0x90, 0x78, 0x18, 0x8e, 0x42, 0x1a, 0x97, 0x0c, 0x40, 0xb7, 0xaa, 0xb6, 419 0x16, 0xa9, 0x91, 0x0c, 0x58, 0xad, 0x75, 0xf7, 0x8f, 0xaf, 0x8f, 0x45, 420 0xdb, 0x78, 0xd0, 0x8e, 0xc6, 0x1b, 0x72, 0xa5, 0xf4, 0xd2, 0x46, 0xf7, 421 0xe1, 0xce, 0x01, 0x80, 0x7f, 0x3d, 0x66, 0xa5, 0x2d, 0x64, 0xd7, 0xb0, 422 0x41, 0xdc, 0x92, 0x59, 0x88, 0xb0, 0x4c, 0x67, 0x34, 0xb6, 0x4e, 0xd3, 423 0xd5, 0x01, 0x8d, 0x43, 0x13, 0x9c, 0x82, 0x78, 0x4d, 0xcf, 0x8c, 0x51, 424 0x25, 0x0f, 0xd5, 0x1d, 0x80, 0x4b, 0x80, 0xea, 0x18, 0xc1, 0x29, 0x49, 425 0xe4, 0x4d, 0x4d, 0x8b, 0xb9, 0xa1, 0xfc, 0x17, 0x2b, 0xb3, 0xe6, 0x00, 426 0x00, 0x00 427 ]; 428 // decoded header data of test/data/archive.7z 429 const(ubyte)[] expectedDataOut = [ 430 0x01, 0x04, 0x06, 0x00, 0x01, 0x09, 0x40, 0x00, 0x07, 0x0b, 0x01, 0x00, 431 0x01, 0x21, 0x21, 0x01, 0x00, 0x0c, 0x8d, 0xe2, 0x00, 0x08, 0x0d, 0x03, 432 0x09, 0x8d, 0xc1, 0x07, 0x0a, 0x01, 0x84, 0x4d, 0x4d, 0xa8, 0x9e, 0xf4, 433 0xb3, 0xdb, 0x12, 0xed, 0x64, 0x40, 0x00, 0x00, 0x05, 0x03, 0x19, 0x0d, 434 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 435 0x00, 0x11, 0x55, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00, 436 0x20, 0x00, 0x32, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00, 437 0x00, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x31, 0x00, 438 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00, 0x00, 0x00, 0x66, 0x00, 439 0x6f, 0x00, 0x6c, 0x00, 0x64, 0x00, 0x65, 0x00, 0x72, 0x00, 0x2f, 0x00, 440 0x63, 0x00, 0x68, 0x00, 0x6d, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x20, 0x00, 441 0x36, 0x00, 0x36, 0x00, 0x36, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 442 0x74, 0x00, 0x00, 0x00, 0x14, 0x1a, 0x01, 0x00, 0x80, 0x96, 0x9f, 0xd5, 443 0xc8, 0x53, 0xd8, 0x01, 0x80, 0x50, 0x82, 0x4f, 0xc6, 0x53, 0xd8, 0x01, 444 0x00, 0xff, 0x13, 0x13, 0xb7, 0x52, 0xd8, 0x01, 0x15, 0x0e, 0x01, 0x00, 445 0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xb6, 0x81, 446 0x00, 0x00 447 ]; 448 449 auto algo = DecompressLzma(LzmaFormat.rawLegacy); 450 451 const dataOut = only(dataIn) 452 .squizMaxOut(algo, expectedDataOut.length) 453 .join(); 454 455 assert(dataOut == expectedDataOut); 456 } 457 } 458 459 /// Copy algorithm do not transform data at all 460 /// This is useful in cases of reading/writing data 461 /// that may or may not be compressed. Using Copy 462 /// allows that the same code handles both kind of streams. 463 final class CopyStream : SquizStream 464 { 465 private const(ubyte)[] _inp; 466 size_t _totalIn; 467 private ubyte[] _outp; 468 size_t _totalOut; 469 470 @property const(ubyte)[] input() const @safe 471 { 472 return _inp; 473 } 474 475 @property void input(const(ubyte)[] inp) @safe 476 { 477 _inp = inp; 478 } 479 480 @property size_t totalInput() const @safe 481 { 482 return _totalIn; 483 } 484 485 @property inout(ubyte)[] output() inout @safe 486 { 487 return _outp; 488 } 489 490 @property void output(ubyte[] outp) @safe 491 { 492 _outp = outp; 493 } 494 495 @property size_t totalOutput() const @safe 496 { 497 return _totalOut; 498 } 499 } 500 501 /// ditto 502 struct Copy 503 { 504 static assert(isSquizAlgo!Copy); 505 506 CopyStream initialize() @safe 507 { 508 return new CopyStream; 509 } 510 511 Flag!"streamEnded" process(CopyStream stream, Flag!"lastChunk" lastChunk) @safe 512 { 513 import std.algorithm : min; 514 515 const len = min(stream._inp.length, stream._outp.length); 516 517 stream._outp[0 .. len] = stream._inp[0 .. len]; 518 519 stream._inp = stream._inp[len .. $]; 520 stream._outp = stream._outp[len .. $]; 521 stream._totalIn += len; 522 stream._totalOut += len; 523 524 return cast(Flag!"streamEnded")(lastChunk && stream._inp.length == 0); 525 } 526 527 void reset(CopyStream stream) @safe 528 { 529 stream._inp = null; 530 stream._outp = null; 531 stream._totalIn = 0; 532 stream._totalOut = 0; 533 } 534 535 void end(CopyStream) @safe 536 { 537 } 538 } 539 540 /// ditto 541 auto copy(I)(I input, size_t chunkSize = defaultChunkSize) 542 { 543 return squiz(input, Copy.init, chunkSize); 544 } 545 546 /// 547 @("Copy") 548 unittest 549 { 550 import test.util : generateRepetitiveData; 551 import std.array : join; 552 553 const len = 10_000; 554 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 555 const input = generateRepetitiveData(len, phrase).join(); 556 557 /// copying with arbitrary chunk sizes on input and output 558 const cop1 = generateRepetitiveData(len, phrase, 1231).copy(234).join(); 559 const cop2 = generateRepetitiveData(len, phrase, 296).copy(6712).join(); 560 561 assert(input == cop1); 562 assert(input == cop2); 563 } 564 565 /// Describe what type of header and trailer are wrapping 566 /// a deflated stream. 567 enum ZlibFormat 568 { 569 /// Zlib header and trailer 570 zlib, 571 /// Gzip header and trailer 572 gz, 573 /// Auto detection of Zlib or Gzip format (only used with Inflate) 574 autoDetect, 575 /// No header and trailer, therefore no integrity check included. 576 /// This to be used in other formats such as Zip. 577 /// When using raw, it is advised to use an external integrity check. 578 raw, 579 } 580 581 private size_t strnlen(const(byte)* str, size_t maxlen) @system 582 { 583 if (!str) 584 return 0; 585 586 size_t l; 587 while (*str != 0 && l < maxlen) 588 { 589 str++; 590 l++; 591 } 592 return l; 593 } 594 595 @("strnlen") 596 unittest 597 { 598 assert(strnlen(null, 0) == 0); 599 assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 15) == 10); 600 assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 10) == 10); 601 assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 9) == 9); 602 assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 0) == 0); 603 assert(strnlen(cast(const(byte)*)("\0bcdefghij\0klmn".ptr), 15) == 0); 604 } 605 606 /// Header data for the Gzip format. 607 /// Gzip includes metadata about the file which is compressed. 608 /// These can be specified here when compressing from a stream 609 /// rather than directly from a file. 610 struct GzHeader 611 { 612 import core.stdc.config : c_ulong; 613 614 /// operating system encoded in the Gz header 615 /// Not all possible values are listed here, only 616 /// the most useful ones 617 enum Os 618 { 619 fatFs = 0, 620 unix = 3, 621 macintosh = 7, 622 ntFs = 11, 623 unknown = 255, 624 } 625 626 version (OSX) 627 enum defaultOs = Os.macintosh; 628 else version (iOS) 629 enum defaultOs = Os.macintosh; 630 else version (Posix) 631 enum defaultOs = Os.unix; 632 else version (Windows) 633 enum defaultOs = Os.ntFs; 634 635 /// Whether the content is believed to be text 636 Flag!"text" text; 637 638 // storing in unix format to avoid 639 // negative numbers with SysTime.init 640 private c_ulong _mtime; 641 642 /// Modification time 643 @property SysTime mtime() const @safe 644 { 645 return SysTime(unixTimeToStdTime(_mtime)); 646 } 647 648 /// ditto 649 @property void mtime(SysTime time) @safe 650 { 651 _mtime = stdTimeToUnixTime(time.stdTime); 652 } 653 654 /// Operating system that wrote the gz file 655 Os os = defaultOs; 656 657 /// Filename to be included in the header 658 string filename; 659 660 /// Comment to be included in the header 661 string comment; 662 663 private enum bufSize = 256; 664 665 private string fromLatin1z(const(byte)* ptr) @system 666 { 667 // ptr points to a buffer of bufSize characters. 668 // End of string is a null character or end of buffer. 669 // Encoding is latin 1. 670 import std.encoding : Latin1Char, transcode; 671 672 const len = strnlen(ptr, bufSize); 673 auto str = cast(const(Latin1Char)[]) ptr[0 .. len]; 674 675 string res; 676 transcode(str, res); 677 return res; 678 } 679 680 private byte* toLatin1z(string str) @trusted 681 { 682 import std.encoding : Latin1Char, transcode; 683 684 Latin1Char[] l1; 685 transcode(str, l1); 686 auto res = (cast(byte[]) l1) ~ 0; 687 return res.ptr; 688 } 689 690 private this(gz_headerp gzh) @system 691 { 692 text = gzh.text ? Yes.text : No.text; 693 _mtime = gzh.time; 694 os = cast(Os) gzh.os; 695 if (gzh.name) 696 filename = fromLatin1z(gzh.name); 697 if (gzh.comment) 698 comment = fromLatin1z(gzh.comment); 699 } 700 701 private gz_headerp toZlib() @safe 702 { 703 import core.stdc.config : c_long; 704 705 auto gzh = new gz_header; 706 gzh.text = text ? 1 : 0; 707 gzh.time = _mtime; 708 gzh.os = cast(int) os; 709 if (filename) 710 gzh.name = toLatin1z(filename); 711 if (comment) 712 gzh.comment = toLatin1z(comment); 713 return gzh; 714 } 715 } 716 717 /// Type of delegate to use as callback for Inflate.gzHeaderDg 718 alias GzHeaderDg = void delegate(GzHeader header); 719 720 /// Helper to set GzHeader.text 721 /// Will check if the data are all ascii characters 722 Flag!"text" isText(const(ubyte)[] data) 723 { 724 import std.algorithm : all; 725 726 return cast(Flag!"text") data.all!( 727 c => c == 0x0a || c == 0x0d || (c >= 0x20 && c <= 0x7e) 728 ); 729 } 730 731 class ZlibStream : SquizStream 732 { 733 mixin ZlibLikeStreamImpl!z_stream; 734 mixin ZlibLikeTotalInOutImpl!(); 735 736 private this() @safe 737 { 738 strm.zalloc = &(gcAlloc!uint); 739 strm.zfree = &gcFree; 740 } 741 } 742 743 /// Returns an InputRange containing the input data processed through Zlib's deflate algorithm. 744 /// The produced stream of data is wrapped by Zlib header and trailer. 745 auto deflate(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I) 746 { 747 return squiz(input, Deflate.init, chunkSize); 748 } 749 750 /// Returns an InputRange containing the input data processed through Zlib's deflate algorithm. 751 /// The produced stream of data is wrapped by Gzip header and trailer. 752 /// Suppliying a header is entirely optional. Zlib produces a default header if not supplied. 753 /// The default header has text false, mtime zero, unknown os, and 754 /// no name or comment. 755 auto deflateGz(I)(I input, GzHeader header, size_t chunkSize = defaultChunkSize) 756 if (isByteRange!I) 757 { 758 auto algo = Deflate.init; 759 algo.format = ZlibFormat.gz; 760 algo.gzHeader = header; 761 return squiz(input, algo, chunkSize); 762 } 763 764 /// ditto 765 auto deflateGz(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I) 766 { 767 auto algo = Deflate.init; 768 algo.format = ZlibFormat.gz; 769 return squiz(input, algo, chunkSize); 770 } 771 772 /// Returns an InputRange containing the input data processed through Zlib's deflate algorithm. 773 /// The produced stream of data isn't wrapped by any header or trailer. 774 auto deflateRaw(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I) 775 { 776 auto algo = Deflate.init; 777 algo.format = ZlibFormat.raw; 778 return squiz(input, algo, chunkSize); 779 } 780 781 /// Zlib's deflate algorithm 782 struct Deflate 783 { 784 static assert(isSquizAlgo!Deflate); 785 static assert(is(StreamType!Deflate == Stream)); 786 787 /// Which format to use for the deflated stream. 788 /// In case ZlibFormat.gz, the gzHeader field will be used if supplied, 789 /// other wise default values will be used. 790 ZlibFormat format; 791 792 /// Compression level from 1 (fastest) to 9 (best compression). 793 int level = 6; 794 795 /// The GzHeader to be used with ZlibFormat.gz. 796 Nullable!GzHeader gzHeader; 797 798 /// Advanced parameters 799 /// See zlib's documentation of `deflateInit2`. 800 /// windowBits must be between 9 and 15 included 801 /// and is adjusted according chosen format. 802 int windowBits = 15; 803 /// ditto 804 int memLevel = 8; 805 /// ditto 806 int strategy = Z_DEFAULT_STRATEGY; 807 808 static final class Stream : ZlibStream 809 { 810 } 811 812 Stream initialize() @safe 813 { 814 assert( 815 9 <= windowBits && windowBits <= 15, 816 "inconsistent windowBits" 817 ); 818 int wb = windowBits; 819 final switch (format) 820 { 821 case ZlibFormat.zlib: 822 break; 823 case ZlibFormat.gz: 824 wb += 16; 825 break; 826 case ZlibFormat.autoDetect: 827 throw new Exception("invalid ZlibFormat for Deflate"); 828 case ZlibFormat.raw: 829 wb = -wb; 830 break; 831 } 832 833 auto stream = new Stream(); 834 835 const res = (() @trusted => deflateInit2( 836 &stream.strm, level, Z_DEFLATED, 837 wb, memLevel, cast(int) strategy, 838 ))(); 839 840 enforce( 841 res == Z_OK, 842 "Could not initialize Zlib deflate stream: " ~ zResultToString(res) 843 ); 844 845 if (format == ZlibFormat.gz && !gzHeader.isNull) 846 { 847 auto head = gzHeader.get.toZlib(); 848 (() @trusted => deflateSetHeader(&stream.strm, head))(); 849 } 850 851 return stream; 852 } 853 854 Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe 855 { 856 const flush = lastChunk ? Z_FINISH : Z_NO_FLUSH; 857 const res = (() @trusted => squiz_box.c.zlib.deflate(&stream.strm, flush))(); 858 859 enforce( 860 res == Z_OK || res == Z_STREAM_END, 861 "Zlib deflate failed with code: " ~ zResultToString(res) 862 ); 863 864 return cast(Flag!"streamEnded")(res == Z_STREAM_END); 865 } 866 867 void reset(Stream stream) @trusted 868 { 869 deflateReset(&stream.strm); 870 } 871 872 void end(Stream stream) @trusted 873 { 874 deflateEnd(&stream.strm); 875 } 876 } 877 878 /// Returns an InputRange streaming over data inflated with Zlib. 879 /// The input data must be deflated with a zlib format. 880 auto inflate(I)(I input, size_t chunkSize = defaultChunkSize) 881 { 882 return squiz(input, Inflate.init, chunkSize); 883 } 884 885 /// Returns an InputRange streaming over data inflated with Zlib. 886 /// The input data must be deflated with a gz format. 887 /// If headerDg is not null, it will be called 888 /// as soon as the header is read from the stream. 889 auto inflateGz(I)(I input, GzHeaderDg headerDg, size_t chunkSize = defaultChunkSize) 890 { 891 auto algo = Inflate.init; 892 algo.format = ZlibFormat.gz; 893 algo.gzHeaderDg = headerDg; 894 return squiz(input, algo, chunkSize); 895 } 896 897 /// ditto 898 auto inflateGz(I)(I input, size_t chunkSize = defaultChunkSize) 899 { 900 return inflateGz(input, null, chunkSize); 901 } 902 903 /// Returns an InputRange streaming over data inflated with Zlib. 904 /// The input must be raw deflated data 905 auto inflateRaw(I)(I input, size_t chunkSize = defaultChunkSize) 906 { 907 auto algo = Inflate.init; 908 algo.format = ZlibFormat.raw; 909 return squiz(input, algo, chunkSize); 910 } 911 912 /// Zlib's inflate algorithm 913 struct Inflate 914 { 915 static assert(isSquizAlgo!Inflate); 916 917 /// Which format to use for the deflated stream. 918 /// In case ZlibFormat.gz, the gzHeader field will be written if set. 919 ZlibFormat format; 920 921 /// If set, will be assigned to the Gz header once it is known 922 GzHeaderDg gzHeaderDg; 923 924 /// Advanced parameters 925 /// See zlib's documentation of `deflateInit2`. 926 /// windowBits can be 0 if format is ZlibFormat.zlib. 927 /// Otherwise it must be between 9 and 15 included. 928 int windowBits = 15; 929 930 private static final class Gzh 931 { 932 private gz_header gzh; 933 private byte[GzHeader.bufSize] nameBuf; 934 private byte[GzHeader.bufSize] commentBuf; 935 936 private GzHeaderDg dg; 937 private bool dgCalled; 938 939 this(GzHeaderDg dg) @safe 940 { 941 gzh.name = &nameBuf[0]; 942 gzh.name_max = cast(uint) nameBuf.length; 943 gzh.comment = &commentBuf[0]; 944 gzh.comm_max = cast(uint) commentBuf.length; 945 946 this.dg = dg; 947 } 948 } 949 950 static final class Stream : ZlibStream 951 { 952 Gzh gzh; 953 } 954 955 Stream initialize() @safe 956 { 957 assert( 958 (windowBits == 0 && format == ZlibFormat.zlib) || 959 (9 <= windowBits && windowBits <= 15), 960 "inconsistent windowBits" 961 ); 962 int wb = windowBits; 963 final switch (format) 964 { 965 case ZlibFormat.zlib: 966 break; 967 case ZlibFormat.gz: 968 wb += 16; 969 break; 970 case ZlibFormat.autoDetect: 971 wb += 32; 972 break; 973 case ZlibFormat.raw: 974 wb = -wb; 975 break; 976 } 977 978 auto stream = new Stream(); 979 980 const res = (() @trusted => inflateInit2(&stream.strm, wb))(); 981 982 enforce( 983 res == Z_OK, 984 "Could not initialize Zlib's inflate stream: " ~ zResultToString(res) 985 ); 986 987 if (gzHeaderDg) 988 { 989 stream.gzh = new Gzh(gzHeaderDg); 990 (() @trusted => inflateGetHeader(&stream.strm, &stream.gzh.gzh))(); 991 } 992 993 return stream; 994 } 995 996 package Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" /+ lastChunk +/ ) 997 { 998 const res = (() @trusted => squiz_box.c.zlib.inflate(&stream.strm, Z_NO_FLUSH))(); 999 // 1000 if (res == Z_DATA_ERROR) 1001 throw new DataException("Improper data given to deflate"); 1002 1003 enforce( 1004 res == Z_OK || res == Z_STREAM_END, 1005 "Zlib inflate failed with code: " ~ zResultToString(res) 1006 ); 1007 1008 auto gzh = stream.gzh; 1009 if (gzh && !gzh.dgCalled && gzh.gzh.done) 1010 { 1011 auto head = (() @trusted => GzHeader(&gzh.gzh))(); 1012 gzh.dg(head); 1013 gzh.dgCalled = true; 1014 } 1015 1016 return cast(Flag!"streamEnded")(res == Z_STREAM_END); 1017 } 1018 1019 package void reset(Stream stream) @trusted 1020 { 1021 inflateReset(&stream.strm); 1022 } 1023 1024 package void end(Stream stream) @trusted 1025 { 1026 inflateEnd(&stream.strm); 1027 } 1028 } 1029 1030 /// 1031 @("Deflate / Inflate") 1032 unittest 1033 { 1034 import test.util; 1035 import std.array : join; 1036 1037 auto def = Deflate.init; 1038 auto inf = Inflate.init; 1039 1040 const len = 100_000; 1041 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 1042 const input = generateRepetitiveData(len, phrase).join(); 1043 1044 // deflating 1045 const squized = only(input).squiz(def).join(); 1046 1047 // re-inflating 1048 const output = only(squized).squiz(inf).join(); 1049 1050 assert(squized.length < input.length); 1051 assert(output == input); 1052 1053 // for such long and repetitive data, ratio is around 0.3% 1054 const ratio = cast(double) squized.length / cast(double) input.length; 1055 assert(ratio < 0.004); 1056 } 1057 1058 /// 1059 @("Deflate / Inflate in Gz format and custom header") 1060 unittest 1061 { 1062 import test.util; 1063 import std.array : join; 1064 1065 const len = 100_000; 1066 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 1067 const input = generateRepetitiveData(len, phrase).join(); 1068 1069 GzHeader inHead; 1070 inHead.mtime = Clock.currTime; 1071 inHead.os = GzHeader.Os.fatFs; 1072 inHead.text = Yes.text; 1073 inHead.filename = "boring.txt"; 1074 inHead.comment = "A very boring file"; 1075 1076 // deflating 1077 const squized = only(input) 1078 .deflateGz(inHead) 1079 .join(); 1080 1081 // re-inflating 1082 GzHeader outHead; 1083 int numCalls; 1084 void setOutHead(GzHeader gzh) 1085 { 1086 outHead = gzh; 1087 numCalls++; 1088 } 1089 1090 const output = only(squized) 1091 .inflateGz(&setOutHead) 1092 .join(); 1093 1094 assert(squized.length < input.length); 1095 assert(output == input); 1096 assert(inHead == outHead); 1097 assert(numCalls == 1); 1098 } 1099 1100 /// 1101 @("Deflate / Inflate in raw format") 1102 unittest 1103 { 1104 import test.util; 1105 import std.array : join; 1106 1107 const len = 100_000; 1108 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 1109 const input = generateRepetitiveData(len, phrase).join(); 1110 1111 // deflating 1112 const squized = only(input) 1113 .deflateRaw() 1114 .join(); 1115 1116 // re-inflating 1117 const output = only(squized) 1118 .inflateRaw() 1119 .join(); 1120 1121 assert(squized.length < input.length); 1122 assert(output == input); 1123 } 1124 1125 package string zResultToString(int res) @safe pure nothrow @nogc 1126 { 1127 switch (res) 1128 { 1129 case Z_OK: 1130 return "OK"; 1131 case Z_STREAM_END: 1132 return "STREAM_END"; 1133 case Z_NEED_DICT: 1134 return "NEED_DICT"; 1135 case Z_ERRNO: 1136 return "ERRNO"; 1137 case Z_STREAM_ERROR: 1138 return "STREAM_ERROR"; 1139 case Z_DATA_ERROR: 1140 return "DATA_ERROR"; 1141 case Z_MEM_ERROR: 1142 return "MEM_ERROR"; 1143 case Z_BUF_ERROR: 1144 return "BUF_ERROR"; 1145 case Z_VERSION_ERROR: 1146 return "VERSION_ERROR"; 1147 default: 1148 return "(Unknown result)"; 1149 } 1150 } 1151 1152 package string zFlushToString(int flush) @safe pure nothrow @nogc 1153 { 1154 switch (flush) 1155 { 1156 case Z_NO_FLUSH: 1157 return "NO_FLUSH"; 1158 case Z_PARTIAL_FLUSH: 1159 return "PARTIAL_FLUSH"; 1160 case Z_SYNC_FLUSH: 1161 return "SYNC_FLUSH"; 1162 case Z_FULL_FLUSH: 1163 return "FULL_FLUSH"; 1164 case Z_FINISH: 1165 return "FINISH"; 1166 case Z_BLOCK: 1167 return "BLOCK"; 1168 case Z_TREES: 1169 return "TREES"; 1170 default: 1171 return "(Unknown flush)"; 1172 } 1173 } 1174 1175 version (HaveSquizBzip2) 1176 { 1177 1178 /// Returns an InputRange containing the input data processed through Bzip2 compression. 1179 auto compressBzip2(I)(I input, size_t chunkSize = defaultChunkSize) 1180 if (isByteRange!I) 1181 { 1182 return squiz(input, CompressBzip2.init, chunkSize); 1183 } 1184 1185 final class Bz2Stream : SquizStream 1186 { 1187 mixin ZlibLikeStreamImpl!(bz_stream); 1188 1189 @property size_t totalInput() const @safe 1190 { 1191 ulong hi = strm.total_in_hi32; 1192 return cast(size_t)( 1193 (hi << 32) | strm.total_in_lo32 1194 ); 1195 } 1196 1197 @property size_t totalOutput() const @safe 1198 { 1199 ulong hi = strm.total_out_hi32; 1200 return cast(size_t)( 1201 (hi << 32) | strm.total_out_lo32 1202 ); 1203 } 1204 1205 this() @safe 1206 { 1207 strm.bzalloc = &(gcAlloc!int); 1208 strm.bzfree = &gcFree; 1209 } 1210 } 1211 1212 /// Compression with the Bzip2 algorithm. 1213 /// 1214 /// Although having better compression capabilities than Zlib (deflate), 1215 /// Bzip2 has poor latenty when it comes to streaming. 1216 /// I.e. it can swallow several Mb of data before starting to produce output. 1217 /// If streaming latenty is an important factor, deflate/inflate 1218 /// should be the favorite algorithm. 1219 /// 1220 /// This algorithm does not support resource reuse, so calling reset 1221 /// is equivalent to a call to end followed by initialize. 1222 /// (but the same instance of stream is kept). 1223 struct CompressBzip2 1224 { 1225 static assert(isSquizAlgo!CompressBzip2); 1226 1227 /// Advanced Bzip2 parameters 1228 /// See Bzip2 documentation 1229 /// https://www.sourceware.org/bzip2/manual/manual.html#bzcompress-init 1230 int blockSize100k = 9; 1231 /// ditto 1232 int verbosity = 0; 1233 /// ditto 1234 int workFactor = 30; 1235 1236 alias Stream = Bz2Stream; 1237 1238 Stream initialize() @safe 1239 { 1240 auto stream = new Stream; 1241 1242 const res = (() @trusted => BZ2_bzCompressInit( 1243 &stream.strm, blockSize100k, verbosity, workFactor, 1244 ))(); 1245 enforce( 1246 res == BZ_OK, 1247 "Could not initialize Bzip2 compressor: " ~ bzResultToString(res) 1248 ); 1249 return stream; 1250 } 1251 1252 Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe 1253 { 1254 const action = lastChunk ? BZ_FINISH : BZ_RUN; 1255 const res = (() @trusted => BZ2_bzCompress(&stream.strm, action))(); 1256 1257 if (res == BZ_STREAM_END) 1258 return Yes.streamEnded; 1259 1260 enforce( 1261 (action == BZ_RUN && res == BZ_RUN_OK) || 1262 (action == BZ_FINISH && res == BZ_FINISH_OK), 1263 "Bzip2 compress failed with code: " ~ bzResultToString(res) 1264 ); 1265 1266 return No.streamEnded; 1267 } 1268 1269 void reset(Stream stream) @safe 1270 { 1271 (() @trusted => BZ2_bzCompressEnd(&stream.strm))(); 1272 1273 stream.strm = bz_stream.init; 1274 stream.strm.bzalloc = &(gcAlloc!int); 1275 stream.strm.bzfree = &gcFree; 1276 1277 const res = (() @trusted => BZ2_bzCompressInit( 1278 &stream.strm, blockSize100k, verbosity, workFactor, 1279 ))(); 1280 enforce( 1281 res == BZ_OK, 1282 "Could not initialize Bzip2 compressor: " ~ bzResultToString(res) 1283 ); 1284 } 1285 1286 void end(Stream stream) @trusted 1287 { 1288 BZ2_bzCompressEnd(&stream.strm); 1289 } 1290 } 1291 1292 /// Returns an InputRange streaming over data decompressed with Bzip2. 1293 auto decompressBzip2(I)(I input, size_t chunkSize = defaultChunkSize) 1294 if (isByteRange!I) 1295 { 1296 return squiz(input, DecompressBzip2.init, chunkSize); 1297 } 1298 1299 /// Decompression of data encoded with Bzip2. 1300 /// 1301 /// This algorithm does not support resource reuse, so calling reset 1302 /// is equivalent to a call to end followed by initialize. 1303 /// (but the same instance of stream is kept). 1304 struct DecompressBzip2 1305 { 1306 static assert(isSquizAlgo!DecompressBzip2); 1307 1308 /// Advanced Bzip2 parameters 1309 /// See Bzip2 documentation 1310 /// https://www.sourceware.org/bzip2/manual/manual.html#bzDecompress-init 1311 int verbosity; 1312 /// ditto 1313 bool small; 1314 1315 alias Stream = Bz2Stream; 1316 1317 Stream initialize() @safe 1318 { 1319 auto stream = new Stream; 1320 1321 const res = (() @trusted => BZ2_bzDecompressInit( 1322 &stream.strm, verbosity, small ? 1 : 0, 1323 ))(); 1324 enforce( 1325 res == BZ_OK, 1326 "Could not initialize Bzip2 decompressor: " ~ bzResultToString(res) 1327 ); 1328 return stream; 1329 } 1330 1331 Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe 1332 { 1333 const res = (() @trusted => BZ2_bzDecompress(&stream.strm))(); 1334 1335 if (res == BZ_DATA_ERROR) 1336 throw new DataException("Input data was not compressed with Bzip2"); 1337 1338 enforce( 1339 res == BZ_OK || res == BZ_STREAM_END, 1340 "Bzip2 decompress failed with code: " ~ bzResultToString(res) 1341 ); 1342 1343 return cast(Flag!"streamEnded")(res == BZ_STREAM_END); 1344 } 1345 1346 void reset(Stream stream) @safe 1347 { 1348 (() @trusted => BZ2_bzDecompressEnd(&stream.strm))(); 1349 1350 stream.strm = bz_stream.init; 1351 stream.strm.bzalloc = &(gcAlloc!int); 1352 stream.strm.bzfree = &gcFree; 1353 1354 const res = (() @trusted => BZ2_bzDecompressInit( 1355 &stream.strm, verbosity, small ? 1 : 0, 1356 ))(); 1357 enforce( 1358 res == BZ_OK, 1359 "Could not initialize Bzip2 decompressor: " ~ bzResultToString(res) 1360 ); 1361 } 1362 1363 void end(Stream stream) @trusted 1364 { 1365 BZ2_bzDecompressEnd(&stream.strm); 1366 } 1367 } 1368 1369 /// 1370 @("Compress / Decompress Bzip2") 1371 unittest 1372 { 1373 import test.util; 1374 import std.array : join; 1375 1376 const len = 100_000; 1377 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 1378 const input = generateRepetitiveData(len, phrase).join(); 1379 1380 const squized = only(input) 1381 .compressBzip2() 1382 .join(); 1383 1384 const output = only(squized) 1385 .decompressBzip2() 1386 .join(); 1387 1388 assert(squized.length < input.length); 1389 assert(output == input); 1390 1391 // for such long and repetitive data, ratio is around 0.12% 1392 const ratio = cast(double) squized.length / cast(double) input.length; 1393 assert(ratio < 0.002); 1394 } 1395 1396 private string bzActionToString(int action) @safe pure nothrow @nogc 1397 { 1398 switch (action) 1399 { 1400 case BZ_RUN: 1401 return "RUN"; 1402 case BZ_FLUSH: 1403 return "FLUSH"; 1404 case BZ_FINISH: 1405 return "FINISH"; 1406 default: 1407 return "(Unknown result)"; 1408 } 1409 } 1410 1411 private string bzResultToString(int res) @safe pure nothrow @nogc 1412 { 1413 switch (res) 1414 { 1415 case BZ_OK: 1416 return "OK"; 1417 case BZ_RUN_OK: 1418 return "RUN_OK"; 1419 case BZ_FLUSH_OK: 1420 return "FLUSH_OK"; 1421 case BZ_FINISH_OK: 1422 return "FINISH_OK"; 1423 case BZ_STREAM_END: 1424 return "STREAM_END"; 1425 case BZ_SEQUENCE_ERROR: 1426 return "SEQUENCE_ERROR"; 1427 case BZ_PARAM_ERROR: 1428 return "PARAM_ERROR"; 1429 case BZ_MEM_ERROR: 1430 return "MEM_ERROR"; 1431 case BZ_DATA_ERROR: 1432 return "DATA_ERROR"; 1433 case BZ_DATA_ERROR_MAGIC: 1434 return "DATA_ERROR_MAGIC"; 1435 case BZ_IO_ERROR: 1436 return "IO_ERROR"; 1437 case BZ_UNEXPECTED_EOF: 1438 return "UNEXPECTED_EOF"; 1439 case BZ_OUTBUFF_FULL: 1440 return "OUTBUFF_FULL"; 1441 case BZ_CONFIG_ERROR: 1442 return "CONFIG_ERROR"; 1443 default: 1444 return "(Unknown result)"; 1445 } 1446 } 1447 } 1448 1449 version (HaveSquizLzma) 1450 { 1451 final class LzmaStream : SquizStream 1452 { 1453 mixin ZlibLikeStreamImpl!(lzma_stream); 1454 mixin ZlibLikeTotalInOutImpl!(); 1455 1456 private lzma_allocator alloc; 1457 private lzma_options_delta optsDelta; 1458 private lzma_options_lzma optsLzma; 1459 private lzma_filter[] filterChain; 1460 1461 this() @safe 1462 { 1463 alloc.alloc = &(gcAlloc!size_t); 1464 alloc.free = &gcFree; 1465 strm.allocator = &alloc; 1466 } 1467 1468 private lzma_filter[] buildFilterChain(LzmaFormat format, LzmaFilter[] filters, 1469 uint preset, uint deltaDist) @safe 1470 { 1471 lzma_filter[] res; 1472 foreach (f; filters) 1473 { 1474 final switch (f) 1475 { 1476 case LzmaFilter.delta: 1477 optsDelta.dist = deltaDist; 1478 res ~= lzma_filter(LZMA_FILTER_DELTA, cast(void*)&optsDelta); 1479 break; 1480 case LzmaFilter.bcjX86: 1481 res ~= lzma_filter(LZMA_FILTER_X86, null); 1482 break; 1483 case LzmaFilter.bcjPowerPc: 1484 res ~= lzma_filter(LZMA_FILTER_POWERPC, null); 1485 break; 1486 case LzmaFilter.bcjIa64: 1487 res ~= lzma_filter(LZMA_FILTER_IA64, null); 1488 break; 1489 case LzmaFilter.bcjArm: 1490 res ~= lzma_filter(LZMA_FILTER_ARM, null); 1491 break; 1492 case LzmaFilter.bcjArmThumb: 1493 res ~= lzma_filter(LZMA_FILTER_ARMTHUMB, null); 1494 break; 1495 case LzmaFilter.bcjSparc: 1496 res ~= lzma_filter(LZMA_FILTER_SPARC, null); 1497 break; 1498 } 1499 } 1500 1501 enforce(res.length <= 3, "Too many filters supplied"); 1502 1503 if (format != LzmaFormat.rawCopy) 1504 { 1505 (() @trusted => lzma_lzma_preset(&optsLzma, preset))(); 1506 const compFilter = format.isLegacy ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; 1507 res ~= lzma_filter(compFilter, cast(void*)&optsLzma); 1508 } 1509 1510 res ~= lzma_filter(LZMA_VLI_UNKNOWN, null); // end marker 1511 1512 filterChain = res; 1513 return res; 1514 } 1515 1516 } 1517 1518 /// Header/trailer format for Lzma compression 1519 enum LzmaFormat 1520 { 1521 /// Lzma with Xz format, suitable to write *.xz files 1522 xz, 1523 /// LZMA1 encoding and format, suitable for legacy *.lzma files 1524 /// This format doesn't support filters. 1525 legacy, 1526 /// Raw LZMA2 compression, without header/trailer. 1527 /// Use this to include compressed LZMA data in 1528 /// a container defined externally (e.g. this is used 1529 /// for the *.7z archives) 1530 raw, 1531 /// Raw LZMA1 compression, without header/trailer. 1532 /// This one is still found in some *.7z files. 1533 rawLegacy, 1534 /// Just copy bytes out. 1535 /// You may use this in combination with a filter to observe its 1536 /// effect, but has otherwise no use. 1537 rawCopy, 1538 } 1539 1540 /// Whether this is a legacy format 1541 bool isLegacy(LzmaFormat format) @safe pure nothrow @nogc 1542 { 1543 return format == LzmaFormat.legacy || format == LzmaFormat.rawLegacy; 1544 } 1545 1546 /// Whether this is a raw format 1547 bool isRaw(LzmaFormat format) @safe pure nothrow @nogc 1548 { 1549 return cast(int) format >= cast(int) LzmaFormat.raw; 1550 } 1551 1552 /// Filters to use with the LZMA compression. 1553 /// 1554 /// Up to 3 filters can be used from this list. 1555 /// These filters transform the input to increase 1556 /// redundancy of the data supplied to the LZMA compression. 1557 enum LzmaFilter 1558 { 1559 /// Delta filter, which store differences between bytes 1560 /// to produce more repetitive data in some circumstances. 1561 /// Works with `deltaDist` parameter of `CompressLzma`. 1562 delta, 1563 1564 /// BCJ (Branch/Call/Jump) filters aim optimize machine code 1565 /// compression by converting relative branches, calls and jumps 1566 /// to absolute addresses. This increases redundancy and can be 1567 /// exploited by the LZMA compression. 1568 /// 1569 /// BCJ filters are available for a set of CPU architectures. 1570 /// Use one (or two) of them when compressing compiled binaries. 1571 bcjX86, 1572 /// ditto 1573 bcjPowerPc, 1574 /// ditto 1575 bcjIa64, 1576 /// ditto 1577 bcjArm, 1578 /// ditto 1579 bcjArmThumb, 1580 /// ditto 1581 bcjSparc, 1582 } 1583 1584 /// Integrity check to include in the compressed data 1585 /// (only for the Xz format) 1586 /// Default for xz is CRC-64. 1587 enum LzmaCheck 1588 { 1589 /// No integrity check included 1590 none, 1591 /// CRC-32 integrity check 1592 crc32, 1593 /// CRC-64 integrity check 1594 crc64, 1595 /// SHA-256 integrity check 1596 sha256, 1597 } 1598 1599 private lzma_check toLzma(LzmaCheck check) @safe pure nothrow @nogc 1600 { 1601 final switch (check) 1602 { 1603 case LzmaCheck.none: 1604 return lzma_check.NONE; 1605 case LzmaCheck.crc32: 1606 return lzma_check.CRC32; 1607 case LzmaCheck.crc64: 1608 return lzma_check.CRC64; 1609 case LzmaCheck.sha256: 1610 return lzma_check.SHA256; 1611 } 1612 } 1613 1614 auto compressXz(I)(I input, size_t chunkSize = defaultChunkSize) 1615 { 1616 return squiz(input, CompressLzma.init, chunkSize); 1617 } 1618 1619 auto compressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize) 1620 { 1621 CompressLzma algo; 1622 algo.format = LzmaFormat.raw; 1623 return squiz(input, algo, chunkSize); 1624 } 1625 1626 struct CompressLzma 1627 { 1628 import std.conv : to; 1629 1630 static assert(isSquizAlgo!CompressLzma); 1631 1632 /// The format of the compressed stream 1633 LzmaFormat format; 1634 1635 /// The integrity check to include in compressed stream. 1636 /// Only used with XZ format. 1637 LzmaCheck check = LzmaCheck.crc64; 1638 1639 /// The compression preset between 0 (fast) to 9 (higher compression). 1640 /// The default is 6. 1641 uint preset = 6; 1642 1643 /// Makes the encoding significantly slower for marginal compression 1644 /// improvement. Only useful if you don't mind about CPU time at all. 1645 Flag!"extreme" extreme; 1646 1647 /// Filters to include in the encoding. 1648 /// Maximum three filters can be provided. 1649 /// For most input, no filtering is necessary. 1650 LzmaFilter[] filters; 1651 1652 /// Number of bytes between 1 and 256 to use for the Delta filter. 1653 /// For example for 16bit PCM stero audio, you should use 4. 1654 /// For RGB data 8bit per channel, you should use 3. 1655 uint deltaDist; 1656 1657 alias Stream = LzmaStream; 1658 1659 private void initStream(Stream stream) @trusted 1660 { 1661 uint pres = preset; 1662 if (extreme) 1663 pres |= LZMA_PRESET_EXTREME; 1664 1665 lzma_ret res; 1666 final switch (format) 1667 { 1668 case LzmaFormat.xz: 1669 const chain = stream.buildFilterChain(format, filters, pres, deltaDist); 1670 res = lzma_stream_encoder(&stream.strm, chain.ptr, check.toLzma()); 1671 break; 1672 case LzmaFormat.legacy: 1673 enforce(filters.length == 0, "Filters are not supported with the legacy format"); 1674 lzma_lzma_preset(&stream.optsLzma, preset); 1675 res = lzma_alone_encoder(&stream.strm, &stream.optsLzma); 1676 break; 1677 case LzmaFormat.raw: 1678 case LzmaFormat.rawLegacy: 1679 case LzmaFormat.rawCopy: 1680 const chain = stream.buildFilterChain(format, filters, pres, deltaDist); 1681 res = lzma_raw_encoder(&stream.strm, chain.ptr); 1682 break; 1683 } 1684 1685 enforce(res == lzma_ret.OK, "Could not initialize LZMA encoder: ", res.to!string); 1686 } 1687 1688 Stream initialize() @safe 1689 { 1690 auto stream = new LzmaStream; 1691 initStream(stream); 1692 return stream; 1693 } 1694 1695 Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe 1696 { 1697 return lzmaCode(stream, lastChunk); 1698 } 1699 1700 void reset(Stream stream) @safe 1701 { 1702 // Lzma supports reset out of the box by recalling initialization 1703 // function without calling lzma_end. 1704 1705 initStream(stream); 1706 } 1707 1708 void end(Stream stream) @trusted 1709 { 1710 lzma_end(&stream.strm); 1711 } 1712 } 1713 1714 auto decompressXz(I)(I input, size_t chunkSize = defaultChunkSize) 1715 { 1716 return squiz(input, DecompressLzma.init, chunkSize); 1717 } 1718 1719 auto decompressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize) 1720 { 1721 DecompressLzma algo; 1722 algo.format = LzmaFormat.raw; 1723 return squiz(input, algo, chunkSize); 1724 } 1725 1726 struct DecompressLzma 1727 { 1728 import std.conv : to; 1729 1730 static assert(isSquizAlgo!DecompressLzma); 1731 1732 /// The format of the compressed stream 1733 LzmaFormat format; 1734 1735 /// The memory usage limit in bytes. 1736 /// by default no limit is enforced 1737 size_t memLimit = size_t.max; 1738 1739 /// Parameters for the raw decompression. 1740 /// They are the same than for the compression. 1741 /// As there is no header to tell Lzma what filters were used during 1742 /// compression, it is the responsibility of the programmer to 1743 /// correctly ensure that the same options are used for decompression. 1744 /// All these options are ignored when decompressing .xz stream. 1745 uint preset = 6; 1746 /// ditto 1747 Flag!"extreme" extreme; 1748 /// ditto 1749 LzmaFilter[] filters; 1750 /// ditto 1751 uint deltaDist; 1752 1753 alias Stream = LzmaStream; 1754 1755 this(LzmaFormat format) @safe 1756 { 1757 this.format = format; 1758 } 1759 1760 /// convenience constructor to copy parameters of the compression 1761 /// for the decompression. Especially useful for the raw decompression, 1762 /// to ensure that the parameters fit the ones used for compression. 1763 this(CompressLzma compress) @safe 1764 { 1765 format = compress.format; 1766 preset = compress.preset; 1767 extreme = compress.extreme; 1768 filters = compress.filters; 1769 deltaDist = compress.deltaDist; 1770 } 1771 1772 private void initStream(Stream stream) @trusted 1773 { 1774 ulong memlim = memLimit; 1775 if (memLimit == size_t.max) 1776 memlim = ulong.max; 1777 1778 lzma_ret res; 1779 1780 final switch (format) 1781 { 1782 case LzmaFormat.xz: 1783 res = lzma_stream_decoder(&stream.strm, memlim, 0); 1784 break; 1785 case LzmaFormat.legacy: 1786 res = lzma_alone_decoder(&stream.strm, memlim); 1787 break; 1788 case LzmaFormat.raw: 1789 case LzmaFormat.rawLegacy: 1790 case LzmaFormat.rawCopy: 1791 uint pres = preset; 1792 if (extreme) 1793 pres |= LZMA_PRESET_EXTREME; 1794 1795 const chain = stream.buildFilterChain(format, filters, pres, deltaDist); 1796 1797 res = lzma_raw_decoder(&stream.strm, chain.ptr); 1798 } 1799 enforce(res == lzma_ret.OK, "Could not initialize LZMA encoder: ", res.to!string); 1800 } 1801 1802 Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe 1803 { 1804 return lzmaCode(stream, lastChunk); 1805 } 1806 1807 Stream initialize() @safe 1808 { 1809 auto stream = new LzmaStream; 1810 initStream(stream); 1811 return stream; 1812 } 1813 1814 void reset(Stream stream) @safe 1815 { 1816 // Lzma supports reset out of the box by recalling initialization 1817 // function without calling lzma_end. 1818 1819 initStream(stream); 1820 } 1821 1822 void end(Stream stream) @trusted 1823 { 1824 lzma_end(&stream.strm); 1825 } 1826 } 1827 1828 private Flag!"streamEnded" lzmaCode(LzmaStream stream, Flag!"lastChunk" lastChunk) @safe 1829 { 1830 import std.conv : to; 1831 1832 const action = lastChunk ? lzma_action.FINISH : lzma_action.RUN; 1833 const res = (() @trusted => lzma_code(&stream.strm, action))(); 1834 1835 enforce( 1836 res == lzma_ret.OK || res == lzma_ret.STREAM_END, 1837 "LZMA encoding failed with code: " ~ res.to!string 1838 ); 1839 1840 return cast(Flag!"streamEnded")(res == lzma_ret.STREAM_END); 1841 } 1842 1843 /// 1844 @("Compress / Decompress XZ") 1845 unittest 1846 { 1847 import test.util; 1848 import std.array : join; 1849 1850 const len = 100_000; 1851 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 1852 const input = generateRepetitiveData(len, phrase).join(); 1853 1854 const squized = only(input) 1855 .compressXz() 1856 .join(); 1857 1858 const output = only(squized) 1859 .decompressXz() 1860 .join(); 1861 1862 assert(squized.length < input.length); 1863 assert(output == input); 1864 1865 // for such long and repetitive data, ratio is around 0.2% 1866 const ratio = cast(double) squized.length / cast(double) input.length; 1867 assert(ratio < 0.003); 1868 } 1869 1870 /// 1871 @("Integrity check XZ") 1872 unittest 1873 { 1874 import test.util; 1875 import std.array : join; 1876 1877 const len = 100_000; 1878 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 1879 const input = generateRepetitiveData(len, phrase).join(); 1880 1881 auto squized = only(input) 1882 .compressXz() 1883 .join() 1884 .dup; // dup because const(ubyte)[] is returned 1885 1886 squized[squized.length / 2] += 1; 1887 1888 assertThrown( 1889 only(squized) 1890 .decompressXz() 1891 .join() 1892 ); 1893 } 1894 1895 /// 1896 @("Compress / Decompress XZ with filter") 1897 unittest 1898 { 1899 import test.util; 1900 import std.array : join; 1901 1902 const len = 100_000; 1903 const input = generateSequentialData(len, 1245, 27).join(); 1904 1905 const reference = only(input) 1906 .compressXz() 1907 .join(); 1908 1909 CompressLzma comp; 1910 comp.filters ~= LzmaFilter.delta; 1911 comp.deltaDist = 8; // sequential data of 8 byte integers 1912 1913 const withDelta = only(input) 1914 .squiz(comp) 1915 .join(); 1916 1917 const output = only(withDelta) 1918 .decompressXz() 1919 .join(); 1920 1921 assert(output == input); 1922 // < 20% compression without filter (sequential data is tough) 1923 // < 0.5% compression with delta (peace of cake) 1924 assert(input.length > reference.length * 5); 1925 assert(input.length > withDelta.length * 200); 1926 } 1927 1928 /// 1929 @("Compress / Decompress Lzma Raw") 1930 unittest 1931 { 1932 import test.util; 1933 import std.array : join; 1934 1935 const len = 100_000; 1936 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 1937 const input = generateRepetitiveData(len, phrase).join(); 1938 1939 const reference = only(input) 1940 .compressXz() 1941 .join(); 1942 1943 const squized = only(input) 1944 .compressLzmaRaw() 1945 .join(); 1946 1947 const output = only(squized) 1948 .decompressLzmaRaw() 1949 .join(); 1950 1951 assert(output == input); 1952 assert(squized.length < input.length); 1953 assert(squized.length < reference.length); // win header/trailer space 1954 1955 // for such repetitive data, ratio is around 1.13% 1956 // also generally better than zlib, bzip2 struggles a lot for repetitive data 1957 const ratio = cast(double) squized.length / cast(double) input.length; 1958 assert(ratio < 0.003); 1959 } 1960 1961 /// 1962 @("Compress / Decompress Lzma Raw with filter") 1963 unittest 1964 { 1965 import test.util; 1966 import std.array : join; 1967 1968 const len = 100_000; 1969 const input = generateSequentialData(len, 1245, 27).join(); 1970 1971 const reference = only(input) 1972 .compressLzmaRaw() 1973 .join(); 1974 1975 CompressLzma comp; 1976 comp.format = LzmaFormat.raw; 1977 comp.filters ~= LzmaFilter.delta; 1978 comp.deltaDist = 8; // sequential data of 8 byte integers 1979 1980 const withDelta = only(input) 1981 .squiz(comp) 1982 .join(); 1983 1984 const output = only(withDelta) // using compression parameters for decompression 1985 .squiz(DecompressLzma(comp)) 1986 .join(); 1987 1988 assert(output == input); 1989 // < 20% compression without filter (sequential data is tough) 1990 // < 0.4% compression with delta (peace of cake) 1991 assert(input.length > reference.length * 5); 1992 assert(input.length > withDelta.length * 250); 1993 } 1994 1995 /// 1996 @("Compress / Decompress Lzma Legacy") 1997 unittest 1998 { 1999 import test.util; 2000 import std.array : join; 2001 2002 const len = 100_000; 2003 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 2004 const input = generateRepetitiveData(len, phrase).join(); 2005 2006 auto comp = CompressLzma(LzmaFormat.legacy); 2007 auto decomp = DecompressLzma(comp); 2008 2009 const squized = only(input) 2010 .squiz(comp) 2011 .join(); 2012 2013 const output = only(squized) 2014 .squiz(decomp) 2015 .join(); 2016 2017 assert(squized.length < input.length); 2018 assert(output == input); 2019 2020 // for such repetitive data, ratio is around 1.13% 2021 // also generally better than zlib, bzip2 struggles a lot for repetitive data 2022 const ratio = cast(double) squized.length / cast(double) input.length; 2023 assert(ratio < 0.003); 2024 } 2025 2026 /// 2027 @("Compress / Decompress Lzma Raw Legacy") 2028 unittest 2029 { 2030 import test.util; 2031 import std.array : join; 2032 2033 const len = 100_000; 2034 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 2035 const input = generateRepetitiveData(len, phrase).join(); 2036 2037 auto comp = CompressLzma(LzmaFormat.rawLegacy); 2038 auto decomp = DecompressLzma(comp); 2039 2040 const squized = only(input) 2041 .squiz(comp) 2042 .join(); 2043 2044 const output = only(squized) 2045 .squiz(decomp) 2046 .join(); 2047 2048 assert(squized.length < input.length); 2049 assert(output == input); 2050 2051 // for such repetitive data, ratio is around 1.13% 2052 // also generally better than zlib, bzip2 struggles a lot for repetitive data 2053 const ratio = cast(double) squized.length / cast(double) input.length; 2054 assert(ratio < 0.003); 2055 } 2056 2057 /// 2058 @("Compress / Decompress Lzma rawLegacy with filter") 2059 unittest 2060 { 2061 import test.util; 2062 import std.array : join; 2063 2064 const len = 100_000; 2065 const input = generateSequentialData(len, 1245, 27).join(); 2066 2067 const reference = only(input) 2068 .squiz(CompressLzma(LzmaFormat.legacy)) 2069 .join(); 2070 2071 CompressLzma comp; 2072 comp.format = LzmaFormat.rawLegacy; 2073 comp.filters ~= LzmaFilter.delta; 2074 comp.deltaDist = 8; // sequential data of 8 byte integers 2075 2076 auto decomp = DecompressLzma(comp); 2077 2078 const withDelta = only(input) 2079 .squiz(comp) 2080 .join(); 2081 2082 const output = only(withDelta) 2083 .squiz(decomp) 2084 .join(); 2085 2086 assert(output == input); 2087 // < 20% compression without filter (sequential data is tough) 2088 // < 0.4% compression with delta (peace of cake) 2089 assert(input.length > reference.length * 5); 2090 assert(input.length > withDelta.length * 250); 2091 } 2092 } 2093 2094 version (HaveSquizZstandard) 2095 { 2096 auto compressZstd(I)(I input, size_t chunkSize = defaultChunkSize) 2097 { 2098 return squiz(input, CompressZstd.init, chunkSize); 2099 } 2100 2101 auto decompressZstd(I)(I input, size_t chunkSize = defaultChunkSize) 2102 { 2103 return squiz(input, DecompressZstd.init, chunkSize); 2104 } 2105 2106 class ZstdStream : SquizStream 2107 { 2108 private ZSTD_inBuffer inBuf; 2109 private ZSTD_outBuffer outBuf; 2110 private size_t totalIn; 2111 private size_t totalOut; 2112 2113 @property const(ubyte)[] input() const @trusted 2114 { 2115 auto ptr = cast(const(ubyte)*) inBuf.src; 2116 return ptr[inBuf.pos .. inBuf.size]; 2117 } 2118 2119 @property void input(const(ubyte)[] inp) @trusted 2120 { 2121 totalIn += inBuf.pos; 2122 inBuf.pos = 0; 2123 inBuf.src = cast(const(void)*) inp.ptr; 2124 inBuf.size = inp.length; 2125 } 2126 2127 @property size_t totalInput() const @safe 2128 { 2129 return totalIn + inBuf.pos; 2130 } 2131 2132 @property inout(ubyte)[] output() inout @trusted 2133 { 2134 auto ptr = cast(inout(ubyte)*) outBuf.dst; 2135 return ptr[outBuf.pos .. outBuf.size]; 2136 } 2137 2138 @property void output(ubyte[] outp) @trusted 2139 { 2140 totalOut += outBuf.pos; 2141 outBuf.pos = 0; 2142 outBuf.dst = cast(void*) outp.ptr; 2143 outBuf.size = outp.length; 2144 } 2145 2146 @property size_t totalOutput() const @safe 2147 { 2148 return totalOut + outBuf.pos; 2149 } 2150 2151 override string toString() const @safe 2152 { 2153 import std.format : format; 2154 2155 string res; 2156 res ~= "ZstdStream:\n"; 2157 res ~= " Input:\n"; 2158 res ~= format!" start 0x%016x\n"(inBuf.src); 2159 res ~= format!" pos %s\n"(inBuf.pos); 2160 res ~= format!" size %s\n"(inBuf.size); 2161 res ~= format!" total %s\n"(totalInput); 2162 res ~= " Output:\n"; 2163 res ~= format!" start 0x%016x\n"(outBuf.dst); 2164 res ~= format!" pos %s\n"(outBuf.pos); 2165 res ~= format!" size %s\n"(outBuf.size); 2166 res ~= format!" total %s"(totalOutput); 2167 2168 return res; 2169 } 2170 } 2171 2172 private string zstdSetCParam(string name) 2173 { 2174 return "if (" ~ name ~ ") " ~ 2175 "ZSTD_CCtx_setParameter(cctx, ZSTD_cParameter." ~ name ~ ", " ~ name ~ ");"; 2176 } 2177 2178 private void zstdError(size_t code, string desc) @trusted 2179 { 2180 import std.string : fromStringz; 2181 2182 if (ZSTD_isError(code)) 2183 { 2184 const msg = fromStringz(ZSTD_getErrorName(code)); 2185 throw new Exception((desc ~ ": " ~ msg).idup); 2186 } 2187 } 2188 2189 /// Zstandard is a fast compression algorithm designed for streaming. 2190 /// See zstd.h (enum ZSTD_cParameter) for details. 2191 struct CompressZstd 2192 { 2193 static assert(isSquizAlgo!CompressZstd); 2194 2195 /// Common paramters. 2196 /// A value of zero indicates that the default should be used. 2197 int compressionLevel; 2198 /// ditto 2199 int windowLog; 2200 /// ditto 2201 int hashLog; 2202 /// ditto 2203 int chainLog; 2204 /// ditto 2205 int searchLog; 2206 /// ditto 2207 int minMatch; 2208 /// ditto 2209 int targetLength; 2210 /// ditto 2211 int strategy; 2212 2213 /// Long distance matching parameters (LDM) 2214 /// Can be activated for large inputs to improve the compression ratio. 2215 /// Increases memory usage and the window size 2216 /// A value of zero indicate that the default should be used. 2217 bool enableLongDistanceMatching; 2218 /// ditto 2219 int ldmHashLog; 2220 /// ditto 2221 int ldmMinMatch; 2222 /// ditto 2223 int ldmBucketSizeLog; 2224 /// ditto 2225 int ldmHashRateLog; 2226 2227 // frame parameters 2228 2229 /// If input data content size is known, before 2230 /// start of streaming, set contentSize to its value. 2231 /// It will enable the size to be written in the header 2232 /// and checked after decompression. 2233 ulong contentSize = ulong.max; 2234 /// Include a checksum of the content in the trailer. 2235 bool checksumFlag = false; 2236 /// When applicable, dictionary's ID is written in the header 2237 bool dictIdFlag = true; 2238 2239 /// Multi-threading parameters 2240 int nbWorkers; 2241 /// ditto 2242 int jobSize; 2243 /// ditto 2244 int overlapLog; 2245 2246 static final class Stream : ZstdStream 2247 { 2248 private ZSTD_CStream* strm; 2249 } 2250 2251 private void setParams(Stream stream) @trusted 2252 { 2253 auto cctx = cast(ZSTD_CCtx*) stream.strm; 2254 2255 mixin(zstdSetCParam("compressionLevel")); 2256 mixin(zstdSetCParam("windowLog")); 2257 mixin(zstdSetCParam("hashLog")); 2258 mixin(zstdSetCParam("chainLog")); 2259 mixin(zstdSetCParam("searchLog")); 2260 mixin(zstdSetCParam("minMatch")); 2261 mixin(zstdSetCParam("targetLength")); 2262 mixin(zstdSetCParam("strategy")); 2263 2264 if (enableLongDistanceMatching) 2265 { 2266 ZSTD_CCtx_setParameter(cctx, 2267 ZSTD_cParameter.enableLongDistanceMatching, 2268 1 2269 ); 2270 2271 mixin(zstdSetCParam("ldmHashLog")); 2272 mixin(zstdSetCParam("ldmMinMatch")); 2273 mixin(zstdSetCParam("ldmBucketSizeLog")); 2274 mixin(zstdSetCParam("ldmHashRateLog")); 2275 } 2276 2277 if (contentSize != size_t.max) 2278 ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize); 2279 if (checksumFlag) 2280 ZSTD_CCtx_setParameter( 2281 cctx, 2282 ZSTD_cParameter.checksumFlag, 2283 1 2284 ); 2285 if (!dictIdFlag) 2286 ZSTD_CCtx_setParameter( 2287 cctx, 2288 ZSTD_cParameter.checksumFlag, 2289 0 2290 ); 2291 2292 mixin(zstdSetCParam("nbWorkers")); 2293 mixin(zstdSetCParam("jobSize")); 2294 mixin(zstdSetCParam("overlapLog")); 2295 } 2296 2297 Stream initialize() @trusted 2298 { 2299 auto stream = new Stream; 2300 2301 stream.strm = ZSTD_createCStream(); 2302 2303 setParams(stream); 2304 2305 return stream; 2306 } 2307 2308 Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe 2309 { 2310 auto cctx = cast(ZSTD_CCtx*) stream.strm; 2311 const directive = lastChunk ? ZSTD_EndDirective.end : ZSTD_EndDirective._continue; 2312 2313 const res = (() @trusted => ZSTD_compressStream2(cctx, &stream.outBuf, &stream.inBuf, directive))(); 2314 2315 zstdError(res, "Could not compress data with Zstandard"); 2316 return cast(Flag!"streamEnded")(lastChunk && res == 0); 2317 } 2318 2319 void reset(Stream stream) @trusted 2320 { 2321 auto cctx = cast(ZSTD_CCtx*) stream.strm; 2322 ZSTD_CCtx_reset(cctx, ZSTD_ResetDirective.session_only); 2323 2324 if (contentSize != size_t.max) 2325 ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize); 2326 2327 stream.inBuf = ZSTD_inBuffer.init; 2328 stream.outBuf = ZSTD_outBuffer.init; 2329 stream.totalIn = 0; 2330 stream.totalOut = 0; 2331 } 2332 2333 void end(Stream stream) @trusted 2334 { 2335 ZSTD_freeCStream(stream.strm); 2336 } 2337 } 2338 2339 struct DecompressZstd 2340 { 2341 static assert(isSquizAlgo!DecompressZstd); 2342 2343 int windowLogMax; 2344 2345 static final class Stream : ZstdStream 2346 { 2347 private ZSTD_DStream* strm; 2348 } 2349 2350 private void setParams(Stream stream) @trusted 2351 { 2352 auto dctx = cast(ZSTD_DCtx*) stream.strm; 2353 2354 if (windowLogMax) 2355 ZSTD_DCtx_setParameter(dctx, 2356 ZSTD_dParameter.windowLogMax, windowLogMax); 2357 } 2358 2359 Stream initialize() @trusted 2360 { 2361 auto stream = new Stream; 2362 2363 stream.strm = ZSTD_createDStream(); 2364 2365 setParams(stream); 2366 2367 return stream; 2368 } 2369 2370 Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe 2371 { 2372 const res = (() @trusted => ZSTD_decompressStream(stream.strm, &stream.outBuf, &stream 2373 .inBuf))(); 2374 2375 zstdError(res, "Could not decompress data with Zstandard"); 2376 return cast(Flag!"streamEnded")(res == 0); 2377 } 2378 2379 void reset(Stream stream) @trusted 2380 { 2381 auto dctx = cast(ZSTD_DCtx*) stream.strm; 2382 ZSTD_DCtx_reset(dctx, ZSTD_ResetDirective.session_only); 2383 } 2384 2385 void end(Stream stream) @trusted 2386 { 2387 ZSTD_freeDStream(stream.strm); 2388 } 2389 } 2390 2391 /// 2392 @("Compress / Decompress Zstandard") 2393 unittest 2394 { 2395 import test.util; 2396 import std.array : join; 2397 2398 const len = 100_000; 2399 const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n"; 2400 const input = generateRepetitiveData(len, phrase).join(); 2401 2402 const squized = only(input) 2403 .compressZstd() 2404 .join(); 2405 2406 const output = only(squized) 2407 .decompressZstd() 2408 .join(); 2409 2410 assert(squized.length < input.length); 2411 assert(output == input); 2412 2413 // for such long and repetitive data, ratio is around 0.047% 2414 const ratio = cast(double) squized.length / cast(double) input.length; 2415 assert(ratio < 0.0005); 2416 } 2417 2418 }