/// Compression and decompression streaming algorithms.
///
/// Each compression or decompression algorithm is represented by a struct
/// that contains parameters for compression/decompression.
/// Besides the parameters they carry, algorithms have no state. Each
/// algorithm instance can be used for an unlimited number of parallel jobs.
///
/// The algorithms create a stream, which carries the state and allocated
/// resources of the ongoing compression.
///
/// The compression/decompression jobs are run by the `squiz` function,
/// or one of the related helpers built upon it (e.g. deflate, deflateGz, inflate, ...).
///
/// `squiz` and related functions take an InputRange of ubyte[] and return an InputRange of ubyte[].
/// This allows streaming in the most natural way for a D program and provides
/// the greatest versatility.
/// It is possible to read the data from any source (file, network, memory),
/// process the data, and write to any kind of destination.
/// This also allows processing gigabytes of data with little memory usage.
///
/// Compression often wraps the compressed data with a header and a trailer
/// that give the decompression algorithm useful information, especially
/// to check the integrity of the data after decompression.
/// This is called the format.
/// Some compression algorithms offer different formats, and sometimes
/// the possibility to not wrap the data at all (raw format), in which
/// case the integrity check is not performed. This is usually used when
/// an external integrity check is done, for example when archiving
/// compressed streams in Zip or 7z archives.
module squiz_box.squiz;

import squiz_box.c.bzip2;
import squiz_box.c.lzma;
import squiz_box.c.zlib;
import squiz_box.c.zstd;
import squiz_box.priv;

import std.datetime.systime;
import std.exception;
import std.range;
import std.typecons;

/// Size, in bytes, of the chunk buffer used by default for
/// data exchanges and I/O operations.
enum defaultChunkSize = 8192;

/// A chunk of bytes: the unit of data exchanged during I/O
/// and data transformation operations.
alias ByteChunk = const(ubyte)[];

/// Type-erased input range whose elements are chunks of bytes.
alias ByteRange = InputRange!ByteChunk;

/// Compile-time check that `BR` is an input range whose elements are
/// themselves ranges of (possibly qualified) `ubyte`.
template isByteRange(BR)
{
    import std.traits : isArray, Unqual;
    import std.range : ElementType, isInputRange;

    // element type of the chunks yielded by BR
    alias Byte = ElementType!(ElementType!BR);

    enum isByteRange = is(Unqual!Byte == ubyte) && isInputRange!BR;
}

static assert(isByteRange!ByteRange);

/// Exception thrown when a decompression algorithm is fed with
/// inconsistent data, i.e. data that was not compressed with the
/// corresponding algorithm, or wrapped in a format that is not
/// the expected one.
@safe class DataException : Exception
{
    mixin basicExceptionCtors!();
}

/// Compile-time check that `A` is a proper squiz algorithm:
/// it must provide `initialize`, `process`, `reset` and `end`,
/// and the stream it creates must be a `SquizStream`.
template isSquizAlgo(A)
{
    enum isSquizAlgo = is(typeof((A a) {
                auto s = a.initialize();
                Flag!"streamEnded" done = a.process(s, Yes.lastChunk);
                a.reset(s);
                a.end(s);
                static assert(is(typeof(s) : SquizStream));
            }));
}

/// The type of stream created by the squiz algorithm `A`
/// (the return type of `A.initialize`).
template StreamType(A) if (isSquizAlgo!A)
{
    import std.traits : ReturnType;

    alias StreamType = ReturnType!(A.initialize);
}

/// A squiz algorithm whose type is erased behind an interface.
/// This helps to choose the algorithm at run time.
interface SquizAlgo
{
    /// Create a new stream that carries the state needed
    /// to process data with this algorithm.
    SquizStream initialize() @safe;

    /// Process the input data of `stream` and write the result to its output.
    /// `lastChunk` tells the algorithm that the input chunk currently in the
    /// stream is the last one, so that it can start finishing the work.
    /// The returned flag tells that no more output will be generated
    /// beyond what is already in `stream.output`.
    Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe;

    /// Reset the state of `stream`, while reusing the resources already
    /// allocated, in order to start processing another data stream.
    void reset(SquizStream stream) @safe;

    /// Release the resources used by `stream`.
    /// Most of the memory (if not all) used by the algorithms is allocated
    /// with the garbage collector, so not calling this function has
    /// little consequence (if any).
    void end(SquizStream stream) @safe;
}

static assert(isSquizAlgo!SquizAlgo);

/// Wrap a compile-time algorithm in the run-time `SquizAlgo` interface.
SquizAlgo squizAlgo(A)(A algo) @safe if (isSquizAlgo!A)
{
    return new CSquizAlgo!A(algo);
}

///
@("squizAlgo")
unittest
{
    import test.util;
    import std.array : join;

    const len = 10_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    // the same algorithm, once statically typed and once type-erased
    auto staticAlgo = Deflate.init;
    auto dynamicAlgo = squizAlgo(Deflate.init);

    const staticResult = only(input).squiz(staticAlgo).join();
    const dynamicResult = only(input).squiz(dynamicAlgo).join();

    assert(staticResult == dynamicResult);
}

// Adapter implementing the run-time SquizAlgo interface
// on top of the compile-time algorithm A.
private class CSquizAlgo(A) : SquizAlgo
{
    alias Stream = StreamType!A;

    A algo;

    private this(A algo) @safe
    {
        this.algo = algo;
    }

    // Downcast the type-erased stream to the concrete stream type of A.
    private Stream checkStream(SquizStream stream)
    {
        auto typed = cast(Stream) stream;
        assert(typed, "provided stream is not produced by this algorithm");
        return typed;
    }

    SquizStream initialize() @safe
    {
        return algo.initialize();
    }

    Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe
    {
        auto typed = checkStream(stream);
        return algo.process(typed, lastChunk);
    }

    void reset(SquizStream stream) @safe
    {
        algo.reset(checkStream(stream));
    }

    void end(SquizStream stream) @safe
    {
        algo.end(checkStream(stream));
    }
}

/// A state-carrying processing stream for squiz algorithms.
/// The stream does not own any buffer, it only holds slices of external buffers.
/// It is normally not used directly, as everything is handled
/// by the `squiz` function.
interface SquizStream
{
    /// Input data for the algorithm.
    /// The slice is reduced from its beginning as the processing moves on.
    /// It must be refilled when empty before calling the algorithm `process` method.
    @property const(ubyte)[] input() const @safe;
    /// Ditto
    @property void input(const(ubyte)[] inp) @safe;

    /// How many bytes were read since the start of the stream processing.
    @property size_t totalInput() const @safe;

    /// Output buffer for the algorithm to write to.
    /// This is NOT the data ready after process, but where the
    /// algorithm must write next.
    /// After a call to process, the slice is reduced from its beginning,
    /// and the data written is therefore the one located before the slice.
    @property inout(ubyte)[] output() inout @safe;
    /// Ditto
    @property void output(ubyte[] outp) @safe;

    /// How many bytes were written since the start of the stream processing.
    @property size_t totalOutput() const @safe;
}

// Check that S exposes the next_in/avail_in/next_out/avail_out
// fields shared by the zlib-like C stream structs.
private template isZlibLikeStream(S)
{
    enum isZlibLikeStream = is(typeof((S stream) {
                stream.next_in = cast(const(ubyte)*) null;
                stream.avail_in = 0;
                stream.next_out = cast(ubyte*) null;
                stream.avail_out = 0;
            }));
}

// Implementation of the input/output properties of SquizStream
// on top of a zlib-like C stream struct S, stored in `strm`.
private mixin template ZlibLikeStreamImpl(S) if (isZlibLikeStream!S)
{
    private S strm;

    @property const(ubyte)[] input() const @trusted
    {
        const avail = strm.avail_in;
        return strm.next_in[0 .. avail];
    }

    @property void input(const(ubyte)[] inp) @trusted
    {
        // the length is converted to whatever integer type the C struct uses
        strm.avail_in = cast(typeof(strm.avail_in)) inp.length;
        strm.next_in = inp.ptr;
    }

    @property inout(ubyte)[] output() inout @trusted
    {
        const avail = strm.avail_out;
        return strm.next_out[0 .. avail];
    }

    @property void output(ubyte[] outp) @trusted
    {
        // the length is converted to whatever integer type the C struct uses
        strm.avail_out = cast(typeof(strm.avail_out)) outp.length;
        strm.next_out = outp.ptr;
    }
}

// Implementation of totalInput/totalOutput for C stream structs
// that have total_in and total_out fields.
mixin template ZlibLikeTotalInOutImpl()
{
    @property size_t totalInput() const
    {
        return cast(size_t) strm.total_in;
    }

    @property size_t totalOutput() const
    {
        return cast(size_t) strm.total_out;
    }
}

/// Returns an InputRange containing the input data processed through the supplied algorithm.
/// A buffer of `chunkSize` bytes is allocated to hold the chunks of output.
auto squiz(I, A)(I input, A algo, size_t chunkSize = defaultChunkSize)
if (isByteRange!I && isSquizAlgo!A)
{
    auto chunkBuffer = new ubyte[chunkSize];
    return squiz(input, algo, chunkBuffer);
}

/// ditto
/// This overload uses the caller-supplied `chunkBuffer` to hold the chunks of output.
auto squiz(I, A)(I input, A algo, ubyte[] chunkBuffer)
if (isByteRange!I && isSquizAlgo!A)
{
    // the stream is owned by the returned range, which ends it when processing is done
    return Squiz!(I, A, Yes.endStream)(input, algo, algo.initialize(), chunkBuffer, ulong.max);
}

/// Returns an InputRange containing the input data processed through the supplied algorithm.
/// Unlike `squiz`, `squizReuse` does not manage the state (aka stream) of the algorithm,
/// which allows to reuse it (and its allocated resources) for several jobs.
/// squizReuse will drive the algorithm and move the stream forward until processing is over.
/// The stream must be either freshly initialized or freshly reset before being passed
/// to this function.
auto squizReuse(I, A, S)(I input, A algo, S stream, ubyte[] chunkBuffer)
if (isByteRange!I && isSquizAlgo!A)
{
    static assert(is(StreamType!A == S), S.stringof ~ " is not the stream produced by " ~ A.stringof);
    return Squiz!(I, A, No.endStream)(input, algo, stream, chunkBuffer, ulong.max);
}

/// Same as squiz, but will stop encoding/decoding after `maxOut` bytes have been written out.
/// Useful to decode some raw encoded streams where the uncompressed size is known
/// and the algorithm does not always report Yes.streamEnded.
///
/// Params:
///   input = the range of byte chunks to process
///   algo = the algorithm to process the data with
///   maxOut = the maximum number of bytes produced by the returned range
///   chunkSize = the maximum size of the chunks produced by the returned range
auto squizMaxOut(I, A)(I input, A algo, ulong maxOut, size_t chunkSize = defaultChunkSize)
if (isByteRange!I && isSquizAlgo!A)
{
    import std.algorithm : min;

    // no need for a buffer bigger than the total expected output
    const sz = cast(size_t) min(maxOut, chunkSize);
    auto chunkBuffer = new ubyte[sz];
    auto stream = algo.initialize();
    return Squiz!(I, A, Yes.endStream)(input, algo, stream, chunkBuffer, maxOut);
}

// Common transformation range for all compression/decompression functions.
// I is a byte input range
// A is a squiz algorithm
// if Yes.endStream, the stream is ended when data is done processing
private struct Squiz(I, A, Flag!"endStream" endStream)
{
    private alias Stream = StreamType!A;

    // Byte input range (by chunks)
    private I input;

    // The algorithm
    private A algo;

    // The stream carrying the processing state
    private Stream stream;

    // Buffer used to store the front chunk
    private ubyte[] chunkBuffer;
    // Slice of the buffer that is valid for read out
    private ByteChunk chunk;

    // Remaining number of bytes allowed to be written out
    private ulong maxLen;

    // Whether the processing is over, either because the algorithm
    // reported the end of stream or because maxLen bytes were written
    private bool ended;

    private this(I input, A algo, Stream stream, ubyte[] chunkBuffer, ulong maxLen)
    {
        this.input = input;
        this.algo = algo;
        this.stream = stream;
        this.chunkBuffer = chunkBuffer;
        this.maxLen = maxLen;
        prime();
    }

    @property bool empty()
    {
        return chunk.length == 0;
    }

    @property ByteChunk front()
    {
        return chunk;
    }

    void popFront()
    {
        chunk = null;
        if (!ended)
            prime();
    }

    // Fill chunkBuffer with processed data until it is full, the stream
    // is ended, or the maximum number of output bytes is reached.
    private void prime()
    {
        import std.algorithm : min;

        while (chunk.length < chunkBuffer.length)
        {
            if (stream.input.length == 0 && !input.empty)
                stream.input = input.front;

            // offer to the algorithm the free space of the buffer,
            // limited to what is still allowed to be written
            const start = chunk.length;
            const len = cast(size_t) min(chunkBuffer.length - start, maxLen);
            stream.output = chunkBuffer[start .. start + len];

            const streamEnded = algo.process(stream, cast(Flag!"lastChunk") input.empty);

            // The algorithm may write less than what was offered: account
            // for the bytes actually written only. What remains in
            // stream.output is the part of the offered slice left unwritten.
            const written = len - stream.output.length;
            chunk = chunkBuffer[0 .. start + written];
            maxLen -= written;

            // popFront must be called at the end because it invalidates
            // the input chunk referenced by the stream
            if (stream.input.length == 0 && !input.empty)
                input.popFront();

            if (streamEnded || maxLen == 0)
            {
                ended = true;
                static if (endStream)
                    algo.end(stream);
                break;
            }
        }
    }
}

@("squizMaxOut")
unittest
{
    // encoded header of test/data/archive.7z
    const(ubyte)[] dataIn = [
        0x00, 0x00, 0x81, 0x33, 0x07, 0xae, 0x0f, 0xd1, 0xf2, 0xfb, 0xfd, 0x40,
        0xc0, 0x90, 0xd2, 0xff, 0x7d, 0x69, 0x4d, 0x90, 0xd3, 0x2c, 0x42, 0x66,
        0xb0, 0xc6, 0xcc, 0xeb, 0xcf, 0x59, 0xcc, 0x96, 0x23, 0xf9, 0x91, 0xc8,
        0x75, 0x49, 0xe9, 0x9d, 0x1a, 0xa8, 0xa5, 0x9d, 0xf7, 0x75, 0x29, 0x1a,
        0x90, 0x78, 0x18, 0x8e, 0x42, 0x1a, 0x97, 0x0c, 0x40, 0xb7, 0xaa, 0xb6,
        0x16, 0xa9, 0x91, 0x0c, 0x58, 0xad, 0x75, 0xf7, 0x8f, 0xaf, 0x8f, 0x45,
        0xdb, 0x78, 0xd0, 0x8e, 0xc6, 0x1b, 0x72, 0xa5, 0xf4, 0xd2, 0x46, 0xf7,
        0xe1, 0xce, 0x01, 0x80, 0x7f, 0x3d, 0x66, 0xa5, 0x2d, 0x64, 0xd7, 0xb0,
        0x41, 0xdc, 0x92, 0x59, 0x88, 0xb0, 0x4c, 0x67, 0x34, 0xb6, 0x4e, 0xd3,
        0xd5, 0x01, 0x8d, 0x43, 0x13, 0x9c, 0x82, 0x78, 0x4d, 0xcf, 0x8c, 0x51,
        0x25, 0x0f, 0xd5, 0x1d, 0x80, 0x4b, 0x80, 0xea, 0x18, 0xc1, 0x29, 0x49,
        0xe4, 0x4d, 0x4d, 0x8b, 0xb9, 0xa1, 0xfc, 0x17, 0x2b, 0xb3, 0xe6, 0x00,
        0x00, 0x00
    ];
    // decoded header data of test/data/archive.7z
    const(ubyte)[] expectedDataOut = [
        0x01, 0x04, 0x06, 0x00, 0x01, 0x09, 0x40, 0x00, 0x07, 0x0b, 0x01, 0x00,
        0x01, 0x21, 0x21, 0x01, 0x00, 0x0c, 0x8d, 0xe2, 0x00, 0x08, 0x0d, 0x03,
        0x09, 0x8d, 0xc1, 0x07, 0x0a, 0x01, 0x84, 0x4d, 0x4d, 0xa8, 0x9e, 0xf4,
        0xb3, 0xdb, 0x12, 0xed, 0x64, 0x40, 0x00, 0x00, 0x05, 0x03, 0x19, 0x0d,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x11, 0x55, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00,
        0x20, 0x00, 0x32, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00,
        0x00, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x31, 0x00,
        0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00, 0x00, 0x00, 0x66, 0x00,
        0x6f, 0x00, 0x6c, 0x00, 0x64, 0x00, 0x65, 0x00, 0x72, 0x00, 0x2f, 0x00,
        0x63, 0x00, 0x68, 0x00, 0x6d, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x20, 0x00,
        0x36, 0x00, 0x36, 0x00, 0x36, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00,
        0x74, 0x00, 0x00, 0x00, 0x14, 0x1a, 0x01, 0x00, 0x80, 0x96, 0x9f, 0xd5,
        0xc8, 0x53, 0xd8, 0x01, 0x80, 0x50, 0x82, 0x4f, 0xc6, 0x53, 0xd8, 0x01,
        0x00, 0xff, 0x13, 0x13, 0xb7, 0x52, 0xd8, 0x01, 0x15, 0x0e, 0x01, 0x00,
        0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xb6, 0x81,
        0x00, 0x00
    ];

    auto algo = DecompressLzma(LzmaFormat.rawLegacy);

    const dataOut = only(dataIn)
        .squizMaxOut(algo, expectedDataOut.length)
        .join();

    assert(dataOut == expectedDataOut);
}

@("squizMaxOut with several input chunks")
unittest
{
    import std.array : join;

    // The output limit (7) is neither a multiple of the chunk size (5)
    // nor aligned on the input chunks boundaries (4, 4, 2).
    const(ubyte)[] data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
    const(ubyte)[][] chunks = [data[0 .. 4], data[4 .. 8], data[8 .. 10]];

    const dataOut = chunks.squizMaxOut(Copy.init, 7, 5).join();

    assert(dataOut == data[0 .. 7]);
}

/// The Copy algorithm does not transform data at all.
/// This is useful in cases of reading/writing data
/// that may or may not be compressed. Using Copy
/// allows the same code to handle both kinds of streams.
final class CopyStream : SquizStream
{
    // remaining input to be copied
    private const(ubyte)[] _inp;
    // total number of bytes consumed from the input
    size_t _totalIn;
    // remaining space of the output buffer
    private ubyte[] _outp;
    // total number of bytes written to the output
    size_t _totalOut;

    @property const(ubyte)[] input() const @safe
    {
        return _inp;
    }

    @property void input(const(ubyte)[] inp) @safe
    {
        _inp = inp;
    }

    @property size_t totalInput() const @safe
    {
        return _totalIn;
    }

    @property inout(ubyte)[] output() inout @safe
    {
        return _outp;
    }

    @property void output(ubyte[] outp) @safe
    {
        _outp = outp;
    }

    @property size_t totalOutput() const @safe
    {
        return _totalOut;
    }
}

/// ditto
struct Copy
{
    static assert(isSquizAlgo!Copy);

    /// Create a new stream for a copy job
    CopyStream initialize() @safe
    {
        return new CopyStream;
    }

    /// Copy as many bytes as possible from the stream input to the stream output.
    /// The stream is ended when lastChunk is set and the input is fully consumed.
    Flag!"streamEnded" process(CopyStream stream, Flag!"lastChunk" lastChunk) @safe
    {
        import std.algorithm : min;

        // limited by both the available input and the room in the output
        const len = min(stream._inp.length, stream._outp.length);

        stream._outp[0 .. len] = stream._inp[0 .. len];

        stream._inp = stream._inp[len .. $];
        stream._outp = stream._outp[len .. $];
        stream._totalIn += len;
        stream._totalOut += len;

        return cast(Flag!"streamEnded")(lastChunk && stream._inp.length == 0);
    }

    /// Reset the stream to its initial state
    void reset(CopyStream stream) @safe
    {
        stream._inp = null;
        stream._outp = null;
        stream._totalIn = 0;
        stream._totalOut = 0;
    }

    /// Nothing to release for a copy stream
    void end(CopyStream) @safe
    {
    }
}

/// ditto
auto copy(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    return squiz(input, Copy.init, chunkSize);
}

///
@("Copy")
unittest
{
    import test.util : generateRepetitiveData;
    import std.array : join;

    const len = 10_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    /// copying with arbitrary chunk sizes on input and output
    const cop1 = generateRepetitiveData(len, phrase, 1231).copy(234).join();
    const cop2 = generateRepetitiveData(len, phrase, 296).copy(6712).join();

    assert(input == cop1);
    assert(input == cop2);
}

/// Describe what type of header and trailer are wrapping
/// a deflated stream.
enum ZlibFormat
{
    /// Zlib header and trailer
    zlib,
    /// Gzip header and trailer
    gz,
    /// Auto detection of Zlib or Gzip format (only used with Inflate)
    autoDetect,
    /// No header and trailer, therefore no integrity check included.
    /// This is to be used in other formats such as Zip.
    /// When using raw, it is advised to use an external integrity check.
    raw,
}

// Length of the zero-terminated string `str`, reading at most `maxlen` bytes.
// Returns 0 if `str` is null.
private size_t strnlen(const(byte)* str, size_t maxlen) @system
{
    if (!str)
        return 0;

    size_t l;
    // the bound is checked before dereferencing, such that no byte
    // at or beyond str[maxlen] is ever read
    while (l < maxlen && str[l] != 0)
        l++;
    return l;
}

@("strnlen")
unittest
{
    assert(strnlen(null, 0) == 0);
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 15) == 10);
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 10) == 10);
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 9) == 9);
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 0) == 0);
    assert(strnlen(cast(const(byte)*)("\0bcdefghij\0klmn".ptr), 15) == 0);
}

/// Header data for the Gzip format.
/// Gzip includes metadata about the file which is compressed.
/// These can be specified here when compressing from a stream
/// rather than directly from a file.
struct GzHeader
{
    import core.stdc.config : c_ulong;

    /// operating system encoded in the Gz header
    /// Not all possible values are listed here, only
    /// the most useful ones
    enum Os
    {
        fatFs = 0,
        unix = 3,
        macintosh = 7,
        ntFs = 11,
        unknown = 255,
    }

    version (OSX)
        enum defaultOs = Os.macintosh;
    else version (iOS)
        enum defaultOs = Os.macintosh;
    else version (Posix)
        enum defaultOs = Os.unix;
    else version (Windows)
        enum defaultOs = Os.ntFs;
    else
        // fallback, such that the module compiles on every platform
        enum defaultOs = Os.unknown;

    /// Whether the content is believed to be text
    Flag!"text" text;

    // storing in unix format to avoid
    // negative numbers with SysTime.init
    private c_ulong _mtime;

    /// Modification time
    @property SysTime mtime() const @safe
    {
        return SysTime(unixTimeToStdTime(_mtime));
    }

    /// ditto
    @property void mtime(SysTime time) @safe
    {
        _mtime = stdTimeToUnixTime(time.stdTime);
    }

    /// Operating system that wrote the gz file
    Os os = defaultOs;

    /// Filename to be included in the header
    string filename;

    /// Comment to be included in the header
    string comment;

    // size of the buffers receiving the name and comment during inflate
    private enum bufSize = 256;

    // Decode a latin-1, zero-terminated C string to a D string
    private string fromLatin1z(const(byte)* ptr) @system
    {
        // ptr points to a buffer of bufSize characters.
        // End of string is a null character or end of buffer.
        // Encoding is latin 1.
        import std.encoding : Latin1Char, transcode;

        const len = strnlen(ptr, bufSize);
        auto str = cast(const(Latin1Char)[]) ptr[0 .. len];

        string res;
        transcode(str, res);
        return res;
    }

    // Encode a D string to a latin-1, zero-terminated C string
    private byte* toLatin1z(string str) @trusted
    {
        import std.encoding : Latin1Char, transcode;

        Latin1Char[] l1;
        transcode(str, l1);
        auto res = (cast(byte[]) l1) ~ 0;
        return res.ptr;
    }

    // Build from the header struct filled by zlib during inflate
    private this(gz_headerp gzh) @system
    {
        text = gzh.text ? Yes.text : No.text;
        _mtime = gzh.time;
        os = cast(Os) gzh.os;
        if (gzh.name)
            filename = fromLatin1z(gzh.name);
        if (gzh.comment)
            comment = fromLatin1z(gzh.comment);
    }

    // Build the header struct to be passed to zlib for deflate
    private gz_headerp toZlib() @safe
    {
        auto gzh = new gz_header;
        gzh.text = text ? 1 : 0;
        gzh.time = _mtime;
        gzh.os = cast(int) os;
        if (filename)
            gzh.name = toLatin1z(filename);
        if (comment)
            gzh.comment = toLatin1z(comment);
        return gzh;
    }
}

/// Type of delegate to use as callback for Inflate.gzHeaderDg
alias GzHeaderDg = void delegate(GzHeader header);

/// Helper to set GzHeader.text
/// Checks that the data contains only line feeds, carriage returns
/// and printable ascii characters (0x20 to 0x7e).
Flag!"text" isText(const(ubyte)[] data)
{
    import std.algorithm : all;

    return cast(Flag!"text") data.all!(
        c => c == 0x0a || c == 0x0d || (c >= 0x20 && c <= 0x7e)
    );
}

/// Base class of the streams of the Deflate and Inflate algorithms
class ZlibStream : SquizStream
{
    mixin ZlibLikeStreamImpl!z_stream;
    mixin ZlibLikeTotalInOutImpl!();

    private this() @safe
    {
        // memory needed by zlib goes through these allocation callbacks
        strm.zalloc = &(gcAlloc!uint);
        strm.zfree = &gcFree;
    }
}

/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is wrapped by Zlib header and trailer.
auto deflate(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    return squiz(input, Deflate.init, chunkSize);
}

/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is wrapped by Gzip header and trailer.
/// Supplying a header is entirely optional. Zlib produces a default header if not supplied.
/// The default header has text false, mtime zero, unknown os, and
/// no name or comment.
auto deflateGz(I)(I input, GzHeader header, size_t chunkSize = defaultChunkSize)
if (isByteRange!I)
{
    Deflate gzAlgo;
    gzAlgo.gzHeader = header;
    gzAlgo.format = ZlibFormat.gz;
    return squiz(input, gzAlgo, chunkSize);
}

/// ditto
auto deflateGz(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    Deflate gzAlgo;
    gzAlgo.format = ZlibFormat.gz;
    return squiz(input, gzAlgo, chunkSize);
}

/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is not wrapped by any header or trailer.
auto deflateRaw(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    Deflate rawAlgo;
    rawAlgo.format = ZlibFormat.raw;
    return squiz(input, rawAlgo, chunkSize);
}

/// Zlib's deflate algorithm
struct Deflate
{
    static assert(isSquizAlgo!Deflate);
    static assert(is(StreamType!Deflate == Stream));

    /// Which format to use for the deflated stream.
    /// With ZlibFormat.gz, the gzHeader field is used if supplied,
    /// otherwise zlib writes a default header.
    /// ZlibFormat.autoDetect is not valid for deflate.
    ZlibFormat format;

    /// Compression level from 1 (fastest) to 9 (best compression).
    int level = 6;

    /// The GzHeader to be used with ZlibFormat.gz.
    Nullable!GzHeader gzHeader;

    /// Advanced parameters
    /// See zlib's documentation of `deflateInit2`.
    /// windowBits must be between 9 and 15 included
    /// and is adjusted according to the chosen format.
    int windowBits = 15;
    /// ditto
    int memLevel = 8;
    /// ditto
    int strategy = Z_DEFAULT_STRATEGY;

    /// The stream type of the Deflate algorithm
    static final class Stream : ZlibStream
    {
    }

    /// Create and initialize a deflate stream with the parameters of this algorithm.
    /// Throws: Exception if format is ZlibFormat.autoDetect or
    /// if zlib fails to initialize the stream.
    Stream initialize() @safe
    {
        assert(
            windowBits >= 9 && windowBits <= 15,
            "inconsistent windowBits"
        );

        // zlib encodes the wrapping format in the windowBits parameter
        int bits = windowBits;
        final switch (format)
        {
        case ZlibFormat.zlib:
            break;
        case ZlibFormat.gz:
            bits += 16;
            break;
        case ZlibFormat.autoDetect:
            throw new Exception("invalid ZlibFormat for Deflate");
        case ZlibFormat.raw:
            bits = -bits;
            break;
        }

        auto result = new Stream();

        const status = (() @trusted => deflateInit2(
                &result.strm, level, Z_DEFLATED,
                bits, memLevel, strategy,
        ))();

        enforce(
            status == Z_OK,
            "Could not initialize Zlib deflate stream: " ~ zResultToString(status)
        );

        const hasHeader = !gzHeader.isNull;
        if (format == ZlibFormat.gz && hasHeader)
        {
            auto zHeader = gzHeader.get.toZlib();
            (() @trusted => deflateSetHeader(&result.strm, zHeader))();
        }

        return result;
    }

    /// Deflate the input of the stream to its output.
    /// The stream is finished when lastChunk is set.
    /// Throws: Exception if zlib reports an error.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        const mode = lastChunk ? Z_FINISH : Z_NO_FLUSH;
        // fully qualified to not pick the `deflate` helper of this module
        const status = (() @trusted => squiz_box.c.zlib.deflate(&stream.strm, mode))();

        enforce(
            status == Z_OK || status == Z_STREAM_END,
            "Zlib deflate failed with code: " ~ zResultToString(status)
        );

        return status == Z_STREAM_END ? Yes.streamEnded : No.streamEnded;
    }

    /// Reset the stream in order to start another deflate job
    void reset(Stream stream) @trusted
    {
        deflateReset(&stream.strm);
    }

    /// Release the resources of the stream
    void end(Stream stream) @trusted
    {
        deflateEnd(&stream.strm);
    }
}

/// Returns an InputRange streaming over data inflated with Zlib.
/// The input data must be deflated with a zlib format.
auto inflate(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    return squiz(input, Inflate.init, chunkSize);
}

/// Returns an InputRange streaming over data inflated with Zlib.
/// The input data must be deflated with a gz format.
/// If headerDg is not null, it will be called
/// as soon as the header is read from the stream.
auto inflateGz(I)(I input, GzHeaderDg headerDg, size_t chunkSize = defaultChunkSize)
if (isByteRange!I)
{
    auto algo = Inflate.init;
    algo.format = ZlibFormat.gz;
    algo.gzHeaderDg = headerDg;
    return squiz(input, algo, chunkSize);
}

/// ditto
auto inflateGz(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    return inflateGz(input, null, chunkSize);
}

/// Returns an InputRange streaming over data inflated with Zlib.
/// The input must be raw deflated data
auto inflateRaw(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    auto algo = Inflate.init;
    algo.format = ZlibFormat.raw;
    return squiz(input, algo, chunkSize);
}

/// Zlib's inflate algorithm
struct Inflate
{
    static assert(isSquizAlgo!Inflate);

    /// Which format is expected for the deflated stream.
    /// With ZlibFormat.gz or ZlibFormat.autoDetect, the Gzip header
    /// is passed to gzHeaderDg, if set.
    ZlibFormat format;

    /// If set, will be called with the Gz header once it is known
    GzHeaderDg gzHeaderDg;

    /// Advanced parameters
    /// See zlib's documentation of `inflateInit2`.
    /// windowBits can be 0 if format is ZlibFormat.zlib.
    /// Otherwise it must be between 9 and 15 included.
    int windowBits = 15;

    // State needed to retrieve the Gzip header during inflate
    private static final class Gzh
    {
        // header struct filled by zlib
        private gz_header gzh;
        // buffers referenced by gzh to receive name and comment
        private byte[GzHeader.bufSize] nameBuf;
        private byte[GzHeader.bufSize] commentBuf;

        private GzHeaderDg dg;
        private bool dgCalled;

        this(GzHeaderDg dg) @safe
        {
            gzh.name = &nameBuf[0];
            gzh.name_max = cast(uint) nameBuf.length;
            gzh.comment = &commentBuf[0];
            gzh.comm_max = cast(uint) commentBuf.length;

            this.dg = dg;
        }
    }

    /// The stream type of the Inflate algorithm
    static final class Stream : ZlibStream
    {
        // null if no header callback was requested
        Gzh gzh;
    }

    /// Create and initialize an inflate stream with the parameters of this algorithm.
    /// Throws: Exception if zlib fails to initialize the stream.
    Stream initialize() @safe
    {
        assert(
            (windowBits == 0 && format == ZlibFormat.zlib) ||
                (9 <= windowBits && windowBits <= 15),
                "inconsistent windowBits"
        );
        // zlib encodes the wrapping format in the windowBits parameter
        int wb = windowBits;
        final switch (format)
        {
        case ZlibFormat.zlib:
            break;
        case ZlibFormat.gz:
            wb += 16;
            break;
        case ZlibFormat.autoDetect:
            wb += 32;
            break;
        case ZlibFormat.raw:
            wb = -wb;
            break;
        }

        auto stream = new Stream();

        const res = (() @trusted => inflateInit2(&stream.strm, wb))();

        enforce(
            res == Z_OK,
            "Could not initialize Zlib's inflate stream: " ~ zResultToString(res)
        );

        if (gzHeaderDg)
        {
            stream.gzh = new Gzh(gzHeaderDg);
            (() @trusted => inflateGetHeader(&stream.strm, &stream.gzh.gzh))();
        }

        return stream;
    }

    /// Inflate the input of the stream to its output.
    /// If a header callback was requested, it is called once, as soon as
    /// zlib reports that the Gzip header was completely read.
    /// Throws: DataException if the input data is not valid,
    /// Exception if zlib reports another error.
    package Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" /+ lastChunk +/ )
    {
        const res = (() @trusted => squiz_box.c.zlib.inflate(&stream.strm, Z_NO_FLUSH))();
        //
        if (res == Z_DATA_ERROR)
            throw new DataException("Improper data given to inflate");

        enforce(
            res == Z_OK || res == Z_STREAM_END,
            "Zlib inflate failed with code: " ~ zResultToString(res)
        );

        auto gzh = stream.gzh;
        // zlib sets done to 1 when the Gzip header is completely read, and to -1
        // to signal that there is no Gzip header (zlib stream in auto-detect mode),
        // in which case there is nothing to report.
        if (gzh && !gzh.dgCalled && gzh.gzh.done > 0)
        {
            auto head = (() @trusted => GzHeader(&gzh.gzh))();
            gzh.dg(head);
            gzh.dgCalled = true;
        }

        return cast(Flag!"streamEnded")(res == Z_STREAM_END);
    }

    /// Reset the stream in order to start another inflate job
    package void reset(Stream stream) @trusted
    {
        inflateReset(&stream.strm);
    }

    /// Release the resources of the stream
    package void end(Stream stream) @trusted
    {
        inflateEnd(&stream.strm);
    }
}

///
@("Deflate / Inflate")
unittest
{
    import test.util;
    import std.array : join;

    auto def = Deflate.init;
    auto inf = Inflate.init;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    // deflating
    const squized = only(input).squiz(def).join();

    // re-inflating
    const output = only(squized).squiz(inf).join();

    assert(squized.length < input.length);
    assert(output == input);

    // for such long and repetitive data, ratio is around 0.3%
    const ratio = cast(double) squized.length / cast(double) input.length;
    assert(ratio < 0.004);
}

///
@("Deflate / Inflate in Gz format and custom header")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    GzHeader inHead;
    inHead.mtime = Clock.currTime;
    inHead.os = GzHeader.Os.fatFs;
    inHead.text = Yes.text;
    inHead.filename = "boring.txt";
    inHead.comment = "A very boring file";

    // deflating
    const squized = only(input)
        .deflateGz(inHead)
        .join();

    // re-inflating
    GzHeader outHead;
    int numCalls;
    void setOutHead(GzHeader gzh)
    {
        outHead = gzh;
        numCalls++;
    }

    const output = only(squized)
        .inflateGz(&setOutHead)
        .join();

    assert(squized.length < input.length);
    assert(output == input);
    assert(inHead == outHead);
    assert(numCalls == 1);
}

///
@("Deflate / Inflate in raw format")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    // deflating
    const squized = only(input)
        .deflateRaw()
        .join();

    // re-inflating
    const output = only(squized)
        .inflateRaw()
        .join();

    assert(squized.length < input.length);
    assert(output == input);
}

// Name of a zlib result code, for error messages
package string zResultToString(int res) @safe pure nothrow @nogc
{
    switch (res)
    {
    case Z_OK:
        return "OK";
    case Z_STREAM_END:
        return "STREAM_END";
    case Z_NEED_DICT:
        return "NEED_DICT";
    case Z_ERRNO:
        return "ERRNO";
    case Z_STREAM_ERROR:
        return "STREAM_ERROR";
    case Z_DATA_ERROR:
        return "DATA_ERROR";
    case Z_MEM_ERROR:
        return "MEM_ERROR";
    case Z_BUF_ERROR:
        return "BUF_ERROR";
    case Z_VERSION_ERROR:
        return "VERSION_ERROR";
    default:
        return "(Unknown result)";
    }
}

// Name of a zlib flush mode, for error messages
package string zFlushToString(int flush) @safe pure nothrow @nogc
{
    switch (flush)
    {
    case Z_NO_FLUSH:
        return "NO_FLUSH";
    case Z_PARTIAL_FLUSH:
        return "PARTIAL_FLUSH";
    case Z_SYNC_FLUSH:
        return "SYNC_FLUSH";
    case Z_FULL_FLUSH:
        return "FULL_FLUSH";
    case Z_FINISH:
        return "FINISH";
    case Z_BLOCK:
        return "BLOCK";
    case Z_TREES:
        return "TREES";
    default:
        return "(Unknown flush)";
    }
}

/// Returns an InputRange containing the input data processed through Bzip2 compression.
auto compressBzip2(I)(I input, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I)
{
    // A default-constructed algorithm carries the default Bzip2 parameters.
    CompressBzip2 algo;
    return squiz(input, algo, chunkSize);
}

/// Stream state used by both the Bzip2 compressor and decompressor.
/// It wraps a `bz_stream` through `ZlibLikeStreamImpl`.
final class Bz2Stream : SquizStream
{
    mixin ZlibLikeStreamImpl!(bz_stream);

    /// Installs `gcAlloc!int` and `gcFree` as the allocation
    /// callbacks of the underlying `bz_stream`.
    this() @safe
    {
        strm.bzfree = &gcFree;
        strm.bzalloc = &(gcAlloc!int);
    }

    /// Number of input bytes, assembled from the two 32 bits
    /// halves kept by `bz_stream`.
    @property size_t totalInput() const @safe
    {
        const ulong high = strm.total_in_hi32;
        const ulong low = strm.total_in_lo32;
        return cast(size_t)((high << 32) | low);
    }

    /// Number of output bytes, assembled from the two 32 bits
    /// halves kept by `bz_stream`.
    @property size_t totalOutput() const @safe
    {
        const ulong high = strm.total_out_hi32;
        const ulong low = strm.total_out_lo32;
        return cast(size_t)((high << 32) | low);
    }
}

/// Compression with the Bzip2 algorithm.
///
/// Although having better compression capabilities than Zlib (deflate),
/// Bzip2 has poor latency when it comes to streaming.
/// I.e. it can swallow several Mb of data before starting to produce output.
/// If streaming latency is an important factor, deflate/inflate
/// should be the favorite algorithm.
///
/// This algorithm does not support resource reuse, so calling reset
/// is equivalent to a call to end followed by initialize.
/// (but the same instance of stream is kept).
struct CompressBzip2
{
    static assert(isSquizAlgo!CompressBzip2);

    /// Advanced Bzip2 parameters
    /// See Bzip2 documentation
    /// https://www.sourceware.org/bzip2/manual/manual.html#bzcompress-init
    int blockSize100k = 9;
    /// ditto
    int verbosity = 0;
    /// ditto
    int workFactor = 30;

    alias Stream = Bz2Stream;

    // Calls BZ2_bzCompressInit on the stream with the parameters of this
    // algorithm and throws if the library does not answer BZ_OK.
    // Shared by `initialize` and `reset`.
    private void startSession(Stream stream) @safe
    {
        const status = (() @trusted => BZ2_bzCompressInit(
                &stream.strm, blockSize100k, verbosity, workFactor,
        ))();
        enforce(
            status == BZ_OK,
            "Could not initialize Bzip2 compressor: " ~ bzResultToString(status)
        );
    }

    /// Allocates a new stream and starts a compression session on it.
    /// Throws: Exception if BZ2_bzCompressInit fails.
    Stream initialize() @safe
    {
        auto created = new Stream;
        startSession(created);
        return created;
    }

    /// Runs one BZ2_bzCompress step on the stream.
    /// The action is BZ_FINISH when `lastChunk` is set, BZ_RUN otherwise.
    /// Returns: Yes.streamEnded once the library reports BZ_STREAM_END.
    /// Throws: Exception if the library answers anything else than the
    /// OK code matching the requested action.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        const action = lastChunk ? BZ_FINISH : BZ_RUN;
        // the only non-terminal answer accepted for the chosen action
        const expected = lastChunk ? BZ_FINISH_OK : BZ_RUN_OK;

        const status = (() @trusted => BZ2_bzCompress(&stream.strm, action))();

        if (status != BZ_STREAM_END)
        {
            enforce(
                status == expected,
                "Bzip2 compress failed with code: " ~ bzResultToString(status)
            );
            return No.streamEnded;
        }

        return Yes.streamEnded;
    }

    /// Ends the current session, clears the `bz_stream`, restores
    /// the allocation callbacks and starts a new session on the same
    /// stream instance.
    /// Throws: Exception if BZ2_bzCompressInit fails.
    void reset(Stream stream) @safe
    {
        (() @trusted => BZ2_bzCompressEnd(&stream.strm))();

        stream.strm = bz_stream.init;
        stream.strm.bzfree = &gcFree;
        stream.strm.bzalloc = &(gcAlloc!int);

        startSession(stream);
    }

    /// Ends the compression session of the stream.
    void end(Stream stream) @trusted
    {
        BZ2_bzCompressEnd(&stream.strm);
    }
}

/// Returns an InputRange streaming over data decompressed with Bzip2.
auto decompressBzip2(I)(I input, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I)
{
    return squiz(input, DecompressBzip2.init, chunkSize);
}

/// Decompression of data encoded with Bzip2.
///
/// This algorithm does not support resource reuse, so calling reset
/// is equivalent to a call to end followed by initialize.
/// (but the same instance of stream is kept).
struct DecompressBzip2
{
    static assert(isSquizAlgo!DecompressBzip2);

    /// Advanced Bzip2 parameters
    /// See Bzip2 documentation
    /// https://www.sourceware.org/bzip2/manual/manual.html#bzDecompress-init
    int verbosity;
    /// ditto
    bool small;

    alias Stream = Bz2Stream;

    /// Allocates a new stream and starts a decompression session on it.
    /// Throws: Exception if BZ2_bzDecompressInit fails.
    Stream initialize() @safe
    {
        auto stream = new Stream;

        const res = (() @trusted => BZ2_bzDecompressInit(
                &stream.strm, verbosity, small ? 1 : 0,
        ))();
        enforce(
            res == BZ_OK,
            "Could not initialize Bzip2 decompressor: " ~ bzResultToString(res)
        );
        return stream;
    }

    /// Runs one BZ2_bzDecompress step on the stream.
    /// Returns: Yes.streamEnded once the library reports BZ_STREAM_END.
    /// Throws: DataException if the library reports a data error
    /// (BZ_DATA_ERROR or BZ_DATA_ERROR_MAGIC), Exception for any other
    /// failure code.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe
    {
        const res = (() @trusted => BZ2_bzDecompress(&stream.strm))();

        // Both codes signal inconsistent input; the MAGIC variant is the
        // one returned when the stream does not start like Bzip2 data.
        if (res == BZ_DATA_ERROR || res == BZ_DATA_ERROR_MAGIC)
            throw new DataException("Input data was not compressed with Bzip2");

        enforce(
            res == BZ_OK || res == BZ_STREAM_END,
            "Bzip2 decompress failed with code: " ~ bzResultToString(res)
        );

        return cast(Flag!"streamEnded")(res == BZ_STREAM_END);
    }

    /// Ends the current session, clears the `bz_stream`, restores the
    /// allocation callbacks and starts a new session on the same
    /// stream instance.
    /// Throws: Exception if BZ2_bzDecompressInit fails.
    void reset(Stream stream) @safe
    {
        (() @trusted => BZ2_bzDecompressEnd(&stream.strm))();

        stream.strm = bz_stream.init;
        stream.strm.bzalloc = &(gcAlloc!int);
        stream.strm.bzfree = &gcFree;

        const res = (() @trusted => BZ2_bzDecompressInit(
                &stream.strm, verbosity, small ? 1 : 0,
        ))();
        enforce(
            res == BZ_OK,
            "Could not initialize Bzip2 decompressor: " ~ bzResultToString(res)
        );
    }

    /// Ends the decompression session of the stream.
    void end(Stream stream) @trusted
    {
        BZ2_bzDecompressEnd(&stream.strm);
    }
}

///
@("Compress / Decompress Bzip2")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    const squized = only(input)
        .compressBzip2()
        .join();

    const output = only(squized)
        .decompressBzip2()
        .join();

    assert(squized.length < input.length);
    assert(output == input);

    // for such long and repetitive data, ratio is around 0.12%
    const ratio = cast(double) squized.length / cast(double) input.length;
    assert(ratio < 0.002);
}

// Name of a Bzip2 action code (BZ_RUN, BZ_FLUSH, BZ_FINISH), for messages.
private string bzActionToString(int action) @safe pure nothrow @nogc
{
    switch (action)
    {
    case BZ_RUN:
        return "RUN";
    case BZ_FLUSH:
        return "FLUSH";
    case BZ_FINISH:
        return "FINISH";
    default:
        return "(Unknown action)";
    }
}

// Name of a Bzip2 result code, for error messages.
private string bzResultToString(int res) @safe pure nothrow @nogc
{
    switch (res)
    {
    case BZ_OK:
        return "OK";
    case BZ_RUN_OK:
        return "RUN_OK";
    case BZ_FLUSH_OK:
        return "FLUSH_OK";
    case BZ_FINISH_OK:
        return "FINISH_OK";
    case BZ_STREAM_END:
        return "STREAM_END";
    case BZ_SEQUENCE_ERROR:
        return "SEQUENCE_ERROR";
    case BZ_PARAM_ERROR:
        return "PARAM_ERROR";
    case BZ_MEM_ERROR:
        return "MEM_ERROR";
    case BZ_DATA_ERROR:
        return "DATA_ERROR";
    case BZ_DATA_ERROR_MAGIC:
        return "DATA_ERROR_MAGIC";
    case BZ_IO_ERROR:
        return "IO_ERROR";
    case BZ_UNEXPECTED_EOF:
        return "UNEXPECTED_EOF";
    case BZ_OUTBUFF_FULL:
        return "OUTBUFF_FULL";
    case BZ_CONFIG_ERROR:
        return "CONFIG_ERROR";
    default:
        return "(Unknown result)";
    }
}

/// Stream state used by both the LZMA compressor and decompressor.
/// Besides the `lzma_stream`, it owns the allocator and the filter
/// options that the filter chain points to.
final class LzmaStream : SquizStream
{
    mixin ZlibLikeStreamImpl!(lzma_stream);
    mixin ZlibLikeTotalInOutImpl!();

    private lzma_allocator alloc;
    private lzma_options_delta optsDelta;
    private lzma_options_lzma optsLzma;
    private lzma_filter[] filterChain;

    this() @safe
    {
        alloc.alloc = &(gcAlloc!size_t);
        alloc.free = &gcFree;
        strm.allocator = &alloc;
    }

    // Builds the filter chain for `format`: the requested filters first,
    // then (unless format is rawCopy) the LZMA1/LZMA2 filter configured
    // from `preset`, then the LZMA_VLI_UNKNOWN end marker.
    // The chain is also stored in `filterChain`; its option pointers refer
    // to the `optsDelta` and `optsLzma` members of this stream.
    // Throws: Exception if more than 3 filters are supplied.
    private lzma_filter[] buildFilterChain(LzmaFormat format, LzmaFilter[] filters,
        uint preset, uint deltaDist) @safe
    {
        lzma_filter[] res;
        foreach (f; filters)
        {
            final switch (f)
            {
            case LzmaFilter.delta:
                optsDelta.dist = deltaDist;
                res ~= lzma_filter(LZMA_FILTER_DELTA, cast(void*)&optsDelta);
                break;
            case LzmaFilter.bcjX86:
                res ~= lzma_filter(LZMA_FILTER_X86, null);
                break;
            case LzmaFilter.bcjPowerPc:
                res ~= lzma_filter(LZMA_FILTER_POWERPC, null);
                break;
            case LzmaFilter.bcjIa64:
                res ~= lzma_filter(LZMA_FILTER_IA64, null);
                break;
            case LzmaFilter.bcjArm:
                res ~= lzma_filter(LZMA_FILTER_ARM, null);
                break;
            case LzmaFilter.bcjArmThumb:
                res ~= lzma_filter(LZMA_FILTER_ARMTHUMB, null);
                break;
            case LzmaFilter.bcjSparc:
                res ~= lzma_filter(LZMA_FILTER_SPARC, null);
                break;
            }
        }

        enforce(res.length <= 3, "Too many filters supplied");

        if (format != LzmaFormat.rawCopy)
        {
            (() @trusted => lzma_lzma_preset(&optsLzma, preset))();
            const compFilter = format.isLegacy ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
            res ~= lzma_filter(compFilter, cast(void*)&optsLzma);
        }

        res ~= lzma_filter(LZMA_VLI_UNKNOWN, null); // end marker

        filterChain = res;
        return res;
    }

}

/// Header/trailer format for Lzma compression
enum LzmaFormat
{
    /// Lzma with Xz format, suitable to write *.xz files
    xz,
    /// LZMA1 encoding and format, suitable for legacy *.lzma files
    /// This format doesn't support filters.
    legacy,
    /// Raw LZMA2 compression, without header/trailer.
    /// Use this to include compressed LZMA data in
    /// a container defined externally (e.g. this is used
    /// for the *.7z archives)
    raw,
    /// Raw LZMA1 compression, without header/trailer.
    /// This one is still found in some *.7z files.
    rawLegacy,
    /// Just copy bytes out.
    /// You may use this in combination with a filter to observe its
    /// effect, but has otherwise no use.
    rawCopy,
}

/// Whether this is a legacy format
bool isLegacy(LzmaFormat format) @safe pure nothrow @nogc
{
    return format == LzmaFormat.legacy || format == LzmaFormat.rawLegacy;
}

/// Whether this is a raw format
bool isRaw(LzmaFormat format) @safe pure nothrow @nogc
{
    // relies on the raw formats being declared last in LzmaFormat
    return cast(int) format >= cast(int) LzmaFormat.raw;
}

/// Filters to use with the LZMA compression.
///
/// Up to 3 filters can be used from this list.
/// These filters transform the input to increase
/// redundancy of the data supplied to the LZMA compression.
enum LzmaFilter
{
    /// Delta filter, which store differences between bytes
    /// to produce more repetitive data in some circumstances.
    /// Works with `deltaDist` parameter of `CompressLzma`.
    delta,

    /// BCJ (Branch/Call/Jump) filters aim optimize machine code
    /// compression by converting relative branches, calls and jumps
    /// to absolute addresses. This increases redundancy and can be
    /// exploited by the LZMA compression.
    ///
    /// BCJ filters are available for a set of CPU architectures.
    /// Use one (or two) of them when compressing compiled binaries.
    bcjX86,
    /// ditto
    bcjPowerPc,
    /// ditto
    bcjIa64,
    /// ditto
    bcjArm,
    /// ditto
    bcjArmThumb,
    /// ditto
    bcjSparc,
}

/// Integrity check to include in the compressed data
/// (only for the Xz format)
/// Default for xz is CRC-64.
enum LzmaCheck
{
    /// No integrity check included
    none,
    /// CRC-32 integrity check
    crc32,
    /// CRC-64 integrity check
    crc64,
    /// SHA-256 integrity check
    sha256,
}

// Maps LzmaCheck to the corresponding lzma_check value of the C binding.
private lzma_check toLzma(LzmaCheck check) @safe pure nothrow @nogc
{
    final switch (check)
    {
    case LzmaCheck.none:
        return lzma_check.NONE;
    case LzmaCheck.crc32:
        return lzma_check.CRC32;
    case LzmaCheck.crc64:
        return lzma_check.CRC64;
    case LzmaCheck.sha256:
        return lzma_check.SHA256;
    }
}

/// Returns an InputRange containing the input data processed through
/// `CompressLzma` with default parameters (Xz format).
auto compressXz(I)(I input, size_t chunkSize = defaultChunkSize)
{
    return squiz(input, CompressLzma.init, chunkSize);
}

/// Returns an InputRange containing the input data processed through
/// `CompressLzma` with the raw format (no header/trailer).
auto compressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize)
{
    CompressLzma algo;
    algo.format = LzmaFormat.raw;
    return squiz(input, algo, chunkSize);
}

/// Compression with the LZMA algorithm, in one of the `LzmaFormat` formats.
struct CompressLzma
{
    import std.conv : to;

    static assert(isSquizAlgo!CompressLzma);

    /// The format of the compressed stream
    LzmaFormat format;

    /// The integrity check to include in compressed stream.
    /// Only used with XZ format.
    LzmaCheck check = LzmaCheck.crc64;

    /// The compression preset between 0 (fast) to 9 (higher compression).
    /// The default is 6.
    uint preset = 6;

    /// Makes the encoding significantly slower for marginal compression
    /// improvement. Only useful if you don't mind about CPU time at all.
    Flag!"extreme" extreme;

    /// Filters to include in the encoding.
    /// Maximum three filters can be provided.
    /// For most input, no filtering is necessary.
    LzmaFilter[] filters;

    /// Number of bytes between 1 and 256 to use for the Delta filter.
    /// For example for 16bit PCM stereo audio, you should use 4.
    /// For RGB data 8bit per channel, you should use 3.
    uint deltaDist;

    alias Stream = LzmaStream;

    // Sets up the encoder matching `format` on the stream.
    // Throws: Exception if filters are given with the legacy format,
    // or if the encoder initialization does not return lzma_ret.OK.
    private void initStream(Stream stream) @trusted
    {
        // effective preset: base level combined with the extreme flag
        uint pres = preset;
        if (extreme)
            pres |= LZMA_PRESET_EXTREME;

        lzma_ret res;
        final switch (format)
        {
        case LzmaFormat.xz:
            const chain = stream.buildFilterChain(format, filters, pres, deltaDist);
            res = lzma_stream_encoder(&stream.strm, chain.ptr, check.toLzma());
            break;
        case LzmaFormat.legacy:
            enforce(filters.length == 0, "Filters are not supported with the legacy format");
            // use `pres` (not `preset`) so that `extreme` is honoured here
            // the same way as in the other formats
            lzma_lzma_preset(&stream.optsLzma, pres);
            res = lzma_alone_encoder(&stream.strm, &stream.optsLzma);
            break;
        case LzmaFormat.raw:
        case LzmaFormat.rawLegacy:
        case LzmaFormat.rawCopy:
            const chain = stream.buildFilterChain(format, filters, pres, deltaDist);
            res = lzma_raw_encoder(&stream.strm, chain.ptr);
            break;
        }

        // the code must be concatenated: a third argument to enforce is
        // the `file` parameter, not a continuation of the message
        enforce(res == lzma_ret.OK, "Could not initialize LZMA encoder: " ~ res.to!string);
    }

    /// Allocates a new stream and sets up the encoder on it.
    Stream initialize() @safe
    {
        auto stream = new LzmaStream;
        initStream(stream);
        return stream;
    }

    /// Runs one coding step on the stream (see `lzmaCode`).
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        return lzmaCode(stream, lastChunk);
    }

    /// Re-initializes the encoder on the same stream instance.
    void reset(Stream stream) @safe
    {
        // Lzma supports reset out of the box by recalling initialization
        // function without calling lzma_end.

        initStream(stream);
    }

    /// Releases the resources held by the `lzma_stream`.
    void end(Stream stream) @trusted
    {
        lzma_end(&stream.strm);
    }
}

/// Returns an InputRange streaming over data decompressed through
/// `DecompressLzma` with default parameters (Xz format).
auto decompressXz(I)(I input, size_t chunkSize = defaultChunkSize)
{
    return squiz(input, DecompressLzma.init, chunkSize);
}

/// Returns an InputRange streaming over data decompressed through
/// `DecompressLzma` with the raw format and default parameters.
auto decompressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize)
{
    DecompressLzma algo;
    algo.format = LzmaFormat.raw;
    return squiz(input, algo, chunkSize);
}

/// Decompression of LZMA data, in one of the `LzmaFormat` formats.
struct DecompressLzma
{
    import std.conv : to;

    static assert(isSquizAlgo!DecompressLzma);

    /// The format of the compressed stream
    LzmaFormat format;

    /// The memory usage limit in bytes.
    /// by default no limit is enforced
    size_t memLimit = size_t.max;

    /// Parameters for the raw decompression.
    /// They are the same as for the compression.
    /// As there is no header to tell Lzma what filters were used during
    /// compression, it is the responsibility of the programmer to
    /// correctly ensure that the same options are used for decompression.
    /// All these options are ignored when decompressing .xz stream.
    uint preset = 6;
    /// ditto
    Flag!"extreme" extreme;
    /// ditto
    LzmaFilter[] filters;
    /// ditto
    uint deltaDist;

    alias Stream = LzmaStream;

    this(LzmaFormat format) @safe
    {
        this.format = format;
    }

    /// convenience constructor to copy parameters of the compression
    /// for the decompression. Especially useful for the raw decompression,
    /// to ensure that the parameters fit the ones used for compression.
    this(CompressLzma compress) @safe
    {
        format = compress.format;
        preset = compress.preset;
        extreme = compress.extreme;
        filters = compress.filters;
        deltaDist = compress.deltaDist;
    }

    // Sets up the decoder matching `format` on the stream.
    // Throws: Exception if the decoder initialization does not return
    // lzma_ret.OK.
    private void initStream(Stream stream) @trusted
    {
        // size_t.max means "no limit", which is ulong.max for liblzma
        ulong memlim = memLimit;
        if (memLimit == size_t.max)
            memlim = ulong.max;

        lzma_ret res;

        final switch (format)
        {
        case LzmaFormat.xz:
            res = lzma_stream_decoder(&stream.strm, memlim, 0);
            break;
        case LzmaFormat.legacy:
            res = lzma_alone_decoder(&stream.strm, memlim);
            break;
        case LzmaFormat.raw:
        case LzmaFormat.rawLegacy:
        case LzmaFormat.rawCopy:
            uint pres = preset;
            if (extreme)
                pres |= LZMA_PRESET_EXTREME;

            const chain = stream.buildFilterChain(format, filters, pres, deltaDist);

            res = lzma_raw_decoder(&stream.strm, chain.ptr);
            break;
        }

        // the code must be concatenated: a third argument to enforce is
        // the `file` parameter, not a continuation of the message
        enforce(res == lzma_ret.OK, "Could not initialize LZMA decoder: " ~ res.to!string);
    }

    /// Runs one coding step on the stream (see `lzmaCode`).
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        return lzmaCode(stream, lastChunk);
    }

    /// Allocates a new stream and sets up the decoder on it.
    Stream initialize() @safe
    {
        auto stream = new LzmaStream;
        initStream(stream);
        return stream;
    }

    /// Re-initializes the decoder on the same stream instance.
    void reset(Stream stream) @safe
    {
        // Lzma supports reset out of the box by recalling initialization
        // function without calling lzma_end.

        initStream(stream);
    }

    /// Releases the resources held by the `lzma_stream`.
    void end(Stream stream) @trusted
    {
        lzma_end(&stream.strm);
    }
}

// Runs one lzma_code step, shared by CompressLzma and DecompressLzma.
// The action is FINISH when `lastChunk` is set, RUN otherwise.
// Returns Yes.streamEnded when lzma_code answers STREAM_END and throws
// for any answer other than OK or STREAM_END.
private Flag!"streamEnded" lzmaCode(LzmaStream stream, Flag!"lastChunk" lastChunk) @safe
{
    import std.conv : to;

    const action = lastChunk ? lzma_action.FINISH : lzma_action.RUN;
    const res = (() @trusted => lzma_code(&stream.strm, action))();

    enforce(
        res == lzma_ret.OK || res == lzma_ret.STREAM_END,
        "LZMA encoding failed with code: " ~ res.to!string
    );

    return cast(Flag!"streamEnded")(res == lzma_ret.STREAM_END);
}

///
@("Compress / Decompress XZ")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    const squized = only(input)
        .compressXz()
        .join();

    const output = only(squized)
        .decompressXz()
        .join();

    assert(squized.length < input.length);
    assert(output == input);

    // for such long and repetitive data, ratio is around 0.2%
    const ratio = cast(double) squized.length / cast(double) input.length;
    assert(ratio < 0.003);
}

///
@("Integrity check XZ")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    auto squized = only(input)
        .compressXz()
        .join()
        .dup; // dup because const(ubyte)[] is returned

    squized[squized.length / 2] += 1;

    assertThrown(
        only(squized)
            .decompressXz()
            .join()
    );
}

///
@("Compress / Decompress XZ with filter")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const input = generateSequentialData(len, 1245, 27).join();

    const reference = only(input)
        .compressXz()
        .join();

    CompressLzma comp;
    comp.filters ~= LzmaFilter.delta;
    comp.deltaDist = 8; // sequential data of 8 byte integers

    const withDelta = only(input)
        .squiz(comp)
        .join();

    const output = only(withDelta)
        .decompressXz()
        .join();

    assert(output == input);
    // < 20% compression without filter (sequential data is tough)
    // < 0.5% compression with delta (piece of cake)
    assert(input.length > reference.length * 5);
    assert(input.length > withDelta.length * 200);
}

///
@("Compress / Decompress Lzma Raw")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    const reference = only(input)
        .compressXz()
        .join();

    const squized = only(input)
        .compressLzmaRaw()
        .join();

    const output = only(squized)
        .decompressLzmaRaw()
        .join();

    assert(output == input);
    assert(squized.length < input.length);
    assert(squized.length < reference.length); // win header/trailer space

    // for such repetitive data, ratio is around 1.13%
    // also generally better than zlib, bzip2 struggles a lot for repetitive data
    const ratio = cast(double) squized.length / cast(double) input.length;
    assert(ratio < 0.003);
}

///
@("Compress / Decompress Lzma Raw with filter")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const input = generateSequentialData(len, 1245, 27).join();

    const reference = only(input)
        .compressLzmaRaw()
        .join();

    CompressLzma comp;
    comp.format = LzmaFormat.raw;
    comp.filters ~= LzmaFilter.delta;
    comp.deltaDist = 8; // sequential data of 8 byte integers

    const withDelta = only(input)
        .squiz(comp)
        .join();

    const output = only(withDelta) // using compression parameters for decompression
        .squiz(DecompressLzma(comp))
        .join();

    assert(output == input);
    // < 20% compression without filter (sequential data is tough)
    // < 0.4% compression with delta (piece of cake)
    assert(input.length > reference.length * 5);
    assert(input.length > withDelta.length * 250);
}

///
@("Compress / Decompress Lzma Legacy")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    auto comp = CompressLzma(LzmaFormat.legacy);
    auto decomp = DecompressLzma(comp);

    const squized = only(input)
        .squiz(comp)
        .join();

    const output = only(squized)
        .squiz(decomp)
        .join();

    assert(squized.length < input.length);
    assert(output == input);

    // for such repetitive data, ratio is around 1.13%
    // also generally better than zlib, bzip2 struggles a lot for repetitive data
    const ratio = cast(double) squized.length / cast(double) input.length;
    assert(ratio < 0.003);
}

///
@("Compress / Decompress Lzma Raw Legacy")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    auto comp = CompressLzma(LzmaFormat.rawLegacy);
    auto decomp = DecompressLzma(comp);

    const squized = only(input)
        .squiz(comp)
        .join();

    const output = only(squized)
        .squiz(decomp)
        .join();

    assert(squized.length < input.length);
    assert(output == input);

    // for such repetitive data, ratio is around 1.13%
    // also generally better than zlib, bzip2 struggles a lot for repetitive data
    const ratio = cast(double) squized.length / cast(double) input.length;
    assert(ratio < 0.003);
}

///
@("Compress / Decompress Lzma rawLegacy with filter")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const input = generateSequentialData(len, 1245, 27).join();

    const reference = only(input)
        .squiz(CompressLzma(LzmaFormat.legacy))
        .join();

    CompressLzma comp;
    comp.format = LzmaFormat.rawLegacy;
    comp.filters ~= LzmaFilter.delta;
    comp.deltaDist = 8; // sequential data of 8 byte integers

    auto decomp = DecompressLzma(comp);

    const withDelta = only(input)
        .squiz(comp)
        .join();

    const output = only(withDelta)
        .squiz(decomp)
        .join();

    assert(output == input);
    // < 20% compression without filter (sequential data is tough)
    // < 0.4% compression with delta (piece of cake)
    assert(input.length > reference.length * 5);
    assert(input.length > withDelta.length * 250);
}

/// Returns an InputRange containing the input data processed through
/// `CompressZstd` with default parameters.
auto compressZstd(I)(I input, size_t chunkSize = defaultChunkSize)
{
    return squiz(input, CompressZstd.init, chunkSize);
}

/// Returns an InputRange streaming over data decompressed through
/// `DecompressZstd` with default parameters.
auto decompressZstd(I)(I input, size_t chunkSize = defaultChunkSize)
{
    return squiz(input, DecompressZstd.init, chunkSize);
}

/// Base stream state for Zstandard compression and decompression.
/// It keeps the input and output buffers handed to the library and
/// accumulates the totals of consumed and produced bytes.
class ZstdStream : SquizStream
{
    private ZSTD_inBuffer inBuf;
    private ZSTD_outBuffer outBuf;
    private size_t totalIn;
    private size_t totalOut;

    /// The part of the input buffer located after the current position.
    @property const(ubyte)[] input() const @trusted
    {
        auto ptr = cast(const(ubyte)*) inBuf.src;
        return ptr[inBuf.pos .. inBuf.size];
    }

    /// Installs a new input buffer. The position reached in the previous
    /// buffer is added to the input total before being cleared.
    @property void input(const(ubyte)[] inp) @trusted
    {
        totalIn += inBuf.pos;
        inBuf.pos = 0;
        inBuf.src = cast(const(void)*) inp.ptr;
        inBuf.size = inp.length;
    }

    /// Input total: previous buffers plus position in the current one.
    @property size_t totalInput() const @safe
    {
        return totalIn + inBuf.pos;
    }

    /// The part of the output buffer located after the current position.
    @property inout(ubyte)[] output() inout @trusted
    {
        auto ptr = cast(inout(ubyte)*) outBuf.dst;
        return ptr[outBuf.pos .. outBuf.size];
    }

    /// Installs a new output buffer. The position reached in the previous
    /// buffer is added to the output total before being cleared.
    @property void output(ubyte[] outp) @trusted
    {
        totalOut += outBuf.pos;
        outBuf.pos = 0;
        outBuf.dst = cast(void*) outp.ptr;
        outBuf.size = outp.length;
    }

    /// Output total: previous buffers plus position in the current one.
    @property size_t totalOutput() const @safe
    {
        return totalOut + outBuf.pos;
    }

    /// Multi-line dump of both buffers and totals.
    override string toString() const @safe
    {
        import std.format : format;

        string res;
        res ~= "ZstdStream:\n";
        res ~= "  Input:\n";
        res ~= format!"    start 0x%016x\n"(inBuf.src);
        res ~= format!"    pos %s\n"(inBuf.pos);
        res ~= format!"    size %s\n"(inBuf.size);
        res ~= format!"    total %s\n"(totalInput);
        res ~= "  Output:\n";
        res ~= format!"    start 0x%016x\n"(outBuf.dst);
        res ~= format!"    pos %s\n"(outBuf.pos);
        res ~= format!"    size %s\n"(outBuf.size);
        res ~= format!"    total %s"(totalOutput);

        return res;
    }
}

// Generates the code (to be mixed in) that forwards the field `name`
// to ZSTD_CCtx_setParameter when the field is non-zero.
// The mixin site must have a `cctx` variable in scope.
private string zstdSetCParam(string name)
{
    return "if (" ~ name ~ ") " ~
        "ZSTD_CCtx_setParameter(cctx, ZSTD_cParameter." ~ name ~ ", " ~ name ~ ");";
}

// Throws an Exception built from `desc` and the Zstandard error name
// when `code` is an error code according to ZSTD_isError.
private void zstdError(size_t code, string desc) @trusted
{
    import std.string : fromStringz;

    if (ZSTD_isError(code))
    {
        const msg = fromStringz(ZSTD_getErrorName(code));
        throw new Exception((desc ~ ": " ~ msg).idup);
    }
}

/// Zstandard is a fast compression algorithm designed for streaming.
/// See zstd.h (enum ZSTD_cParameter) for details.
struct CompressZstd
{
    static assert(isSquizAlgo!CompressZstd);

    /// Common parameters.
    /// A value of zero indicates that the default should be used.
    int compressionLevel;
    /// ditto
    int windowLog;
    /// ditto
    int hashLog;
    /// ditto
    int chainLog;
    /// ditto
    int searchLog;
    /// ditto
    int minMatch;
    /// ditto
    int targetLength;
    /// ditto
    int strategy;

    /// Long distance matching parameters (LDM)
    /// Can be activated for large inputs to improve the compression ratio.
    /// Increases memory usage and the window size
    /// A value of zero indicate that the default should be used.
    bool enableLongDistanceMatching;
    /// ditto
    int ldmHashLog;
    /// ditto
    int ldmMinMatch;
    /// ditto
    int ldmBucketSizeLog;
    /// ditto
    int ldmHashRateLog;

    // frame parameters

    /// If input data content size is known, before
    /// start of streaming, set contentSize to its value.
    /// It will enable the size to be written in the header
    /// and checked after decompression.
    ulong contentSize = ulong.max;
    /// Include a checksum of the content in the trailer.
    bool checksumFlag = false;
    /// When applicable, dictionary's ID is written in the header
    bool dictIdFlag = true;

    /// Multi-threading parameters
    int nbWorkers;
    /// ditto
    int jobSize;
    /// ditto
    int overlapLog;

    static final class Stream : ZstdStream
    {
        private ZSTD_CStream* strm;
    }

    // Forwards every parameter that differs from its "use the default"
    // value to the compression context of the stream.
    private void setParams(Stream stream) @trusted
    {
        auto cctx = cast(ZSTD_CCtx*) stream.strm;

        mixin(zstdSetCParam("compressionLevel"));
        mixin(zstdSetCParam("windowLog"));
        mixin(zstdSetCParam("hashLog"));
        mixin(zstdSetCParam("chainLog"));
        mixin(zstdSetCParam("searchLog"));
        mixin(zstdSetCParam("minMatch"));
        mixin(zstdSetCParam("targetLength"));
        mixin(zstdSetCParam("strategy"));

        if (enableLongDistanceMatching)
        {
            ZSTD_CCtx_setParameter(cctx,
                ZSTD_cParameter.enableLongDistanceMatching,
                1
            );

            mixin(zstdSetCParam("ldmHashLog"));
            mixin(zstdSetCParam("ldmMinMatch"));
            mixin(zstdSetCParam("ldmBucketSizeLog"));
            mixin(zstdSetCParam("ldmHashRateLog"));
        }

        // contentSize is a ulong whose "unknown" default is ulong.max;
        // comparing with size_t.max would be wrong where size_t is 32 bits
        if (contentSize != ulong.max)
            ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize);
        if (checksumFlag)
            ZSTD_CCtx_setParameter(
                cctx,
                ZSTD_cParameter.checksumFlag,
                1
            );
        // Turn off the dictionary ID, not the checksum.
        // NOTE(review): member name follows zstd.h `ZSTD_c_dictIDFlag` with
        // the prefix stripped like the other members - confirm against the
        // squiz_box.c.zstd binding.
        if (!dictIdFlag)
            ZSTD_CCtx_setParameter(
                cctx,
                ZSTD_cParameter.dictIDFlag,
                0
            );

        mixin(zstdSetCParam("nbWorkers"));
        mixin(zstdSetCParam("jobSize"));
        mixin(zstdSetCParam("overlapLog"));
    }

    /// Allocates a new stream, creates its Zstandard compression stream
    /// and applies the parameters of this algorithm.
    /// Throws: Exception if the Zstandard stream could not be created.
    Stream initialize() @trusted
    {
        auto stream = new Stream;

        stream.strm = ZSTD_createCStream();
        enforce(stream.strm !is null, "Could not create Zstandard compression stream");

        setParams(stream);

        return stream;
    }

    /// Runs one ZSTD_compressStream2 step on the buffers of the stream.
    /// The directive is `end` when `lastChunk` is set, `_continue` otherwise.
    /// Returns: Yes.streamEnded when `lastChunk` is set and the library
    /// returns zero.
    /// Throws: Exception if the library returns an error code.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        auto cctx = cast(ZSTD_CCtx*) stream.strm;
        const directive = lastChunk ? ZSTD_EndDirective.end : ZSTD_EndDirective._continue;

        const res = (() @trusted => ZSTD_compressStream2(cctx, &stream.outBuf, &stream.inBuf, directive))();

        zstdError(res, "Could not compress data with Zstandard");
        return cast(Flag!"streamEnded")(lastChunk && res == 0);
    }

    /// Resets the session of the compression context (`session_only`),
    /// pledges the content size again if one is set, and clears the
    /// buffers and totals of the stream.
    void reset(Stream stream) @trusted
    {
        auto cctx = cast(ZSTD_CCtx*) stream.strm;
        ZSTD_CCtx_reset(cctx, ZSTD_ResetDirective.session_only);

        if (contentSize != ulong.max)
            ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize);

        stream.inBuf = ZSTD_inBuffer.init;
        stream.outBuf = ZSTD_outBuffer.init;
        stream.totalIn = 0;
        stream.totalOut = 0;
    }

    /// Frees the Zstandard compression stream.
    void end(Stream stream) @trusted
    {
        ZSTD_freeCStream(stream.strm);
    }
}

/// Decompression of data compressed with Zstandard.
struct DecompressZstd
{
    static assert(isSquizAlgo!DecompressZstd);

    /// Forwarded to `ZSTD_dParameter.windowLogMax` when non-zero.
    int windowLogMax;

    static final class Stream : ZstdStream
    {
        private ZSTD_DStream* strm;
    }

    // Forwards the non-zero parameters to the decompression context.
    private void setParams(Stream stream) @trusted
    {
        auto dctx = cast(ZSTD_DCtx*) stream.strm;

        if (windowLogMax)
            ZSTD_DCtx_setParameter(dctx,
                ZSTD_dParameter.windowLogMax, windowLogMax);
    }

    /// Allocates a new stream, creates its Zstandard decompression stream
    /// and applies the parameters of this algorithm.
    /// Throws: Exception if the Zstandard stream could not be created.
    Stream initialize() @trusted
    {
        auto stream = new Stream;

        stream.strm = ZSTD_createDStream();
        enforce(stream.strm !is null, "Could not create Zstandard decompression stream");

        setParams(stream);

        return stream;
    }

    /// Runs one ZSTD_decompressStream step on the buffers of the stream.
    /// Returns: Yes.streamEnded when the library returns zero.
    /// Throws: Exception if the library returns an error code.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe
    {
        const res = (() @trusted => ZSTD_decompressStream(stream.strm, &stream.outBuf, &stream
                .inBuf))();

        zstdError(res, "Could not decompress data with Zstandard");
        return cast(Flag!"streamEnded")(res == 0);
    }

    /// Resets the session of the decompression context (`session_only`)
    /// and clears the buffers and totals of the stream, as
    /// `CompressZstd.reset` does.
    void reset(Stream stream) @trusted
    {
        auto dctx = cast(ZSTD_DCtx*) stream.strm;
        ZSTD_DCtx_reset(dctx, ZSTD_ResetDirective.session_only);

        // without this, the position left in the old buffers would be
        // added to the totals when the next buffers are installed
        stream.inBuf = ZSTD_inBuffer.init;
        stream.outBuf = ZSTD_outBuffer.init;
        stream.totalIn = 0;
        stream.totalOut = 0;
    }

    /// Frees the Zstandard decompression stream.
    void end(Stream stream) @trusted
    {
        ZSTD_freeDStream(stream.strm);
    }
}

///
@("Compress / Decompress Zstandard")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    const squized = only(input)
        .compressZstd()
        .join();

    const output = only(squized)
        .decompressZstd()
        .join();

    assert(squized.length < input.length);
    assert(output == input);

    // for such long and repetitive data, ratio is around 0.047%
    const ratio = cast(double) squized.length / cast(double) input.length;
    assert(ratio < 0.0005);
}