1 /// Compression and decompression streaming algorithms.
2 ///
3 /// Each compression or decompression algorithm is represented by a struct
4 /// that contains parameters for compression/decompression.
5 /// Besides the parameters they carry, algorithms have no state. Each
6 /// algorithm instance can be used for an unlimited number of parallel jobs.
7 ///
/// The algorithms create a stream, which carries the state and allocated
/// resources of the ongoing compression.
10 ///
11 /// The compression/decompression jobs are run by the `squiz` function,
12 /// or one of the related helpers built upon it (e.g. deflate, deflateGz, inflate, ...).
13 ///
/// `squiz` and related functions take an InputRange of ubyte[] and return an InputRange of ubyte[].
/// This allows streaming in the most natural way for a D program and provides
/// the greatest versatility.
17 /// It is possible to read the data from any source (file, network, memory),
18 /// process the data, and write to any kind of destination.
19 /// This also allows to process gigabytes of data with little memory usage.
20 ///
21 /// Compression often wraps the compressed data with header and trailer
22 /// that give the decompression algorithm useful information, especially
23 /// to check the integrity of the data after decompression.
24 /// This is called the format.
/// Some compression algorithms offer different formats, and sometimes
26 /// the possibility to not wrap the data at all (raw format), in which
27 /// case integrity check is not performed. This is usually used when
28 /// an external integrity check is done, for example when archiving
29 /// compressed stream in Zip or 7z archives.
30 module squiz_box.squiz;
31 
32 import squiz_box.c.bzip2;
33 import squiz_box.c.lzma;
34 import squiz_box.c.zlib;
35 import squiz_box.c.zstd;
36 import squiz_box.priv;
37 
38 import std.datetime.systime;
39 import std.exception;
40 import std.range;
41 import std.typecons;
42 
/// Default size, in bytes, of the chunk buffers used for data exchanges
/// and I/O operations.
enum defaultChunkSize = 8192;

/// Definition of a byte chunk, which is the unit of data
/// exchanged during I/O and data transformation operations.
alias ByteChunk = const(ubyte)[];

/// A dynamic type of input range of chunks of bytes.
alias ByteRange = InputRange!ByteChunk;
52 
/// Static check that a type is a byte range, i.e. an input range
/// whose elements are themselves ranges (typically arrays) of `ubyte`.
/// Type qualifiers on the bytes are ignored: chunks of `ubyte`,
/// `const(ubyte)` and `immutable(ubyte)` are all accepted.
template isByteRange(BR)
{
    import std.range : ElementType, isInputRange;
    import std.traits : Unqual;

    // Arr is the chunk type yielded by the range, El the type of one chunk element
    alias Arr = ElementType!BR;
    alias El = ElementType!Arr;

    enum isByteRange = isInputRange!BR && is(Unqual!El == ubyte);
}
64 
65 static assert(isByteRange!ByteRange);
66 
/// Exception raised when a decompression algorithm is fed
/// with inconsistent data.
/// Typically, the data was not compressed with the corresponding
/// algorithm, or it is not wrapped in the expected format.
@safe class DataException : Exception
{
    // brings in the standard set of exception constructors
    mixin basicExceptionCtors;
}
75 
/// Compile-time check that `A` is a proper squiz algorithm, that is, it
/// provides `initialize`, `process`, `reset` and `end`, and the stream
/// returned by `initialize` implicitly converts to `SquizStream`.
template isSquizAlgo(A)
{
    enum isSquizAlgo = is(typeof((A a) {
                auto s = a.initialize();
                static assert(is(typeof(s) : SquizStream));
                Flag!"streamEnded" done = a.process(s, Yes.lastChunk);
                a.reset(s);
                a.end(s);
            }));
}
87 
/// Type of the stream created by the `initialize` method
/// of the squiz algorithm `A`.
template StreamType(A) if (isSquizAlgo!A)
{
    // typeof does not evaluate the call, it only yields its static type
    alias StreamType = typeof(A.init.initialize());
}
95 
/// A squiz algorithm whose type is erased behind an interface.
/// This helps to choose the algorithm at run time.
interface SquizAlgo
{
    /// Initialize a new stream for processing data
    /// with this algorithm.
    SquizStream initialize() @safe;

    /// Processes the input stream data to produce output stream data.
    /// lastChunk indicates that the input chunk in stream is the last one.
    /// This is an indication to the algorithm that it can start to finish
    /// the work.
    /// Returned value indicates that there won't be more output generated
    /// than the one in stream.output
    Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe;

    /// Reset the state of this stream, yet reusing the same
    /// allocated resources, in order to start processing
    /// another data stream.
    void reset(SquizStream stream) @safe;

    /// Release the resources used by this stream.
    /// Most of the memory (if not all) used by the algorithm
    /// is allocated with the garbage collector, so not
    /// calling this function has little consequence (if not none).
    void end(SquizStream stream) @safe;
}
123 
124 static assert(isSquizAlgo!SquizAlgo);
125 
/// Wrap the provided compile-time algorithm behind
/// the runtime `SquizAlgo` interface.
SquizAlgo squizAlgo(A)(A algo) @safe if (isSquizAlgo!A)
{
    SquizAlgo erased = new CSquizAlgo!A(algo);
    return erased;
}
131 
///
@("squizAlgo")
unittest
{
    import test.util;
    import std.array : join;

    // the same data is compressed with the compile-time algorithm
    // and with its type-erased counterpart
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const len = 10_000;
    const input = generateRepetitiveData(len, phrase).join();

    auto ctAlgo = Deflate.init;
    auto rtAlgo = squizAlgo(Deflate.init);

    const rtSquized = only(input).squiz(rtAlgo).join();
    const ctSquized = only(input).squiz(ctAlgo).join();

    // both must produce the very same bytes
    assert(ctSquized == rtSquized);
}
151 
// Adapter implementing the runtime SquizAlgo interface
// by forwarding to the compile-time algorithm A.
private class CSquizAlgo(A) : SquizAlgo
{
    alias Stream = StreamType!A;

    A algo;

    private this(A algo) @safe
    {
        this.algo = algo;
    }

    // Downcast the type-erased stream to the stream type of A.
    private Stream checkStream(SquizStream stream)
    {
        auto concrete = cast(Stream) stream;
        assert(concrete, "provided stream is not produced by this algorithm");
        return concrete;
    }

    SquizStream initialize() @safe
    {
        return algo.initialize();
    }

    Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe
    {
        auto concrete = checkStream(stream);
        return algo.process(concrete, lastChunk);
    }

    void reset(SquizStream stream) @safe
    {
        algo.reset(checkStream(stream));
    }

    void end(SquizStream stream) @safe
    {
        algo.end(checkStream(stream));
    }
}
190 
/// A state carrying, processing stream for squiz algorithms.
/// The stream does not carry any buffer, only slices to external buffers.
/// One may normally not use this directly as everything is handled
/// by the `squiz` function.
interface SquizStream
{
    /// Input data for the algorithm.
    /// The slice is reduced by its beginning as the processing moves on.
    /// Must be refilled when empty before calling the algorithm `process` method.
    @property const(ubyte)[] input() const @safe;
    /// Ditto
    @property void input(const(ubyte)[] inp) @safe;

    /// How many bytes read since the start of the stream processing.
    @property size_t totalInput() const @safe;

    /// Output buffer for the algorithm to write to.
    /// This is NOT the data ready after process, but where the
    /// algorithm must write next.
    /// After a call to process, the slice is reduced by its beginning,
    /// and the data written is therefore the one before the slice.
    @property inout(ubyte)[] output() inout @safe;
    /// Ditto
    @property void output(ubyte[] outp) @safe;

    /// How many bytes written since the start of the stream processing.
    @property size_t totalOutput() const @safe;
}
218 
// Compile-time check that S exposes assignable `next_in`, `avail_in`,
// `next_out` and `avail_out` members.
private template isZlibLikeStream(S)
{
    enum isZlibLikeStream = is(typeof((S s) {
                s.next_in = cast(const(ubyte)*) null;
                s.avail_in = 0;
                s.next_out = cast(ubyte*) null;
                s.avail_out = 0;
            }));
}
228 
// Implements the input/output properties of SquizStream on top of
// a stream struct S having `next_in`/`avail_in`/`next_out`/`avail_out` members.
// The struct is stored in the `strm` member.
private mixin template ZlibLikeStreamImpl(S) if (isZlibLikeStream!S)
{
    private S strm;

    // Remaining input: the `avail_in` bytes starting at `next_in`.
    @property const(ubyte)[] input() const @trusted
    {
        return strm.next_in[0 .. strm.avail_in];
    }

    @property void input(const(ubyte)[] inp) @trusted
    {
        alias Avail = typeof(strm.avail_in);

        // avail_in may be narrower than size_t: a bigger slice would be
        // silently truncated by the cast below and its tail would be lost.
        assert(inp.length <= Avail.max, "input chunk is too large for the stream");

        strm.next_in = inp.ptr;
        strm.avail_in = cast(Avail) inp.length;
    }

    // Remaining output room: the `avail_out` bytes starting at `next_out`.
    @property inout(ubyte)[] output() inout @trusted
    {
        return strm.next_out[0 .. strm.avail_out];
    }

    @property void output(ubyte[] outp) @trusted
    {
        alias Avail = typeof(strm.avail_out);

        // same reasoning as for the input: a truncated avail_out would
        // misreport the room that is left in the output buffer.
        assert(outp.length <= Avail.max, "output buffer is too large for the stream");

        strm.next_out = outp.ptr;
        strm.avail_out = cast(Avail) outp.length;
    }
}
255 
// Implements totalInput and totalOutput by reading the `total_in` and
// `total_out` members of the `strm` member found in the mixin scope.
mixin template ZlibLikeTotalInOutImpl()
{
    @property size_t totalInput() const
    {
        const consumed = strm.total_in;
        return cast(size_t) consumed;
    }

    @property size_t totalOutput() const
    {
        const produced = strm.total_out;
        return cast(size_t) produced;
    }
}
268 
/// Returns an InputRange containing the input data processed through the supplied algorithm.
/// A buffer of `chunkSize` bytes is allocated to hold the produced chunks.
auto squiz(I, A)(I input, A algo, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I && isSquizAlgo!A)
{
    auto chunkBuffer = new ubyte[chunkSize];
    return squiz(input, algo, chunkBuffer);
}
275 
/// ditto
auto squiz(I, A)(I input, A algo, ubyte[] chunkBuffer)
        if (isByteRange!I && isSquizAlgo!A)
{
    // the returned range owns the stream: it is ended once processing is over
    alias Range = Squiz!(I, A, Yes.endStream);

    auto stream = algo.initialize();
    return Range(input, algo, stream, chunkBuffer, ulong.max);
}
283 
/// Returns an InputRange containing the input data processed through the supplied algorithm.
/// To the difference of `squiz`, `squizReuse` will not manage the state (aka stream) of the algorithm,
/// which allows to reuse it (and its allocated resources) for several jobs.
/// squizReuse will drive the algorithm and move the stream forward until processing is over.
/// The stream must be either freshly initialized or freshly reset before being passed
/// to this function.
/// The stream is not ended by the returned range: ending it is left to the caller.
auto squizReuse(I, A, S)(I input, A algo, S stream, ubyte[] chunkBuffer)
        if (isByteRange!I && isSquizAlgo!A)
{
    static assert(is(StreamType!A == S), S.stringof ~ " is not the stream produced by " ~ A.stringof);
    return Squiz!(I, A, No.endStream)(input, algo, stream, chunkBuffer, ulong.max);
}
296 
/// Same as squiz, but will stop encoding/decoding after maxOut bytes have been written out.
/// Useful to decode some raw encoded streams where the uncompressed size is known
/// and the algorithm does not always report Yes.streamEnded.
/// The chunk buffer is allocated with the smaller of `maxOut` and `chunkSize`.
auto squizMaxOut(I, A)(I input, A algo, ulong maxOut, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I && isSquizAlgo!A)
{
    import std.algorithm : min;

    // no need for a buffer bigger than the amount of data to be written
    const sz = cast(size_t) min(maxOut, chunkSize);
    auto chunkBuffer = new ubyte[sz];
    auto stream = algo.initialize();
    return Squiz!(I, A, Yes.endStream)(input, algo, stream, chunkBuffer, maxOut);
}
309 
// Common transformation range for all compression/decompression functions.
// I is a byte input range
// A is a squiz algorithm
// if Yes.endStream, the stream is ended (algo.end) when data is done processing
private struct Squiz(I, A, Flag!"endStream" endStream)
{
    private alias Stream = StreamType!A;

    // Byte input range (by chunks)
    private I input;

    // The algorithm
    private A algo;

    // State of the ongoing processing
    private Stream stream;

    // Buffer used to store the front chunk
    private ubyte[] chunkBuffer;
    // Slice of the buffer that is valid for read out
    private ByteChunk chunk;

    // Number of bytes that may still be written out
    private ulong maxLen;

    // Whether processing is over: either the algorithm reported
    // the end of the stream, or maxLen bytes were written out
    private bool ended;

    private this(I input, A algo, Stream stream, ubyte[] chunkBuffer, ulong maxLen)
    {
        this.input = input;
        this.algo = algo;
        this.stream = stream;
        this.chunkBuffer = chunkBuffer;
        this.maxLen = maxLen;
        prime();
    }

    @property bool empty()
    {
        return chunk.length == 0;
    }

    @property ByteChunk front()
    {
        return chunk;
    }

    void popFront()
    {
        chunk = null;
        if (!ended)
            prime();
    }

    // Run the algorithm until chunkBuffer is full, the stream has ended
    // or the output limit is reached. The produced data is then in `chunk`.
    private void prime()
    {
        import std.algorithm : min;

        while (chunk.length < chunkBuffer.length)
        {
            if (stream.input.length == 0 && !input.empty)
                stream.input = input.front;

            // Room offered to the algorithm for this pass: what is left in the
            // buffer, capped by what may still be written out.
            // The result fits in size_t as it cannot exceed chunkBuffer.length.
            const room = cast(size_t) min(chunkBuffer.length - chunk.length, maxLen);
            stream.output = chunkBuffer[chunk.length .. chunk.length + room];

            const streamEnded = algo.process(stream, cast(Flag!"lastChunk") input.empty);

            // The offered slice does not necessarily extend to the end of the
            // buffer, so the written amount is computed from the slice itself.
            // Only the bytes actually written count against the output limit.
            const written = room - stream.output.length;
            chunk = chunkBuffer[0 .. chunk.length + written];
            maxLen -= written;

            // popFront must be called at the end because it invalidates
            // the input chunk referenced by the stream
            if (stream.input.length == 0 && !input.empty)
                input.popFront();

            if (streamEnded || maxLen == 0)
            {
                ended = true;
                static if (endStream)
                    algo.end(stream);
                break;
            }
        }
    }
}
396 
// Decodes a raw legacy LZMA stream whose decoded length is known in advance:
// squizMaxOut is given that length as the output limit.
@("squizMaxOut")
unittest
{
    // encoded header of test/data/archive.7z
    const(ubyte)[] dataIn = [
        0x00, 0x00, 0x81, 0x33, 0x07, 0xae, 0x0f, 0xd1, 0xf2, 0xfb, 0xfd, 0x40,
        0xc0, 0x90, 0xd2, 0xff, 0x7d, 0x69, 0x4d, 0x90, 0xd3, 0x2c, 0x42, 0x66,
        0xb0, 0xc6, 0xcc, 0xeb, 0xcf, 0x59, 0xcc, 0x96, 0x23, 0xf9, 0x91, 0xc8,
        0x75, 0x49, 0xe9, 0x9d, 0x1a, 0xa8, 0xa5, 0x9d, 0xf7, 0x75, 0x29, 0x1a,
        0x90, 0x78, 0x18, 0x8e, 0x42, 0x1a, 0x97, 0x0c, 0x40, 0xb7, 0xaa, 0xb6,
        0x16, 0xa9, 0x91, 0x0c, 0x58, 0xad, 0x75, 0xf7, 0x8f, 0xaf, 0x8f, 0x45,
        0xdb, 0x78, 0xd0, 0x8e, 0xc6, 0x1b, 0x72, 0xa5, 0xf4, 0xd2, 0x46, 0xf7,
        0xe1, 0xce, 0x01, 0x80, 0x7f, 0x3d, 0x66, 0xa5, 0x2d, 0x64, 0xd7, 0xb0,
        0x41, 0xdc, 0x92, 0x59, 0x88, 0xb0, 0x4c, 0x67, 0x34, 0xb6, 0x4e, 0xd3,
        0xd5, 0x01, 0x8d, 0x43, 0x13, 0x9c, 0x82, 0x78, 0x4d, 0xcf, 0x8c, 0x51,
        0x25, 0x0f, 0xd5, 0x1d, 0x80, 0x4b, 0x80, 0xea, 0x18, 0xc1, 0x29, 0x49,
        0xe4, 0x4d, 0x4d, 0x8b, 0xb9, 0xa1, 0xfc, 0x17, 0x2b, 0xb3, 0xe6, 0x00,
        0x00, 0x00
    ];
    // decoded header data of test/data/archive.7z
    const(ubyte)[] expectedDataOut = [
        0x01, 0x04, 0x06, 0x00, 0x01, 0x09, 0x40, 0x00, 0x07, 0x0b, 0x01, 0x00,
        0x01, 0x21, 0x21, 0x01, 0x00, 0x0c, 0x8d, 0xe2, 0x00, 0x08, 0x0d, 0x03,
        0x09, 0x8d, 0xc1, 0x07, 0x0a, 0x01, 0x84, 0x4d, 0x4d, 0xa8, 0x9e, 0xf4,
        0xb3, 0xdb, 0x12, 0xed, 0x64, 0x40, 0x00, 0x00, 0x05, 0x03, 0x19, 0x0d,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x11, 0x55, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00,
        0x20, 0x00, 0x32, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00,
        0x00, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x31, 0x00,
        0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00, 0x00, 0x00, 0x66, 0x00,
        0x6f, 0x00, 0x6c, 0x00, 0x64, 0x00, 0x65, 0x00, 0x72, 0x00, 0x2f, 0x00,
        0x63, 0x00, 0x68, 0x00, 0x6d, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x20, 0x00,
        0x36, 0x00, 0x36, 0x00, 0x36, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00,
        0x74, 0x00, 0x00, 0x00, 0x14, 0x1a, 0x01, 0x00, 0x80, 0x96, 0x9f, 0xd5,
        0xc8, 0x53, 0xd8, 0x01, 0x80, 0x50, 0x82, 0x4f, 0xc6, 0x53, 0xd8, 0x01,
        0x00, 0xff, 0x13, 0x13, 0xb7, 0x52, 0xd8, 0x01, 0x15, 0x0e, 0x01, 0x00,
        0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xb6, 0x81,
        0x00, 0x00
    ];

    auto algo = DecompressLzma(LzmaFormat.rawLegacy);

    // the whole encoded data is supplied as a single chunk and the output
    // is limited to the expected decoded length
    const dataOut = only(dataIn)
        .squizMaxOut(algo, expectedDataOut.length)
        .join();

    assert(dataOut == expectedDataOut);
}
445 
/// The Copy algorithm does not transform data at all.
/// This is useful when reading/writing data that may or may not
/// be compressed: with Copy, the same code handles both kinds
/// of streams.
final class CopyStream : SquizStream
{
    // data still to be copied, and amount copied so far
    private const(ubyte)[] _inp;
    size_t _totalIn;
    // room still available for writing, and amount written so far
    private ubyte[] _outp;
    size_t _totalOut;

    // --- input side ---

    @property const(ubyte)[] input() const @safe
    {
        return _inp;
    }

    @property void input(const(ubyte)[] inp) @safe
    {
        _inp = inp;
    }

    // --- output side ---

    @property inout(ubyte)[] output() inout @safe
    {
        return _outp;
    }

    @property void output(ubyte[] outp) @safe
    {
        _outp = outp;
    }

    // --- counters ---

    @property size_t totalInput() const @safe
    {
        return _totalIn;
    }

    @property size_t totalOutput() const @safe
    {
        return _totalOut;
    }
}
487 
/// ditto
struct Copy
{
    static assert(isSquizAlgo!Copy);

    /// Create a new, empty stream.
    CopyStream initialize() @safe
    {
        return new CopyStream;
    }

    /// Copy as many bytes as possible from the stream input to the stream output.
    /// The end of stream is reported once the last chunk is entirely consumed.
    Flag!"streamEnded" process(CopyStream stream, Flag!"lastChunk" lastChunk) @safe
    {
        import std.algorithm : min;

        // the amount transferred is bound by both the input and the output slices
        const n = min(stream._inp.length, stream._outp.length);
        stream._outp[0 .. n] = stream._inp[0 .. n];

        stream._totalIn += n;
        stream._totalOut += n;
        stream._inp = stream._inp[n .. $];
        stream._outp = stream._outp[n .. $];

        const done = lastChunk && stream._inp.length == 0;
        return cast(Flag!"streamEnded") done;
    }

    /// Drop the slices and zero the counters of the stream.
    void reset(CopyStream stream) @safe
    {
        stream._totalIn = 0;
        stream._totalOut = 0;
        stream._inp = null;
        stream._outp = null;
    }

    /// Nothing to release for this algorithm.
    void end(CopyStream) @safe
    {
    }
}
526 
/// ditto
auto copy(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    // same constraint as the other helpers built upon squiz, so that
    // a wrong argument type is reported at the call site
    return squiz(input, Copy.init, chunkSize);
}
532 
///
@("Copy")
unittest
{
    import test.util : generateRepetitiveData;
    import std.array : join;

    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const len = 10_000;

    // reference data, joined in a single array
    const input = generateRepetitiveData(len, phrase).join();

    // copying with arbitrary chunk sizes on input and output
    const cop1 = generateRepetitiveData(len, phrase, 1231).copy(234).join();
    assert(input == cop1);

    const cop2 = generateRepetitiveData(len, phrase, 296).copy(6712).join();
    assert(input == cop2);
}
551 
/// Describes what type of header and trailer are wrapping
/// a deflated stream.
enum ZlibFormat
{
    /// Zlib header and trailer
    zlib,
    /// Gzip header and trailer
    gz,
    /// Auto detection of Zlib or Gzip format (only used with Inflate,
    /// Deflate rejects it)
    autoDetect,
    /// No header and trailer, therefore no integrity check included.
    /// This is to be used in other formats such as Zip.
    /// When using raw, it is advised to use an external integrity check.
    raw,
}
567 
// Length of the null terminated string `str`, without reading more
// than `maxlen` characters.
// Returns the index of the first null character, or `maxlen` if there is
// none within the first `maxlen` characters. Returns 0 if `str` is null.
private size_t strnlen(const(byte)* str, size_t maxlen) @system
{
    if (!str)
        return 0;

    size_t l;
    // the bound is tested before the character so that str[maxlen],
    // which may be past the end of the buffer, is never read
    while (l < maxlen && str[l] != 0)
        l++;
    return l;
}
581 
@("strnlen")
unittest
{
    // terminator in the middle of the data
    const middle = cast(const(byte)*)("abcdefghij\0klmn".ptr);
    // terminator right at the start
    const start = cast(const(byte)*)("\0bcdefghij\0klmn".ptr);

    assert(strnlen(null, 0) == 0);
    assert(strnlen(middle, 15) == 10);
    assert(strnlen(middle, 10) == 10);
    assert(strnlen(middle, 9) == 9);
    assert(strnlen(middle, 0) == 0);
    assert(strnlen(start, 15) == 0);
}
592 
/// Header data for the Gzip format.
/// Gzip includes metadata about the file which is compressed.
/// These can be specified here when compressing from a stream
/// rather than directly from a file.
struct GzHeader
{
    import core.stdc.config : c_ulong;

    /// operating system encoded in the Gz header
    /// Not all possible values are listed here, only
    /// the most useful ones
    enum Os
    {
        fatFs = 0,
        unix = 3,
        macintosh = 7,
        ntFs = 11,
        unknown = 255,
    }

    // Operating system used by default, selected from the compilation target.
    // Targets that match none of the versions below fall back to Os.unknown.
    version (OSX)
        enum defaultOs = Os.macintosh;
    else version (iOS)
        enum defaultOs = Os.macintosh;
    else version (Posix)
        enum defaultOs = Os.unix;
    else version (Windows)
        enum defaultOs = Os.ntFs;
    else
        enum defaultOs = Os.unknown;

    /// Whether the content is believed to be text
    Flag!"text" text;

    // storing in unix format to avoid
    // negative numbers with SysTime.init
    private c_ulong _mtime;

    /// Modification time
    @property SysTime mtime() const @safe
    {
        return SysTime(unixTimeToStdTime(_mtime));
    }

    /// ditto
    @property void mtime(SysTime time) @safe
    {
        _mtime = stdTimeToUnixTime(time.stdTime);
    }

    /// Operating system that wrote the gz file
    Os os = defaultOs;

    /// Filename to be included in the header
    string filename;

    /// Comment to be included in the header
    string comment;

    // Size of the buffers receiving the name and the comment when reading a header
    private enum bufSize = 256;

    // Decode a Latin-1 C string held in a buffer of bufSize characters.
    private string fromLatin1z(const(byte)* ptr) @system
    {
        // ptr points to a buffer of bufSize characters.
        // End of string is a null character or end of buffer.
        // Encoding is latin 1.
        import std.encoding : Latin1Char, transcode;

        const len = strnlen(ptr, bufSize);
        auto str = cast(const(Latin1Char)[]) ptr[0 .. len];

        string res;
        transcode(str, res);
        return res;
    }

    // Encode str to a freshly allocated, null terminated, Latin-1 C string.
    private byte* toLatin1z(string str) @trusted
    {
        import std.encoding : Latin1Char, transcode;

        Latin1Char[] l1;
        transcode(str, l1);
        auto res = (cast(byte[]) l1) ~ 0;
        return res.ptr;
    }

    // Build the header from the zlib representation.
    // Name and comment are only read if the corresponding pointer is set.
    private this(gz_headerp gzh) @system
    {
        text = gzh.text ? Yes.text : No.text;
        _mtime = gzh.time;
        os = cast(Os) gzh.os;
        if (gzh.name)
            filename = fromLatin1z(gzh.name);
        if (gzh.comment)
            comment = fromLatin1z(gzh.comment);
    }

    // Allocate and fill the zlib representation of this header.
    private gz_headerp toZlib() @safe
    {
        auto gzh = new gz_header;
        gzh.text = text ? 1 : 0;
        gzh.time = _mtime;
        gzh.os = cast(int) os;
        if (filename)
            gzh.name = toLatin1z(filename);
        if (comment)
            gzh.comment = toLatin1z(comment);
        return gzh;
    }
}
703 
704 /// Type of delegate to use as callback for Inflate.gzHeaderDg
705 alias GzHeaderDg = void delegate(GzHeader header);
706 
/// Helper to set GzHeader.text
/// Will check that the data only contains ASCII text characters:
/// printable characters (0x20 to 0x7e), horizontal tab, line feed
/// and carriage return.
/// Empty data is reported as text.
Flag!"text" isText(const(ubyte)[] data)
{
    import std.algorithm : all;

    static bool isTextByte(ubyte c)
    {
        // 0x09 (tab) is part of ordinary text files as much as 0x0a and 0x0d
        return c == 0x09 || c == 0x0a || c == 0x0d || (c >= 0x20 && c <= 0x7e);
    }

    return cast(Flag!"text") data.all!isTextByte;
}
717 
/// Stream wrapping a zlib `z_stream`, used as base class
/// of the Deflate and Inflate streams.
class ZlibStream : SquizStream
{
    mixin ZlibLikeStreamImpl!z_stream;
    mixin ZlibLikeTotalInOutImpl!();

    private this() @safe
    {
        // allocation and release callbacks handed over to zlib
        strm.zfree = &gcFree;
        strm.zalloc = &(gcAlloc!uint);
    }
}
729 
/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is wrapped by Zlib header and trailer.
auto deflate(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    // default parameters, zlib format
    Deflate algo;
    return squiz(input, algo, chunkSize);
}
736 
/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is wrapped by Gzip header and trailer.
/// Supplying a header is entirely optional. Zlib produces a default header if not supplied.
/// The default header has text false, mtime zero, unknown os, and
/// no name or comment.
auto deflateGz(I)(I input, GzHeader header, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I)
{
    Deflate algo;
    algo.gzHeader = header;
    algo.format = ZlibFormat.gz;
    return squiz(input, algo, chunkSize);
}
750 
/// ditto
auto deflateGz(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    // no header supplied: only the format is set
    Deflate algo;
    algo.format = ZlibFormat.gz;
    return squiz(input, algo, chunkSize);
}
758 
/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data isn't wrapped by any header or trailer.
auto deflateRaw(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    Deflate algo;
    algo.format = ZlibFormat.raw;
    return squiz(input, algo, chunkSize);
}
767 
/// Zlib's deflate algorithm
struct Deflate
{
    static assert(isSquizAlgo!Deflate);
    static assert(is(StreamType!Deflate == Stream));

    /// Which format to use for the deflated stream.
    /// In case ZlibFormat.gz, the gzHeader field will be used if supplied,
    /// otherwise default values will be used.
    ZlibFormat format;

    /// Compression level from 1 (fastest) to 9 (best compression).
    int level = 6;

    /// The GzHeader to be used with ZlibFormat.gz.
    Nullable!GzHeader gzHeader;

    /// Advanced parameters
    /// See zlib's documentation of `deflateInit2`.
    /// windowBits must be between 9 and 15 included
    /// and is adjusted according to the chosen format.
    int windowBits = 15;
    /// ditto
    int memLevel = 8;
    /// ditto
    int strategy = Z_DEFAULT_STRATEGY;

    /// Stream type of the Deflate algorithm
    static final class Stream : ZlibStream
    {
    }

    /// Create and initialize a stream according to the parameters.
    /// An exception is thrown if the format is ZlibFormat.autoDetect
    /// or if zlib fails to initialize the stream.
    Stream initialize() @safe
    {
        assert(
            windowBits >= 9 && windowBits <= 15,
            "inconsistent windowBits"
        );

        // the wrapping format is passed to zlib through the windowBits argument
        int wb;
        final switch (format)
        {
        case ZlibFormat.zlib:
            wb = windowBits;
            break;
        case ZlibFormat.gz:
            wb = windowBits + 16;
            break;
        case ZlibFormat.autoDetect:
            throw new Exception("invalid ZlibFormat for Deflate");
        case ZlibFormat.raw:
            wb = -windowBits;
            break;
        }

        auto stream = new Stream();

        const res = (() @trusted => deflateInit2(
                &stream.strm, level, Z_DEFLATED,
                wb, memLevel, strategy,
        ))();

        enforce(
            res == Z_OK,
            "Could not initialize Zlib deflate stream: " ~ zResultToString(res)
        );

        // a custom header only makes sense with the gz format
        const withHeader = format == ZlibFormat.gz && !gzHeader.isNull;
        if (withHeader)
        {
            auto head = gzHeader.get.toZlib();
            (() @trusted => deflateSetHeader(&stream.strm, head))();
        }

        return stream;
    }

    /// Run deflate on the stream, finishing it if lastChunk is set.
    /// An exception is thrown if zlib reports anything else than
    /// Z_OK or Z_STREAM_END.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        const flush = lastChunk ? Z_FINISH : Z_NO_FLUSH;
        const res = (() @trusted => squiz_box.c.zlib.deflate(&stream.strm, flush))();

        const finished = res == Z_STREAM_END;

        enforce(
            finished || res == Z_OK,
            "Zlib deflate failed with code: " ~ zResultToString(res)
        );

        return cast(Flag!"streamEnded") finished;
    }

    /// Reset the stream with `deflateReset`.
    void reset(Stream stream) @trusted
    {
        deflateReset(&stream.strm);
    }

    /// End the stream with `deflateEnd`.
    void end(Stream stream) @trusted
    {
        deflateEnd(&stream.strm);
    }
}
864 
/// Returns an InputRange streaming over data inflated with Zlib.
/// The input data must be deflated with a zlib format.
auto inflate(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    // constrained like the deflate helpers, so that a wrong
    // argument type is reported at the call site
    return squiz(input, Inflate.init, chunkSize);
}
871 
/// Returns an InputRange streaming over data inflated with Zlib.
/// The input data must be deflated with a gz format.
/// If headerDg is not null, it will be called
/// as soon as the header is read from the stream.
auto inflateGz(I)(I input, GzHeaderDg headerDg, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I)
{
    auto algo = Inflate.init;
    algo.format = ZlibFormat.gz;
    algo.gzHeaderDg = headerDg;
    return squiz(input, algo, chunkSize);
}
883 
/// ditto
auto inflateGz(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    // no header callback
    return inflateGz(input, null, chunkSize);
}
889 
/// Returns an InputRange streaming over data inflated with Zlib.
/// The input must be raw deflated data
auto inflateRaw(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    auto algo = Inflate.init;
    algo.format = ZlibFormat.raw;
    return squiz(input, algo, chunkSize);
}
898 
899 /// Zlib's inflate algorithm
900 struct Inflate
901 {
902     static assert(isSquizAlgo!Inflate);
903 
    /// Which format is expected for the deflated stream.
    /// If gzHeaderDg is set, it is called with the Gz header once it is read.
906     ZlibFormat format;
907 
    /// If set, will be called with the Gz header once it is known
909     GzHeaderDg gzHeaderDg;
910 
911     /// Advanced parameters
    /// See zlib's documentation of `inflateInit2`.
913     /// windowBits can be 0 if format is ZlibFormat.zlib.
914     /// Otherwise it must be between 9 and 15 included.
915     int windowBits = 15;
916 
917     private static final class Gzh
918     {
919         private gz_header gzh;
920         private byte[GzHeader.bufSize] nameBuf;
921         private byte[GzHeader.bufSize] commentBuf;
922 
923         private GzHeaderDg dg;
924         private bool dgCalled;
925 
926         this(GzHeaderDg dg) @safe
927         {
928             gzh.name = &nameBuf[0];
929             gzh.name_max = cast(uint) nameBuf.length;
930             gzh.comment = &commentBuf[0];
931             gzh.comm_max = cast(uint) commentBuf.length;
932 
933             this.dg = dg;
934         }
935     }
936 
937     static final class Stream : ZlibStream
938     {
939         Gzh gzh;
940     }
941 
942     Stream initialize() @safe
943     {
944         assert(
945             (windowBits == 0 && format == ZlibFormat.zlib) ||
946                 (9 <= windowBits && windowBits <= 15),
947                 "inconsistent windowBits"
948         );
949         int wb = windowBits;
950         final switch (format)
951         {
952         case ZlibFormat.zlib:
953             break;
954         case ZlibFormat.gz:
955             wb += 16;
956             break;
957         case ZlibFormat.autoDetect:
958             wb += 32;
959             break;
960         case ZlibFormat.raw:
961             wb = -wb;
962             break;
963         }
964 
965         auto stream = new Stream();
966 
967         const res = (() @trusted => inflateInit2(&stream.strm, wb))();
968 
969         enforce(
970             res == Z_OK,
971             "Could not initialize Zlib's inflate stream: " ~ zResultToString(res)
972         );
973 
974         if (gzHeaderDg)
975         {
976             stream.gzh = new Gzh(gzHeaderDg);
977             (() @trusted => inflateGetHeader(&stream.strm, &stream.gzh.gzh))();
978         }
979 
980         return stream;
981     }
982 
983     package Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" /+ lastChunk +/ )
984     {
985         const res = (() @trusted => squiz_box.c.zlib.inflate(&stream.strm, Z_NO_FLUSH))();
986         //
987         if (res == Z_DATA_ERROR)
988             throw new DataException("Improper data given to deflate");
989 
990         enforce(
991             res == Z_OK || res == Z_STREAM_END,
992             "Zlib inflate failed with code: " ~ zResultToString(res)
993         );
994 
995         auto gzh = stream.gzh;
996         if (gzh && !gzh.dgCalled && gzh.gzh.done)
997         {
998             auto head = (() @trusted => GzHeader(&gzh.gzh))();
999             gzh.dg(head);
1000             gzh.dgCalled = true;
1001         }
1002 
1003         return cast(Flag!"streamEnded")(res == Z_STREAM_END);
1004     }
1005 
1006     package void reset(Stream stream) @trusted
1007     {
1008         inflateReset(&stream.strm);
1009     }
1010 
1011     package void end(Stream stream) @trusted
1012     {
1013         inflateEnd(&stream.strm);
1014     }
1015 }
1016 
///
@("Deflate / Inflate")
unittest
{
    import std.array : join;
    import test.util;

    auto deflater = Deflate.init;
    auto inflater = Inflate.init;

    const size = 100_000;
    const sentence = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const original = generateRepetitiveData(size, sentence).join();

    // compress the data...
    const compressed = only(original).squiz(deflater).join();

    // ...then decompress the result
    const restored = only(compressed).squiz(inflater).join();

    assert(compressed.length < original.length);
    assert(restored == original);

    // for such long and repetitive data, ratio is around 0.3%
    const compressedLen = cast(double) compressed.length;
    const originalLen = cast(double) original.length;
    assert(compressedLen / originalLen < 0.004);
}
1044 
///
@("Deflate / Inflate in Gz format and custom header")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const sentence = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const original = generateRepetitiveData(size, sentence).join();

    // header written during compression
    GzHeader written;
    written.mtime = Clock.currTime;
    written.os = GzHeader.Os.fatFs;
    written.text = Yes.text;
    written.filename = "boring.txt";
    written.comment = "A very boring file";

    const compressed = only(original)
        .deflateGz(written)
        .join();

    // header read back during decompression, with a counter
    // checking that the delegate fires exactly once
    GzHeader readBack;
    int calls;
    void captureHeader(GzHeader header)
    {
        readBack = header;
        calls++;
    }

    const restored = only(compressed)
        .inflateGz(&captureHeader)
        .join();

    assert(compressed.length < original.length);
    assert(restored == original);
    assert(written == readBack);
    assert(calls == 1);
}
1086 
///
@("Deflate / Inflate in raw format")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const sentence = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const original = generateRepetitiveData(size, sentence).join();

    // raw deflate, then raw inflate of the result
    const compressed = only(original).deflateRaw().join();
    const restored = only(compressed).inflateRaw().join();

    assert(compressed.length < original.length);
    assert(restored == original);
}
1111 
// Maps a zlib result code to a short readable name (used in error messages).
package string zResultToString(int res) @safe pure nothrow @nogc
{
    string label = "(Unknown result)";

    switch (res)
    {
    case Z_OK: label = "OK"; break;
    case Z_STREAM_END: label = "STREAM_END"; break;
    case Z_NEED_DICT: label = "NEED_DICT"; break;
    case Z_ERRNO: label = "ERRNO"; break;
    case Z_STREAM_ERROR: label = "STREAM_ERROR"; break;
    case Z_DATA_ERROR: label = "DATA_ERROR"; break;
    case Z_MEM_ERROR: label = "MEM_ERROR"; break;
    case Z_BUF_ERROR: label = "BUF_ERROR"; break;
    case Z_VERSION_ERROR: label = "VERSION_ERROR"; break;
    default: break;
    }

    return label;
}
1138 
// Maps a zlib flush mode to a short readable name.
package string zFlushToString(int flush) @safe pure nothrow @nogc
{
    if (flush == Z_NO_FLUSH)
        return "NO_FLUSH";
    if (flush == Z_PARTIAL_FLUSH)
        return "PARTIAL_FLUSH";
    if (flush == Z_SYNC_FLUSH)
        return "SYNC_FLUSH";
    if (flush == Z_FULL_FLUSH)
        return "FULL_FLUSH";
    if (flush == Z_FINISH)
        return "FINISH";
    if (flush == Z_BLOCK)
        return "BLOCK";
    if (flush == Z_TREES)
        return "TREES";

    return "(Unknown flush)";
}
1161 
/// Returns an InputRange containing the input data processed through Bzip2 compression.
/// The compression runs with the default parameters of `CompressBzip2`.
auto compressBzip2(I)(I input, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I)
{
    CompressBzip2 algo;
    return squiz(input, algo, chunkSize);
}
1168 
/// Stream state shared by the Bzip2 compression and decompression algorithms.
final class Bz2Stream : SquizStream
{
    mixin ZlibLikeStreamImpl!(bz_stream);

    /// Installs the GC-based allocation callbacks on the bzip2 stream.
    this() @safe
    {
        strm.bzalloc = &(gcAlloc!int);
        strm.bzfree = &gcFree;
    }

    /// Total input byte count, assembled from the two 32-bit halves kept by bzip2.
    @property size_t totalInput() const @safe
    {
        const ulong total = (cast(ulong) strm.total_in_hi32 << 32) | strm.total_in_lo32;
        return cast(size_t) total;
    }

    /// Total output byte count, assembled from the two 32-bit halves kept by bzip2.
    @property size_t totalOutput() const @safe
    {
        const ulong total = (cast(ulong) strm.total_out_hi32 << 32) | strm.total_out_lo32;
        return cast(size_t) total;
    }
}
1195 
/// Compression with the Bzip2 algorithm.
///
/// Although having better compression capabilities than Zlib (deflate),
/// Bzip2 has poor latency when it comes to streaming.
/// I.e. it can swallow several Mb of data before starting to produce output.
/// If streaming latency is an important factor, deflate/inflate
/// should be the favorite algorithm.
///
/// This algorithm does not support resource reuse, so calling reset
/// is equivalent to a call to end followed by initialize
/// (but the same instance of stream is kept).
struct CompressBzip2
{
    static assert(isSquizAlgo!CompressBzip2);

    /// Advanced Bzip2 parameters
    /// See Bzip2 documentation
    /// https://www.sourceware.org/bzip2/manual/manual.html#bzcompress-init
    int blockSize100k = 9;
    /// ditto
    int verbosity = 0;
    /// ditto
    int workFactor = 30;

    alias Stream = Bz2Stream;

    /// Allocates a stream and initializes the Bzip2 compressor on it.
    Stream initialize() @safe
    {
        auto bz = new Stream;

        const status = (() @trusted => BZ2_bzCompressInit(
                &bz.strm, blockSize100k, verbosity, workFactor,
        ))();
        enforce(
            status == BZ_OK,
            "Could not initialize Bzip2 compressor: " ~ bzResultToString(status)
        );

        return bz;
    }

    /// Runs one compression step; finishes the stream when lastChunk is set.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        // each action has exactly one "keep going" result code
        const action = lastChunk ? BZ_FINISH : BZ_RUN;
        const expected = lastChunk ? BZ_FINISH_OK : BZ_RUN_OK;

        const status = (() @trusted => BZ2_bzCompress(&stream.strm, action))();

        if (status == BZ_STREAM_END)
            return Yes.streamEnded;

        enforce(
            status == expected,
            "Bzip2 compress failed with code: " ~ bzResultToString(status)
        );

        return No.streamEnded;
    }

    /// Tears the compressor down and initializes it again on the same stream object.
    void reset(Stream stream) @safe
    {
        (() @trusted => BZ2_bzCompressEnd(&stream.strm))();

        // start again from a blank bz_stream with the allocation callbacks restored
        stream.strm = bz_stream.init;
        stream.strm.bzalloc = &(gcAlloc!int);
        stream.strm.bzfree = &gcFree;

        const status = (() @trusted => BZ2_bzCompressInit(
                &stream.strm, blockSize100k, verbosity, workFactor,
        ))();
        enforce(
            status == BZ_OK,
            "Could not initialize Bzip2 compressor: " ~ bzResultToString(status)
        );
    }

    /// Releases the compressor resources.
    void end(Stream stream) @trusted
    {
        BZ2_bzCompressEnd(&stream.strm);
    }
}
1275 
/// Returns an InputRange streaming over data decompressed with Bzip2.
/// The decompression runs with the default parameters of `DecompressBzip2`.
auto decompressBzip2(I)(I input, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I)
{
    DecompressBzip2 algo;
    return squiz(input, algo, chunkSize);
}
1282 
/// Decompression of data encoded with Bzip2.
///
/// This algorithm does not support resource reuse, so calling reset
/// is equivalent to a call to end followed by initialize
/// (but the same instance of stream is kept).
struct DecompressBzip2
{
    static assert(isSquizAlgo!DecompressBzip2);

    /// Advanced Bzip2 parameters
    /// See Bzip2 documentation
    /// https://www.sourceware.org/bzip2/manual/manual.html#bzDecompress-init
    int verbosity;
    /// ditto
    bool small;

    alias Stream = Bz2Stream;

    /// Allocates a stream and initializes the Bzip2 decompressor on it.
    Stream initialize() @safe
    {
        auto bz = new Stream;

        const status = (() @trusted => BZ2_bzDecompressInit(
                &bz.strm, verbosity, small ? 1 : 0,
        ))();
        enforce(
            status == BZ_OK,
            "Could not initialize Bzip2 decompressor: " ~ bzResultToString(status)
        );

        return bz;
    }

    /// Runs one decompression step.
    /// Throws DataException on BZ_DATA_ERROR, Exception on any other failure code.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe
    {
        const status = (() @trusted => BZ2_bzDecompress(&stream.strm))();

        if (status == BZ_DATA_ERROR)
            throw new DataException("Input data was not compressed with Bzip2");

        if (status == BZ_STREAM_END)
            return Yes.streamEnded;

        enforce(
            status == BZ_OK,
            "Bzip2 decompress failed with code: " ~ bzResultToString(status)
        );

        return No.streamEnded;
    }

    /// Tears the decompressor down and initializes it again on the same stream object.
    void reset(Stream stream) @safe
    {
        (() @trusted => BZ2_bzDecompressEnd(&stream.strm))();

        // start again from a blank bz_stream with the allocation callbacks restored
        stream.strm = bz_stream.init;
        stream.strm.bzalloc = &(gcAlloc!int);
        stream.strm.bzfree = &gcFree;

        const status = (() @trusted => BZ2_bzDecompressInit(
                &stream.strm, verbosity, small ? 1 : 0,
        ))();
        enforce(
            status == BZ_OK,
            "Could not initialize Bzip2 decompressor: " ~ bzResultToString(status)
        );
    }

    /// Releases the decompressor resources.
    void end(Stream stream) @trusted
    {
        BZ2_bzDecompressEnd(&stream.strm);
    }
}
1352 
///
@("Compress / Decompress Bzip2")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const sentence = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const original = generateRepetitiveData(size, sentence).join();

    // round trip through Bzip2
    const compressed = only(original).compressBzip2().join();
    const restored = only(compressed).decompressBzip2().join();

    assert(compressed.length < original.length);
    assert(restored == original);

    // for such long and repetitive data, ratio is around 0.12%
    const compressedLen = cast(double) compressed.length;
    const originalLen = cast(double) original.length;
    assert(compressedLen / originalLen < 0.002);
}
1379 
// Maps a Bzip2 action code (as given to BZ2_bzCompress) to a short readable name.
// Unrecognized values yield "(Unknown action)".
private string bzActionToString(int action) @safe pure nothrow @nogc
{
    switch (action)
    {
    case BZ_RUN:
        return "RUN";
    case BZ_FLUSH:
        return "FLUSH";
    case BZ_FINISH:
        return "FINISH";
    default:
        // this function names actions, not result codes
        return "(Unknown action)";
    }
}
1394 
// Maps a Bzip2 result code to a short readable name (used in error messages).
private string bzResultToString(int res) @safe pure nothrow @nogc
{
    string label = "(Unknown result)";

    switch (res)
    {
    case BZ_OK: label = "OK"; break;
    case BZ_RUN_OK: label = "RUN_OK"; break;
    case BZ_FLUSH_OK: label = "FLUSH_OK"; break;
    case BZ_FINISH_OK: label = "FINISH_OK"; break;
    case BZ_STREAM_END: label = "STREAM_END"; break;
    case BZ_SEQUENCE_ERROR: label = "SEQUENCE_ERROR"; break;
    case BZ_PARAM_ERROR: label = "PARAM_ERROR"; break;
    case BZ_MEM_ERROR: label = "MEM_ERROR"; break;
    case BZ_DATA_ERROR: label = "DATA_ERROR"; break;
    case BZ_DATA_ERROR_MAGIC: label = "DATA_ERROR_MAGIC"; break;
    case BZ_IO_ERROR: label = "IO_ERROR"; break;
    case BZ_UNEXPECTED_EOF: label = "UNEXPECTED_EOF"; break;
    case BZ_OUTBUFF_FULL: label = "OUTBUFF_FULL"; break;
    case BZ_CONFIG_ERROR: label = "CONFIG_ERROR"; break;
    default: break;
    }

    return label;
}
1431 
/// Stream state shared by the LZMA compression and decompression algorithms.
/// It also owns the option structures referenced by the filter chain.
final class LzmaStream : SquizStream
{
    mixin ZlibLikeStreamImpl!(lzma_stream);
    mixin ZlibLikeTotalInOutImpl!();

    private lzma_allocator alloc;
    private lzma_options_delta optsDelta;
    private lzma_options_lzma optsLzma;
    private lzma_filter[] filterChain;

    /// Installs the GC-based allocator on the lzma stream.
    this() @safe
    {
        alloc.alloc = &(gcAlloc!size_t);
        alloc.free = &gcFree;
        strm.allocator = &alloc;
    }

    // Builds the lzma filter chain: the requested pre-filters, then the
    // LZMA1/LZMA2 compression filter (unless format is rawCopy), then the
    // terminating marker. The chain is stored in `filterChain` and returned.
    private lzma_filter[] buildFilterChain(LzmaFormat format, LzmaFilter[] filters,
        uint preset, uint deltaDist) @safe
    {
        lzma_filter[] chain;

        foreach (filter; filters)
        {
            final switch (filter)
            {
            case LzmaFilter.delta:
                // the delta filter is the only one taking options
                optsDelta.dist = deltaDist;
                chain ~= lzma_filter(LZMA_FILTER_DELTA, cast(void*)&optsDelta);
                break;
            case LzmaFilter.bcjX86:
                chain ~= lzma_filter(LZMA_FILTER_X86, null);
                break;
            case LzmaFilter.bcjPowerPc:
                chain ~= lzma_filter(LZMA_FILTER_POWERPC, null);
                break;
            case LzmaFilter.bcjIa64:
                chain ~= lzma_filter(LZMA_FILTER_IA64, null);
                break;
            case LzmaFilter.bcjArm:
                chain ~= lzma_filter(LZMA_FILTER_ARM, null);
                break;
            case LzmaFilter.bcjArmThumb:
                chain ~= lzma_filter(LZMA_FILTER_ARMTHUMB, null);
                break;
            case LzmaFilter.bcjSparc:
                chain ~= lzma_filter(LZMA_FILTER_SPARC, null);
                break;
            }
        }

        enforce(chain.length <= 3, "Too many filters supplied");

        const withCompression = format != LzmaFormat.rawCopy;
        if (withCompression)
        {
            (() @trusted => lzma_lzma_preset(&optsLzma, preset))();
            const compFilter = format.isLegacy ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
            chain ~= lzma_filter(compFilter, cast(void*)&optsLzma);
        }

        // end marker
        chain ~= lzma_filter(LZMA_VLI_UNKNOWN, null);

        filterChain = chain;
        return filterChain;
    }

}
1498 
/// Header/trailer format for Lzma compression.
///
/// The raw formats are declared last: `isRaw` relies on this ordering.
enum LzmaFormat
{
    /// Lzma with Xz format, suitable to write *.xz files
    xz = 0,
    /// LZMA1 encoding and format, suitable for legacy *.lzma files.
    /// This format doesn't support filters.
    legacy = 1,
    /// Raw LZMA2 compression, without header/trailer.
    /// Use this to include compressed LZMA data in
    /// a container defined externally (e.g. this is used
    /// for the *.7z archives)
    raw = 2,
    /// Raw LZMA1 compression, without header/trailer.
    /// This one is still found in some *.7z files.
    rawLegacy = 3,
    /// Just copy bytes out.
    /// You may use this in combination with a filter to observe its
    /// effect, but it has otherwise no use.
    rawCopy = 4,
}
1520 
/// Whether this is a legacy (LZMA1) format, with or without header
bool isLegacy(LzmaFormat format) @safe pure nothrow @nogc
{
    const legacyFile = format == LzmaFormat.legacy;
    const legacyRaw = format == LzmaFormat.rawLegacy;
    return legacyFile || legacyRaw;
}
1526 
/// Whether this is a raw format (compares the numeric enum value
/// against the one of LzmaFormat.raw)
bool isRaw(LzmaFormat format) @safe pure nothrow @nogc
{
    const int value = cast(int) format;
    const int firstRaw = cast(int) LzmaFormat.raw;
    return !(value < firstRaw);
}
1532 
/// Filters to use with the LZMA compression.
///
/// Up to 3 filters can be used from this list.
/// These filters transform the input to increase
/// redundancy of the data supplied to the LZMA compression.
enum LzmaFilter : int
{
    /// Delta filter, which stores differences between bytes
    /// to produce more repetitive data in some circumstances.
    /// Works with the `deltaDist` parameter of `CompressLzma`.
    delta,

    /// BCJ (Branch/Call/Jump) filters aim to optimize machine code
    /// compression by converting relative branches, calls and jumps
    /// to absolute addresses. This increases redundancy and can be
    /// exploited by the LZMA compression.
    ///
    /// BCJ filters are available for a set of CPU architectures.
    /// Use one (or two) of them when compressing compiled binaries.
    bcjX86,
    /// ditto
    bcjPowerPc,
    /// ditto
    bcjIa64,
    /// ditto
    bcjArm,
    /// ditto
    bcjArmThumb,
    /// ditto
    bcjSparc,
}
1564 
/// Integrity check to include in the compressed data
/// (only for the Xz format).
/// Default for xz is CRC-64.
enum LzmaCheck : int
{
    /// No integrity check included
    none,
    /// CRC-32 integrity check
    crc32,
    /// CRC-64 integrity check
    crc64,
    /// SHA-256 integrity check
    sha256,
}
1579 
// Converts the public LzmaCheck value to the matching lzma_check value.
private lzma_check toLzma(LzmaCheck check) @safe pure nothrow @nogc
{
    lzma_check converted;

    final switch (check)
    {
    case LzmaCheck.none:
        converted = lzma_check.NONE;
        break;
    case LzmaCheck.crc32:
        converted = lzma_check.CRC32;
        break;
    case LzmaCheck.crc64:
        converted = lzma_check.CRC64;
        break;
    case LzmaCheck.sha256:
        converted = lzma_check.SHA256;
        break;
    }

    return converted;
}
1594 
/// Returns an InputRange containing the input data compressed
/// with the default parameters of `CompressLzma`.
auto compressXz(I)(I input, size_t chunkSize = defaultChunkSize)
{
    CompressLzma algo;
    return squiz(input, algo, chunkSize);
}
1599 
/// Returns an InputRange containing the input data compressed
/// with `CompressLzma` in the LzmaFormat.raw format
/// (other parameters are left to their defaults).
auto compressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize)
{
    auto rawAlgo = CompressLzma.init;
    rawAlgo.format = LzmaFormat.raw;

    return squiz(input, rawAlgo, chunkSize);
}
1606 
/// Compression with the LZMA algorithm.
/// The container format is selected with the `format` field.
struct CompressLzma
{
    import std.conv : to;

    static assert(isSquizAlgo!CompressLzma);

    /// The format of the compressed stream
    LzmaFormat format;

    /// The integrity check to include in compressed stream.
    /// Only used with XZ format.
    LzmaCheck check = LzmaCheck.crc64;

    /// The compression preset between 0 (fast) to 9 (higher compression).
    /// The default is 6.
    uint preset = 6;

    /// Makes the encoding significantly slower for marginal compression
    /// improvement. Only useful if you don't mind about CPU time at all.
    Flag!"extreme" extreme;

    /// Filters to include in the encoding.
    /// Maximum three filters can be provided.
    /// For most input, no filtering is necessary.
    LzmaFilter[] filters;

    /// Number of bytes between 1 and 256 to use for the Delta filter.
    /// For example for 16bit PCM stereo audio, you should use 4.
    /// For RGB data 8bit per channel, you should use 3.
    uint deltaDist;

    alias Stream = LzmaStream;

    // (Re)initializes the lzma encoder matching `format` on the stream.
    // Throws Exception if filters are given with the legacy format,
    // or if lzma refuses the initialization.
    private void initStream(Stream stream) @trusted
    {
        // effective preset: level combined with the optional extreme flag
        uint pres = preset;
        if (extreme)
            pres |= LZMA_PRESET_EXTREME;

        lzma_ret res;
        final switch (format)
        {
        case LzmaFormat.xz:
            const chain = stream.buildFilterChain(format, filters, pres, deltaDist);
            res = lzma_stream_encoder(&stream.strm, chain.ptr, check.toLzma());
            break;
        case LzmaFormat.legacy:
            enforce(filters.length == 0, "Filters are not supported with the legacy format");
            // use the effective preset so that `extreme` is honored
            // like in the other formats
            lzma_lzma_preset(&stream.optsLzma, pres);
            res = lzma_alone_encoder(&stream.strm, &stream.optsLzma);
            break;
        case LzmaFormat.raw:
        case LzmaFormat.rawLegacy:
        case LzmaFormat.rawCopy:
            const chain = stream.buildFilterChain(format, filters, pres, deltaDist);
            res = lzma_raw_encoder(&stream.strm, chain.ptr);
            break;
        }

        // the result code must be concatenated to the message: a third
        // argument to enforce is the file name, not a message part
        enforce(res == lzma_ret.OK, "Could not initialize LZMA encoder: " ~ res.to!string);
    }

    /// Allocates a new stream and initializes the encoder on it.
    Stream initialize() @safe
    {
        auto stream = new LzmaStream;
        initStream(stream);
        return stream;
    }

    /// Runs one encoding step through `lzmaCode`.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        return lzmaCode(stream, lastChunk);
    }

    /// Prepares the stream for a new compression job.
    void reset(Stream stream) @safe
    {
        // Lzma supports reset out of the box by recalling initialization
        // function without calling lzma_end.

        initStream(stream);
    }

    /// Releases the lzma resources of the stream.
    void end(Stream stream) @trusted
    {
        lzma_end(&stream.strm);
    }
}
1694 
/// Returns an InputRange streaming over the input data decompressed
/// with the default parameters of `DecompressLzma`.
auto decompressXz(I)(I input, size_t chunkSize = defaultChunkSize)
{
    DecompressLzma algo;
    return squiz(input, algo, chunkSize);
}
1699 
/// Returns an InputRange streaming over the input data decompressed
/// with `DecompressLzma` in the LzmaFormat.raw format
/// (other parameters are left to their defaults).
auto decompressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize)
{
    auto rawAlgo = DecompressLzma.init;
    rawAlgo.format = LzmaFormat.raw;

    return squiz(input, rawAlgo, chunkSize);
}
1706 
/// Decompression of data encoded with the LZMA algorithm.
/// The expected container format is selected with the `format` field.
struct DecompressLzma
{
    import std.conv : to;

    static assert(isSquizAlgo!DecompressLzma);

    /// The format of the compressed stream
    LzmaFormat format;

    /// The memory usage limit in bytes.
    /// by default no limit is enforced
    size_t memLimit = size_t.max;

    /// Parameters for the raw decompression.
    /// They are the same than for the compression.
    /// As there is no header to tell Lzma what filters were used during
    /// compression, it is the responsibility of the programmer to
    /// correctly ensure that the same options are used for decompression.
    /// All these options are ignored when decompressing .xz stream.
    uint preset = 6;
    /// ditto
    Flag!"extreme" extreme;
    /// ditto
    LzmaFilter[] filters;
    /// ditto
    uint deltaDist;

    alias Stream = LzmaStream;

    /// Builds a decompressor for the given format,
    /// other parameters being left to their defaults.
    this(LzmaFormat format) @safe
    {
        this.format = format;
    }

    /// convenience constructor to copy parameters of the compression
    /// for the decompression. Especially useful for the raw decompression,
    /// to ensure that the parameters fit the ones used for compression.
    this(CompressLzma compress) @safe
    {
        format = compress.format;
        preset = compress.preset;
        extreme = compress.extreme;
        filters = compress.filters;
        deltaDist = compress.deltaDist;
    }

    // (Re)initializes the lzma decoder matching `format` on the stream.
    // Throws Exception if lzma refuses the initialization.
    private void initStream(Stream stream) @trusted
    {
        // size_t.max means "no limit", which lzma expects as ulong.max
        ulong memlim = memLimit;
        if (memLimit == size_t.max)
            memlim = ulong.max;

        lzma_ret res;

        final switch (format)
        {
        case LzmaFormat.xz:
            res = lzma_stream_decoder(&stream.strm, memlim, 0);
            break;
        case LzmaFormat.legacy:
            res = lzma_alone_decoder(&stream.strm, memlim);
            break;
        case LzmaFormat.raw:
        case LzmaFormat.rawLegacy:
        case LzmaFormat.rawCopy:
            // raw streams carry no header: the chain is rebuilt from the parameters
            uint pres = preset;
            if (extreme)
                pres |= LZMA_PRESET_EXTREME;

            const chain = stream.buildFilterChain(format, filters, pres, deltaDist);

            res = lzma_raw_decoder(&stream.strm, chain.ptr);
            break;
        }

        // the result code must be concatenated to the message: a third
        // argument to enforce is the file name, not a message part
        enforce(res == lzma_ret.OK, "Could not initialize LZMA decoder: " ~ res.to!string);
    }

    /// Runs one decoding step through `lzmaCode`.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        return lzmaCode(stream, lastChunk);
    }

    /// Allocates a new stream and initializes the decoder on it.
    Stream initialize() @safe
    {
        auto stream = new LzmaStream;
        initStream(stream);
        return stream;
    }

    /// Prepares the stream for a new decompression job.
    void reset(Stream stream) @safe
    {
        // Lzma supports reset out of the box by recalling initialization
        // function without calling lzma_end.

        initStream(stream);
    }

    /// Releases the lzma resources of the stream.
    void end(Stream stream) @trusted
    {
        lzma_end(&stream.strm);
    }
}
1808 
// Runs one lzma_code step on the stream (FINISH action on the last chunk,
// RUN otherwise) and reports whether lzma signalled the end of the stream.
// Throws Exception on any result other than OK or STREAM_END.
private Flag!"streamEnded" lzmaCode(LzmaStream stream, Flag!"lastChunk" lastChunk) @safe
{
    import std.conv : to;

    lzma_action action = lzma_action.RUN;
    if (lastChunk)
        action = lzma_action.FINISH;

    const status = (() @trusted => lzma_code(&stream.strm, action))();

    if (status == lzma_ret.STREAM_END)
        return Yes.streamEnded;

    enforce(
        status == lzma_ret.OK,
        "LZMA encoding failed with code: " ~ status.to!string
    );

    return No.streamEnded;
}
1823 
///
@("Compress / Decompress XZ")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const sentence = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const original = generateRepetitiveData(size, sentence).join();

    // round trip through the Xz format
    const compressed = only(original).compressXz().join();
    const restored = only(compressed).decompressXz().join();

    assert(compressed.length < original.length);
    assert(restored == original);

    // for such long and repetitive data, ratio is around 0.2%
    const compressedLen = cast(double) compressed.length;
    const originalLen = cast(double) original.length;
    assert(compressedLen / originalLen < 0.003);
}
1850 
///
@("Integrity check XZ")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const sentence = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const original = generateRepetitiveData(size, sentence).join();

    // dup because const(ubyte)[] is returned
    auto corrupted = only(original)
        .compressXz()
        .join()
        .dup;

    // alter one byte in the middle of the compressed stream
    corrupted[corrupted.length / 2] += 1;

    // decompression of the altered stream must fail
    assertThrown(only(corrupted).decompressXz().join());
}
1875 
///
@("Compress / Decompress XZ with filter")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const original = generateSequentialData(size, 1245, 27).join();

    // reference: default compression, without filter
    const plain = only(original).compressXz().join();

    CompressLzma deltaComp;
    deltaComp.filters ~= LzmaFilter.delta;
    deltaComp.deltaDist = 8; // sequential data of 8 byte integers

    const filtered = only(original).squiz(deltaComp).join();

    // the Xz decompressor is used with its default parameters
    const restored = only(filtered).decompressXz().join();

    assert(restored == original);
    // < 20% compression without filter (sequential data is tough)
    // < 0.5% compression with delta (piece of cake)
    assert(original.length > plain.length * 5);
    assert(original.length > filtered.length * 200);
}
1908 
///
@("Compress / Decompress Lzma Raw")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const sentence = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const original = generateRepetitiveData(size, sentence).join();

    // reference: same data in the Xz format (with header/trailer)
    const xzCompressed = only(original).compressXz().join();

    // round trip through the raw format
    const compressed = only(original).compressLzmaRaw().join();
    const restored = only(compressed).decompressLzmaRaw().join();

    assert(restored == original);
    assert(compressed.length < original.length);
    assert(compressed.length < xzCompressed.length); // win header/trailer space

    // for such repetitive data, the ratio is expected below 0.3%
    const compressedLen = cast(double) compressed.length;
    const originalLen = cast(double) original.length;
    assert(compressedLen / originalLen < 0.003);
}
1941 
///
@("Compress / Decompress Lzma Raw with filter")
unittest
{
    import std.array : join;
    import test.util;

    const size = 100_000;
    const original = generateSequentialData(size, 1245, 27).join();

    // reference: raw compression without filter
    const plain = only(original).compressLzmaRaw().join();

    CompressLzma deltaComp;
    deltaComp.format = LzmaFormat.raw;
    deltaComp.filters ~= LzmaFilter.delta;
    deltaComp.deltaDist = 8; // sequential data of 8 byte integers

    const filtered = only(original).squiz(deltaComp).join();

    // using compression parameters for decompression
    const restored = only(filtered)
        .squiz(DecompressLzma(deltaComp))
        .join();

    assert(restored == original);
    // < 20% compression without filter (sequential data is tough)
    // < 0.4% compression with delta (piece of cake)
    assert(original.length > plain.length * 5);
    assert(original.length > filtered.length * 250);
}
1975 
///
@("Compress / Decompress Lzma Legacy")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    // the decompression algorithm is built from the compression parameters
    auto packer = CompressLzma(LzmaFormat.legacy);
    auto unpacker = DecompressLzma(packer);

    const packed = only(input).squiz(packer).join();
    const unpacked = only(packed).squiz(unpacker).join();

    assert(packed.length < input.length);
    assert(unpacked == input);

    // for such repetitive data, the compressed size must stay below 0.3% of the input
    const ratio = cast(double) packed.length / input.length;
    assert(ratio < 0.003);
}
2006 
///
@("Compress / Decompress Lzma Raw Legacy")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    // the decompression algorithm is built from the compression parameters
    auto packer = CompressLzma(LzmaFormat.rawLegacy);
    auto unpacker = DecompressLzma(packer);

    const packed = only(input).squiz(packer).join();
    const unpacked = only(packed).squiz(unpacker).join();

    assert(packed.length < input.length);
    assert(unpacked == input);

    // for such repetitive data, the compressed size must stay below 0.3% of the input
    const ratio = cast(double) packed.length / input.length;
    assert(ratio < 0.003);
}
2037 
///
@("Compress / Decompress Lzma rawLegacy with filter")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const input = generateSequentialData(len, 1245, 27).join();

    // baseline: legacy format without any filter
    const plain = only(input).squiz(CompressLzma(LzmaFormat.legacy)).join();

    CompressLzma comp;
    comp.format = LzmaFormat.rawLegacy;
    comp.filters ~= LzmaFilter.delta;
    comp.deltaDist = 8; // sequential data of 8 byte integers

    // the decompression algorithm is built from the compression parameters
    auto decomp = DecompressLzma(comp);

    const filtered = only(input).squiz(comp).join();
    const restored = only(filtered).squiz(decomp).join();

    assert(restored == input);
    // without filter: compressed size below 20% of the input (sequential data is tough)
    // with delta filter: compressed size below 0.4% of the input (piece of cake)
    assert(plain.length * 5 < input.length);
    assert(filtered.length * 250 < input.length);
}
2073 
/// Helper that compresses `input` with Zstandard, using a
/// default-initialized `CompressZstd` algorithm.
/// `input` and `chunkSize` are forwarded to `squiz` as they are,
/// and the range returned by `squiz` is returned untouched.
auto compressZstd(I)(I input, size_t chunkSize = defaultChunkSize)
{
    return squiz(input, CompressZstd(), chunkSize);
}
2078 
/// Helper that decompresses `input` with Zstandard, using a
/// default-initialized `DecompressZstd` algorithm.
/// `input` and `chunkSize` are forwarded to `squiz` as they are,
/// and the range returned by `squiz` is returned untouched.
auto decompressZstd(I)(I input, size_t chunkSize = defaultChunkSize)
{
    return squiz(input, DecompressZstd(), chunkSize);
}
2083 
/// Stream state shared by Zstandard compression and decompression.
/// It holds the input and output buffer descriptors handed to the zstd
/// functions and counts the bytes that went through each of them.
class ZstdStream : SquizStream
{
    private ZSTD_inBuffer inBuf;
    private ZSTD_outBuffer outBuf;
    // bytes accounted for in buffers that have been replaced since
    private size_t totalIn;
    private size_t totalOut;

    /// The part of the current input buffer located between
    /// the current position and the end of the buffer.
    @property const(ubyte)[] input() const @trusted
    {
        return (cast(const(ubyte)*) inBuf.src)[inBuf.pos .. inBuf.size];
    }

    /// Install `inp` as the new input buffer.
    /// The position reached in the previous buffer is added to the total beforehand.
    @property void input(const(ubyte)[] inp) @trusted
    {
        totalIn += inBuf.pos;

        inBuf.src = cast(const(void)*) inp.ptr;
        inBuf.size = inp.length;
        inBuf.pos = 0;
    }

    /// Position in the current input buffer, added to the
    /// positions reached in the previous input buffers.
    @property size_t totalInput() const @safe
    {
        return inBuf.pos + totalIn;
    }

    /// The part of the current output buffer located between
    /// the current position and the end of the buffer.
    @property inout(ubyte)[] output() inout @trusted
    {
        return (cast(inout(ubyte)*) outBuf.dst)[outBuf.pos .. outBuf.size];
    }

    /// Install `outp` as the new output buffer.
    /// The position reached in the previous buffer is added to the total beforehand.
    @property void output(ubyte[] outp) @trusted
    {
        totalOut += outBuf.pos;

        outBuf.dst = cast(void*) outp.ptr;
        outBuf.size = outp.length;
        outBuf.pos = 0;
    }

    /// Position in the current output buffer, added to the
    /// positions reached in the previous output buffers.
    @property size_t totalOutput() const @safe
    {
        return outBuf.pos + totalOut;
    }

    /// Multi-line description of both buffers (address, position, size)
    /// and of the running totals, meant for debugging.
    override string toString() const @safe
    {
        import std.format : format;

        return format!(
            "ZstdStream:\n" ~
            "  Input:\n" ~
            "    start 0x%016x\n" ~
            "    pos %s\n" ~
            "    size %s\n" ~
            "    total %s\n" ~
            "  Output:\n" ~
            "    start 0x%016x\n" ~
            "    pos %s\n" ~
            "    size %s\n" ~
            "    total %s"
        )(
            inBuf.src, inBuf.pos, inBuf.size, totalInput,
            outBuf.dst, outBuf.pos, outBuf.size, totalOutput
        );
    }
}
2149 
// Builds the code of a statement, meant to be mixed in, that forwards the
// field `name` to `ZSTD_CCtx_setParameter` for the `ZSTD_cParameter` member
// of the same name. The generated statement does nothing when the field
// evaluates to false (e.g. zero), and expects a `cctx` symbol in scope.
private string zstdSetCParam(string name)
{
    string guard = "if (" ~ name ~ ") ";
    string call = "ZSTD_CCtx_setParameter(cctx, ZSTD_cParameter." ~ name ~ ", " ~ name ~ ");";
    return guard ~ call;
}
2155 
// Throws an Exception if `code` is reported as an error by `ZSTD_isError`.
// The message is `desc`, followed by ": " and the name that
// `ZSTD_getErrorName` gives for the code. Does nothing otherwise.
private void zstdError(size_t code, string desc) @trusted
{
    import std.string : fromStringz;

    if (!ZSTD_isError(code))
        return;

    const errName = fromStringz(ZSTD_getErrorName(code));
    throw new Exception((desc ~ ": " ~ errName).idup);
}
2166 
/// Zstandard is a fast compression algorithm designed for streaming.
/// See zstd.h (enum ZSTD_cParameter) for details.
struct CompressZstd
{
    static assert(isSquizAlgo!CompressZstd);

    /// Common parameters.
    /// A value of zero indicates that the default should be used.
    int compressionLevel;
    /// ditto
    int windowLog;
    /// ditto
    int hashLog;
    /// ditto
    int chainLog;
    /// ditto
    int searchLog;
    /// ditto
    int minMatch;
    /// ditto
    int targetLength;
    /// ditto
    int strategy;

    /// Long distance matching parameters (LDM)
    /// Can be activated for large inputs to improve the compression ratio.
    /// Increases memory usage and the window size.
    /// The `ldm*` values are only applied if `enableLongDistanceMatching` is set.
    /// A value of zero indicates that the default should be used.
    bool enableLongDistanceMatching;
    /// ditto
    int ldmHashLog;
    /// ditto
    int ldmMinMatch;
    /// ditto
    int ldmBucketSizeLog;
    /// ditto
    int ldmHashRateLog;

    // frame parameters

    /// If input data content size is known, before
    /// start of streaming, set contentSize to its value.
    /// It will enable the size to be written in the header
    /// and checked after decompression.
    /// `ulong.max` (the default) means that the size is not known.
    ulong contentSize = ulong.max;
    /// Include a checksum of the content in the trailer.
    bool checksumFlag = false;
    /// When applicable, dictionary's ID is written in the header
    bool dictIdFlag = true;

    /// Multi-threading parameters
    /// A value of zero indicates that the default should be used.
    int nbWorkers;
    /// ditto
    int jobSize;
    /// ditto
    int overlapLog;

    /// Stream state of a Zstandard compression job.
    static final class Stream : ZstdStream
    {
        private ZSTD_CStream* strm;
    }

    // Applies the fields of this struct to the zstd context of `stream`.
    // Zero (or false) fields are not sent, such as zstd keeps its own default.
    private void setParams(Stream stream) @trusted
    {
        auto cctx = cast(ZSTD_CCtx*) stream.strm;

        mixin(zstdSetCParam("compressionLevel"));
        mixin(zstdSetCParam("windowLog"));
        mixin(zstdSetCParam("hashLog"));
        mixin(zstdSetCParam("chainLog"));
        mixin(zstdSetCParam("searchLog"));
        mixin(zstdSetCParam("minMatch"));
        mixin(zstdSetCParam("targetLength"));
        mixin(zstdSetCParam("strategy"));

        if (enableLongDistanceMatching)
        {
            ZSTD_CCtx_setParameter(cctx,
                ZSTD_cParameter.enableLongDistanceMatching,
                1
            );

            mixin(zstdSetCParam("ldmHashLog"));
            mixin(zstdSetCParam("ldmMinMatch"));
            mixin(zstdSetCParam("ldmBucketSizeLog"));
            mixin(zstdSetCParam("ldmHashRateLog"));
        }

        // contentSize is a ulong whose "unknown" value is ulong.max.
        // Comparing with size_t.max would misinterpret it on 32 bits targets.
        if (contentSize != ulong.max)
            ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize);
        if (checksumFlag)
            ZSTD_CCtx_setParameter(
                cctx,
                ZSTD_cParameter.checksumFlag,
                1
            );
        // The dictionary ID flag has its own parameter: it must not
        // touch checksumFlag, which would cancel a requested checksum.
        if (!dictIdFlag)
            ZSTD_CCtx_setParameter(
                cctx,
                ZSTD_cParameter.dictIDFlag,
                0
            );

        mixin(zstdSetCParam("nbWorkers"));
        mixin(zstdSetCParam("jobSize"));
        mixin(zstdSetCParam("overlapLog"));
    }

    /// Create a new compression stream and apply the parameters to it.
    /// Throws: Exception if zstd could not create the stream.
    Stream initialize() @trusted
    {
        auto stream = new Stream;

        // zstd returns null if the stream could not be created
        stream.strm = enforce(
            ZSTD_createCStream(),
            "Could not create Zstandard compression stream"
        );

        setParams(stream);

        return stream;
    }

    /// Compress data from the input buffer of `stream` into its output buffer.
    /// If `lastChunk` is set, zstd is asked to end the frame.
    /// Returns: `Yes.streamEnded` if `lastChunk` is set and zstd reports
    /// that nothing remains to be written.
    /// Throws: Exception if zstd reports an error.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        auto cctx = cast(ZSTD_CCtx*) stream.strm;
        const directive = lastChunk ? ZSTD_EndDirective.end : ZSTD_EndDirective._continue;

        const res = (() @trusted => ZSTD_compressStream2(cctx, &stream.outBuf, &stream.inBuf, directive))();

        zstdError(res, "Could not compress data with Zstandard");
        return cast(Flag!"streamEnded")(lastChunk && res == 0);
    }

    /// Reset `stream` to start the compression of a new data stream.
    /// The zstd session is reset, the buffers are detached
    /// and the totals are set back to zero.
    void reset(Stream stream) @trusted
    {
        auto cctx = cast(ZSTD_CCtx*) stream.strm;
        ZSTD_CCtx_reset(cctx, ZSTD_ResetDirective.session_only);

        // the content size is pledged again for the new session
        if (contentSize != ulong.max)
            ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize);

        stream.inBuf = ZSTD_inBuffer.init;
        stream.outBuf = ZSTD_outBuffer.init;
        stream.totalIn = 0;
        stream.totalOut = 0;
    }

    /// Release the zstd resources held by `stream`.
    void end(Stream stream) @trusted
    {
        ZSTD_freeCStream(stream.strm);
    }
}
2316 
/// Decompression of data compressed with Zstandard.
/// See zstd.h (enum ZSTD_dParameter) for details.
struct DecompressZstd
{
    static assert(isSquizAlgo!DecompressZstd);

    /// Value sent to zstd for the `windowLogMax` decompression parameter.
    /// A value of zero indicates that the default should be used.
    int windowLogMax;

    /// Stream state of a Zstandard decompression job.
    static final class Stream : ZstdStream
    {
        private ZSTD_DStream* strm;
    }

    // Applies the fields of this struct to the zstd context of `stream`.
    private void setParams(Stream stream) @trusted
    {
        auto dctx = cast(ZSTD_DCtx*) stream.strm;

        if (windowLogMax)
            ZSTD_DCtx_setParameter(dctx,
                ZSTD_dParameter.windowLogMax, windowLogMax);
    }

    /// Create a new decompression stream and apply the parameters to it.
    /// Throws: Exception if zstd could not create the stream.
    Stream initialize() @trusted
    {
        auto stream = new Stream;

        // zstd returns null if the stream could not be created
        stream.strm = enforce(
            ZSTD_createDStream(),
            "Could not create Zstandard decompression stream"
        );

        setParams(stream);

        return stream;
    }

    /// Decompress data from the input buffer of `stream` into its output buffer.
    /// The `lastChunk` flag is not used.
    /// Returns: `Yes.streamEnded` if `ZSTD_decompressStream` returns zero.
    /// Throws: Exception if zstd reports an error.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe
    {
        const res = (() @trusted => ZSTD_decompressStream(stream.strm, &stream.outBuf, &stream
                .inBuf))();

        zstdError(res, "Could not decompress data with Zstandard");
        return cast(Flag!"streamEnded")(res == 0);
    }

    /// Reset `stream` to start the decompression of a new data stream.
    /// The zstd session is reset, the buffers are detached
    /// and the totals are set back to zero.
    void reset(Stream stream) @trusted
    {
        auto dctx = cast(ZSTD_DCtx*) stream.strm;
        ZSTD_DCtx_reset(dctx, ZSTD_ResetDirective.session_only);

        // Same clean-up as CompressZstd.reset: without it, the buffers of
        // the previous session stay attached and totalInput/totalOutput
        // keep accumulating over successive sessions.
        stream.inBuf = ZSTD_inBuffer.init;
        stream.outBuf = ZSTD_outBuffer.init;
        stream.totalIn = 0;
        stream.totalOut = 0;
    }

    /// Release the zstd resources held by `stream`.
    void end(Stream stream) @trusted
    {
        ZSTD_freeDStream(stream.strm);
    }
}
2368 
///
@("Compress / Decompress Zstandard")
unittest
{
    import test.util;
    import std.array : join;

    const len = 100_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    // round trip with the default parameters
    const packed = only(input).compressZstd().join();
    const unpacked = only(packed).decompressZstd().join();

    assert(packed.length < input.length);
    assert(unpacked == input);

    // for such long and repetitive data, the compressed size
    // must stay below 0.05% of the input
    const ratio = cast(double) packed.length / input.length;
    assert(ratio < 0.0005);
}