1 /// Compression and decompression streaming algorithms.
2 ///
3 /// Each compression or decompression algorithm is represented by a struct
4 /// that contains parameters for compression/decompression.
5 /// Besides the parameters they carry, algorithms have no state. Each
6 /// algorithm instance can be used for an unlimited number of parallel jobs.
7 ///
/// The algorithms create a stream, which carries the state and allocated
/// resources of the ongoing compression.
10 ///
11 /// The compression/decompression jobs are run by the `squiz` function,
12 /// or one of the related helpers built upon it (e.g. deflate, deflateGz, inflate, ...).
13 ///
/// `squiz` and related functions take an InputRange of ubyte[] and return an InputRange of ubyte[].
/// This allows streaming in the most natural way for a D program and provides
/// the greatest versatility.
17 /// It is possible to read the data from any source (file, network, memory),
18 /// process the data, and write to any kind of destination.
/// This also makes it possible to process gigabytes of data with little memory usage.
20 ///
21 /// Compression often wraps the compressed data with header and trailer
22 /// that give the decompression algorithm useful information, especially
23 /// to check the integrity of the data after decompression.
24 /// This is called the format.
/// Some compression algorithms offer different formats, and sometimes
/// the possibility to not wrap the data at all (raw format), in which
/// case the integrity check is not performed. This is usually used when
/// an external integrity check is done, for example when archiving
/// compressed streams in Zip or 7z archives.
30 module squiz_box.squiz;
31 
32 import squiz_box.c.zlib;
33 import squiz_box.priv;
34 
35 import std.datetime.systime;
36 import std.exception;
37 import std.range;
38 import std.typecons;
39 
40 version (HaveSquizBzip2)
41 {
42     import squiz_box.c.bzip2;
43 }
44 version (HaveSquizLzma)
45 {
46     import squiz_box.c.lzma;
47 }
48 version (HaveSquizZstandard)
49 {
50     import squiz_box.c.zstd;
51 }
52 
/// Default chunk size (in bytes) for data exchanges and I/O operations.
/// This is the default value of the `chunkSize` parameter of `squiz`
/// and of the helpers built upon it.
enum defaultChunkSize = 8192;

/// Definition of a byte chunk, which is the unit of data
/// exchanged during I/O and data transformation operations.
alias ByteChunk = const(ubyte)[];

/// A dynamic (interface based) type of input range of chunks of bytes.
alias ByteRange = InputRange!ByteChunk;
62 
/// Static check that a type is a byte range, i.e. an input range
/// whose elements are themselves ranges of (possibly qualified) `ubyte`.
template isByteRange(BR)
{
    import std.range : ElementType, isInputRange;
    import std.traits : Unqual;

    // Element of the outer range (a chunk), then element of a chunk.
    alias Chunk = ElementType!BR;
    alias Item = ElementType!Chunk;

    // Qualifiers on the innermost element are ignored, so chunks of
    // const or immutable bytes are accepted as well.
    enum isByteRange = is(Unqual!Item == ubyte) && isInputRange!BR;
}
74 
// The type-erased ByteRange must itself satisfy the static check.
static assert(isByteRange!ByteRange);
76 
/// Exception thrown when inconsistent data is given to
/// a decompression algorithm.
/// I.e. the data was not compressed with the corresponding algorithm
/// or the wrapping format is not the one expected.
@safe class DataException : Exception
{
    // Generates the usual exception constructors
    // (see std.exception.basicExceptionCtors).
    mixin basicExceptionCtors!();
}
85 
/// Check whether a type is a proper squiz algorithm.
///
/// A type `A` qualifies when the following compiles for a value `algo` of type `A`:
/// - `algo.initialize()` returns a stream implicitly convertible to `SquizStream`;
/// - `algo.process(stream, Yes.lastChunk)` returns a `Flag!"streamEnded"`;
/// - `algo.reset(stream)` and `algo.end(stream)` accept that stream.
template isSquizAlgo(A)
{
    // The lambda is never called: only whether it type-checks matters.
    enum isSquizAlgo = is(typeof((A algo) {
                auto stream = algo.initialize();
                Flag!"streamEnded" ended = algo.process(stream, Yes.lastChunk);
                algo.reset(stream);
                algo.end(stream);
                static assert(is(typeof(stream) : SquizStream));
            }));
}
97 
/// Get the type of the SquizStream used by the Squiz algorithm `A`,
/// i.e. the return type of `A.initialize`.
template StreamType(A) if (isSquizAlgo!A)
{
    import std.traits : ReturnType;

    alias StreamType = ReturnType!(A.initialize);
}
105 
/// A squiz algorithm whose type is erased behind an interface.
/// This helps to choose the algorithm at run time.
interface SquizAlgo
{
    /// Initialize a new stream for processing data
    /// with this algorithm.
    SquizStream initialize() @safe;

    /// Processes the input stream data to produce output stream data.
    /// lastChunk indicates that the input chunk in stream is the last one.
    /// This is an indication to the algorithm that it can start to finish
    /// the work.
    /// Returned value indicates that there won't be more output generated
    /// than the one in stream.output
    Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe;

    /// Reset the state of this stream, yet reusing the same
    /// allocated resources, in order to start processing
    /// another data stream.
    void reset(SquizStream stream) @safe;

    /// Release the resources used by this stream.
    /// Most of the memory (if not all) used by the algorithm
    /// is allocated with the garbage collector, so not
    /// calling this function has little consequence (if not none).
    void end(SquizStream stream) @safe;
}

// The type-erased interface is itself usable as a squiz algorithm.
static assert(isSquizAlgo!SquizAlgo);
135 
/// Wrap a compile-time squiz algorithm into the type-erased `SquizAlgo`
/// interface, so that the algorithm can be selected at run time.
SquizAlgo squizAlgo(A)(A algo) @safe if (isSquizAlgo!A)
{
    SquizAlgo erased = new CSquizAlgo!A(algo);
    return erased;
}
141 
///
@("squizAlgo")
unittest
{
    import test.util;
    import std.array : join;

    // the same algorithm, once with its static type, once type-erased
    auto ctAlgo = Deflate.init;
    auto rtAlgo = squizAlgo(Deflate.init);

    const len = 10_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    const input = generateRepetitiveData(len, phrase).join();

    const ctSquized = only(input).squiz(ctAlgo).join();
    const rtSquized = only(input).squiz(rtAlgo).join();

    // both paths must produce exactly the same compressed bytes
    assert(ctSquized == rtSquized);
}
161 
// Adapter implementing the type-erased SquizAlgo interface
// on top of a statically typed algorithm A.
private class CSquizAlgo(A) : SquizAlgo
{
    alias Stream = StreamType!A;

    A algo;

    private this(A algo) @safe
    {
        this.algo = algo;
    }

    // Downcast the erased stream to the concrete stream type of A.
    // A stream coming from another algorithm yields null and trips the assertion.
    private Stream checkStream(SquizStream stream)
    {
        Stream typed = cast(Stream) stream;
        assert(typed, "provided stream is not produced by this algorithm");
        return typed;
    }

    SquizStream initialize() @safe
    {
        auto created = algo.initialize();
        return created;
    }

    Flag!"streamEnded" process(SquizStream stream, Flag!"lastChunk" lastChunk) @safe
    {
        auto typed = checkStream(stream);
        return algo.process(typed, lastChunk);
    }

    void reset(SquizStream stream) @safe
    {
        auto typed = checkStream(stream);
        algo.reset(typed);
    }

    void end(SquizStream stream) @safe
    {
        auto typed = checkStream(stream);
        algo.end(typed);
    }
}
200 
/// A state carrying, processing stream for squiz algorithms.
/// The stream does not carry any buffer, only slices to external buffers.
/// One may normally not use this directly as everything is handled
/// by the `squiz` function.
interface SquizStream
{
    /// Input data for the algorithm.
    /// The slice is reduced by its beginning as the processing moves on.
    /// Must be refilled when empty before calling the algorithm `process` method.
    @property const(ubyte)[] input() const @safe;
    /// Ditto
    @property void input(const(ubyte)[] inp) @safe;

    /// How many bytes read since the start of the stream processing.
    @property size_t totalInput() const @safe;

    /// Output buffer for the algorithm to write to.
    /// This is NOT the data ready after process, but where the
    /// algorithm must write next.
    /// After a call to process, the slice is reduced by its beginning,
    /// and the data written is therefore the one before the slice.
    @property inout(ubyte)[] output() inout @safe;
    /// Ditto
    @property void output(ubyte[] outp) @safe;

    /// How many bytes written since the start of the stream processing.
    @property size_t totalOutput() const @safe;
}
228 
// Static check that S exposes the zlib-style cursor fields:
// `next_in`/`avail_in` for the input and `next_out`/`avail_out` for the output.
// The lambda is never called: only whether it type-checks matters.
private template isZlibLikeStream(S)
{
    enum isZlibLikeStream = is(typeof((S stream) {
                stream.next_in = cast(const(ubyte)*) null;
                stream.avail_in = 0;
                stream.next_out = cast(ubyte*) null;
                stream.avail_out = 0;
            }));
}
238 
// Implements the `input` and `output` properties of SquizStream on top of a
// zlib-like native stream structure S, stored in the mixed-in `strm` field.
// The slices are stored as pointer + count in the native structure and
// rebuilt from them on read, hence the @trusted accessors.
private mixin template ZlibLikeStreamImpl(S) if (isZlibLikeStream!S)
{
    private S strm;

    // Remaining input, rebuilt from the native pointer/count pair.
    @property const(ubyte)[] input() const @trusted
    {
        return strm.next_in[0 .. strm.avail_in];
    }

    @property void input(const(ubyte)[] inp) @trusted
    {
        strm.next_in = inp.ptr;
        // NOTE(review): the length is cast to the type of `avail_in`; if that type
        // is narrower than size_t, a very large slice would be silently truncated
        // -- confirm against the native struct declaration.
        strm.avail_in = cast(typeof(strm.avail_in)) inp.length;
    }

    // Remaining output space, rebuilt from the native pointer/count pair.
    @property inout(ubyte)[] output() inout @trusted
    {
        return strm.next_out[0 .. strm.avail_out];
    }

    @property void output(ubyte[] outp) @trusted
    {
        strm.next_out = outp.ptr;
        // NOTE(review): same narrowing cast as for `avail_in` -- confirm.
        strm.avail_out = cast(typeof(strm.avail_out)) outp.length;
    }
}
265 
// Implements `totalInput` and `totalOutput` from the `total_in` and `total_out`
// fields of a `strm` member, which must exist in the scope where this
// template is mixed in.
mixin template ZlibLikeTotalInOutImpl()
{
    @property size_t totalInput() const
    {
        return cast(size_t) strm.total_in;
    }

    @property size_t totalOutput() const
    {
        return cast(size_t) strm.total_out;
    }
}
278 
/// Returns an InputRange containing the input data processed through the supplied algorithm.
/// A buffer of `chunkSize` bytes is allocated to hold the produced chunks.
auto squiz(I, A)(I input, A algo, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I && isSquizAlgo!A)
{
    auto chunkBuffer = new ubyte[chunkSize];
    return squiz(input, algo, chunkBuffer);
}
285 
/// Returns an InputRange containing the input data processed through the supplied algorithm.
/// The produced chunks are slices of the provided `chunkBuffer`.
/// A new stream is initialized for the job and ended once processing is over.
auto squiz(I, A)(I input, A algo, ubyte[] chunkBuffer)
        if (isByteRange!I && isSquizAlgo!A)
{
    alias Result = Squiz!(I, A, Yes.endStream);

    // no limit on the produced amount of data
    return Result(input, algo, algo.initialize(), chunkBuffer, ulong.max);
}
293 
/// Returns an InputRange containing the input data processed through the supplied algorithm.
/// Unlike `squiz`, `squizReuse` does not manage the state (aka stream) of the algorithm,
/// which allows reusing it (and its allocated resources) for several jobs.
/// squizReuse will drive the algorithm and move the stream forward until processing is over,
/// but will not end the stream.
/// The stream must be either freshly initialized or freshly reset before being passed
/// to this function.
/// Params:
///   input = the byte range to process
///   algo = the algorithm driving the stream
///   stream = a stream produced by `algo.initialize()`
///   chunkBuffer = the buffer used to hold the produced chunks
auto squizReuse(I, A, S)(I input, A algo, S stream, ubyte[] chunkBuffer)
        if (isByteRange!I && isSquizAlgo!A)
{
    // `.stringof` (was misspelled) so that a mismatch reports the intended message
    static assert(is(StreamType!A == S), S.stringof ~ " is not the stream produced by " ~ A.stringof);
    return Squiz!(I, A, No.endStream)(input, algo, stream, chunkBuffer, ulong.max);
}
306 
/// Same as squiz, but stops encoding/decoding once `maxOut` bytes have been written out.
/// Useful to decode some raw encoded streams where the uncompressed size is known
/// and the algorithm does not always report Yes.streamEnded.
auto squizMaxOut(I, A)(I input, A algo, ulong maxOut, size_t chunkSize = defaultChunkSize)
{
    // No need for a buffer larger than what may be written in total.
    const size_t bufLen = maxOut < chunkSize ? cast(size_t) maxOut : chunkSize;
    auto buffer = new ubyte[bufLen];

    alias Result = Squiz!(I, A, Yes.endStream);
    return Result(input, algo, algo.initialize(), buffer, maxOut);
}
319 
// Common transformation range for all compression/decompression functions.
// I is a byte input range
// A is a squiz algorithm
// if Yes.endStream, the stream is ended (algo.end) when data is done processing
//
// Each element of the range is a slice of `chunkBuffer`: it is only valid
// until the next call to popFront.
private struct Squiz(I, A, Flag!"endStream" endStream)
{
    private alias Stream = StreamType!A;

    // Byte input range (by chunks)
    private I input;

    // The algorithm
    private A algo;

    // State of the stream being processed
    private Stream stream;

    // Buffer used to store the front chunk
    private ubyte[] chunkBuffer;
    // Slice of the buffer that is valid for read out
    private ByteChunk chunk;

    // Number of bytes that may still be written out
    private ulong maxLen;

    // Whether processing is over: either the algorithm reported
    // the end of the stream, or the output budget is exhausted.
    private bool ended;

    private this(I input, A algo, Stream stream, ubyte[] chunkBuffer, ulong maxLen)
    {
        this.input = input;
        this.algo = algo;
        this.stream = stream;
        this.chunkBuffer = chunkBuffer;
        this.maxLen = maxLen;
        prime();
    }

    @property bool empty()
    {
        return chunk.length == 0;
    }

    @property ByteChunk front()
    {
        return chunk;
    }

    void popFront()
    {
        chunk = null;
        if (!ended)
            prime();
    }

    // Fill `chunk` by running the algorithm until the buffer is full,
    // the stream has ended or the output budget is exhausted.
    private void prime()
    {
        import std.algorithm : min;

        while (chunk.length < chunkBuffer.length)
        {
            // Feed the next input chunk once the previous one is consumed.
            // Empty chunks are skipped: they carry no data, and calling
            // process with no input before the last chunk cannot make progress
            // (zlib-like back ends report it as an error).
            while (stream.input.length == 0 && !input.empty)
            {
                stream.input = input.front;
                if (stream.input.length == 0)
                    input.popFront();
            }

            // Offer the free tail of the buffer, capped by the remaining budget.
            // The result never exceeds the buffer length, so the cast is lossless.
            const start = chunk.length;
            const len = cast(size_t) min(chunkBuffer.length - start, maxLen);
            stream.output = chunkBuffer[start .. start + len];

            const streamEnded = algo.process(stream, cast(Flag!"lastChunk") input.empty);

            // stream.output now is what remains unwritten of the offered slice.
            // Only the bytes actually written extend the chunk and consume the budget.
            const written = len - stream.output.length;
            chunk = chunkBuffer[0 .. start + written];
            maxLen -= written;

            // popFront must be called at the end because it invalidates
            // the input chunk referenced by the stream
            if (stream.input.length == 0 && !input.empty)
                input.popFront();

            if (streamEnded || maxLen == 0)
            {
                ended = true;
                static if (endStream)
                    algo.end(stream);
                break;
            }
        }
    }
}
406 
version (HaveSquizLzma)
{
    // Decodes a raw LZMA stream whose decoded size is known in advance,
    // limiting the output to that size with squizMaxOut.
    @("squizMaxOut")
    unittest
    {
        // encoded header of test/data/archive.7z
        const(ubyte)[] dataIn = [
            0x00, 0x00, 0x81, 0x33, 0x07, 0xae, 0x0f, 0xd1, 0xf2, 0xfb, 0xfd, 0x40,
            0xc0, 0x90, 0xd2, 0xff, 0x7d, 0x69, 0x4d, 0x90, 0xd3, 0x2c, 0x42, 0x66,
            0xb0, 0xc6, 0xcc, 0xeb, 0xcf, 0x59, 0xcc, 0x96, 0x23, 0xf9, 0x91, 0xc8,
            0x75, 0x49, 0xe9, 0x9d, 0x1a, 0xa8, 0xa5, 0x9d, 0xf7, 0x75, 0x29, 0x1a,
            0x90, 0x78, 0x18, 0x8e, 0x42, 0x1a, 0x97, 0x0c, 0x40, 0xb7, 0xaa, 0xb6,
            0x16, 0xa9, 0x91, 0x0c, 0x58, 0xad, 0x75, 0xf7, 0x8f, 0xaf, 0x8f, 0x45,
            0xdb, 0x78, 0xd0, 0x8e, 0xc6, 0x1b, 0x72, 0xa5, 0xf4, 0xd2, 0x46, 0xf7,
            0xe1, 0xce, 0x01, 0x80, 0x7f, 0x3d, 0x66, 0xa5, 0x2d, 0x64, 0xd7, 0xb0,
            0x41, 0xdc, 0x92, 0x59, 0x88, 0xb0, 0x4c, 0x67, 0x34, 0xb6, 0x4e, 0xd3,
            0xd5, 0x01, 0x8d, 0x43, 0x13, 0x9c, 0x82, 0x78, 0x4d, 0xcf, 0x8c, 0x51,
            0x25, 0x0f, 0xd5, 0x1d, 0x80, 0x4b, 0x80, 0xea, 0x18, 0xc1, 0x29, 0x49,
            0xe4, 0x4d, 0x4d, 0x8b, 0xb9, 0xa1, 0xfc, 0x17, 0x2b, 0xb3, 0xe6, 0x00,
            0x00, 0x00
        ];
        // decoded header data of test/data/archive.7z
        const(ubyte)[] expectedDataOut = [
            0x01, 0x04, 0x06, 0x00, 0x01, 0x09, 0x40, 0x00, 0x07, 0x0b, 0x01, 0x00,
            0x01, 0x21, 0x21, 0x01, 0x00, 0x0c, 0x8d, 0xe2, 0x00, 0x08, 0x0d, 0x03,
            0x09, 0x8d, 0xc1, 0x07, 0x0a, 0x01, 0x84, 0x4d, 0x4d, 0xa8, 0x9e, 0xf4,
            0xb3, 0xdb, 0x12, 0xed, 0x64, 0x40, 0x00, 0x00, 0x05, 0x03, 0x19, 0x0d,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x11, 0x55, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00,
            0x20, 0x00, 0x32, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00,
            0x00, 0x00, 0x66, 0x00, 0x69, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x31, 0x00,
            0x2e, 0x00, 0x74, 0x00, 0x78, 0x00, 0x74, 0x00, 0x00, 0x00, 0x66, 0x00,
            0x6f, 0x00, 0x6c, 0x00, 0x64, 0x00, 0x65, 0x00, 0x72, 0x00, 0x2f, 0x00,
            0x63, 0x00, 0x68, 0x00, 0x6d, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x20, 0x00,
            0x36, 0x00, 0x36, 0x00, 0x36, 0x00, 0x2e, 0x00, 0x74, 0x00, 0x78, 0x00,
            0x74, 0x00, 0x00, 0x00, 0x14, 0x1a, 0x01, 0x00, 0x80, 0x96, 0x9f, 0xd5,
            0xc8, 0x53, 0xd8, 0x01, 0x80, 0x50, 0x82, 0x4f, 0xc6, 0x53, 0xd8, 0x01,
            0x00, 0xff, 0x13, 0x13, 0xb7, 0x52, 0xd8, 0x01, 0x15, 0x0e, 0x01, 0x00,
            0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xa4, 0x81, 0x20, 0x80, 0xb6, 0x81,
            0x00, 0x00
        ];

        auto algo = DecompressLzma(LzmaFormat.rawLegacy);

        // the output limit is the known size of the decoded data
        const dataOut = only(dataIn)
            .squizMaxOut(algo, expectedDataOut.length)
            .join();

        assert(dataOut == expectedDataOut);
    }
}
458 
/// Stream of the Copy algorithm, which does not transform data at all.
/// This is useful in cases of reading/writing data
/// that may or may not be compressed. Using Copy
/// allows the same code to handle both kinds of streams.
///
/// The stream only holds the input and output slices
/// and the counters of bytes copied so far.
final class CopyStream : SquizStream
{
    private const(ubyte)[] _inp;
    size_t _totalIn;
    private ubyte[] _outp;
    size_t _totalOut;

    // --- input side ---

    @property void input(const(ubyte)[] inp) @safe
    {
        _inp = inp;
    }

    @property const(ubyte)[] input() const @safe
    {
        return _inp;
    }

    // --- output side ---

    @property void output(ubyte[] outp) @safe
    {
        _outp = outp;
    }

    @property inout(ubyte)[] output() inout @safe
    {
        return _outp;
    }

    // --- counters ---

    @property size_t totalInput() const @safe
    {
        return _totalIn;
    }

    @property size_t totalOutput() const @safe
    {
        return _totalOut;
    }
}
500 
/// The Copy algorithm: moves bytes from the input to the output of
/// a `CopyStream` without transforming them.
struct Copy
{
    static assert(isSquizAlgo!Copy);

    /// Create a new, empty stream.
    CopyStream initialize() @safe
    {
        return new CopyStream;
    }

    /// Copy as many bytes as both the pending input and the free output allow.
    /// The stream is ended once the last chunk is entirely copied.
    Flag!"streamEnded" process(CopyStream stream, Flag!"lastChunk" lastChunk) @safe
    {
        const pending = stream._inp.length;
        const room = stream._outp.length;
        const count = pending < room ? pending : room;

        stream._outp[0 .. count] = stream._inp[0 .. count];

        // account for the copied bytes, then advance both slices
        stream._totalIn += count;
        stream._totalOut += count;
        stream._inp = stream._inp[count .. $];
        stream._outp = stream._outp[count .. $];

        if (lastChunk && stream._inp.length == 0)
            return Yes.streamEnded;
        return No.streamEnded;
    }

    /// Forget the slices and counters so that the stream can be reused.
    void reset(CopyStream stream) @safe
    {
        stream._totalIn = 0;
        stream._totalOut = 0;
        stream._inp = null;
        stream._outp = null;
    }

    /// Nothing to release.
    void end(CopyStream) @safe
    {
    }
}
539 
/// Returns an InputRange over the same data as `input`,
/// delivered in chunks of at most `chunkSize` bytes.
auto copy(I)(I input, size_t chunkSize = defaultChunkSize)
{
    Copy algo;
    return squiz(input, algo, chunkSize);
}
545 
///
@("Copy")
unittest
{
    import test.util : generateRepetitiveData;
    import std.array : join;

    const len = 10_000;
    const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
    // reference data, joined in a single array
    const input = generateRepetitiveData(len, phrase).join();

    /// copying with arbitrary chunk sizes on input and output
    const cop1 = generateRepetitiveData(len, phrase, 1231).copy(234).join();
    const cop2 = generateRepetitiveData(len, phrase, 296).copy(6712).join();

    // the chunking must have no effect on the content
    assert(input == cop1);
    assert(input == cop2);
}
564 
/// Describe what type of header and trailer are wrapping
/// a deflated stream.
enum ZlibFormat
{
    /// Zlib header and trailer
    zlib,
    /// Gzip header and trailer
    gz,
    /// Auto detection of Zlib or Gzip format (only used with Inflate;
    /// Deflate rejects this value)
    autoDetect,
    /// No header and trailer, therefore no integrity check included.
    /// This is to be used within other formats such as Zip.
    /// When using raw, it is advised to use an external integrity check.
    raw,
}
580 
// Length of the null-terminated string `str`, reading at most `maxlen` bytes.
// Returns the number of bytes before the first null byte, or `maxlen` if
// no null byte is found within the first `maxlen` bytes.
// Returns 0 if `str` is null.
private size_t strnlen(const(byte)* str, size_t maxlen) @system
{
    if (!str)
        return 0;

    size_t l;
    // The bound must be checked BEFORE reading the byte: str[maxlen] may be
    // out of the buffer when it holds no null terminator.
    while (l < maxlen && str[l] != 0)
        l++;
    return l;
}
594 
@("strnlen")
unittest
{
    // null pointer
    assert(strnlen(null, 0) == 0);
    // terminator found before the limit
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 15) == 10);
    // terminator exactly at the limit
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 10) == 10);
    // limit reached before the terminator
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 9) == 9);
    assert(strnlen(cast(const(byte)*)("abcdefghij\0klmn".ptr), 0) == 0);
    // empty string
    assert(strnlen(cast(const(byte)*)("\0bcdefghij\0klmn".ptr), 15) == 0);
}
605 
/// Header data for the Gzip format.
/// Gzip includes metadata about the file which is compressed.
/// These can be specified here when compressing from a stream
/// rather than directly from a file.
struct GzHeader
{
    import core.stdc.config : c_ulong;

    /// operating system encoded in the Gz header
    /// Not all possible values are listed here, only
    /// the most useful ones
    enum Os
    {
        fatFs = 0,
        unix = 3,
        macintosh = 7,
        ntFs = 11,
        unknown = 255,
    }

    /// The `Os` value matching the platform this code is compiled for.
    /// Falls back to `Os.unknown` on platforms not listed here, so that
    /// the struct compiles everywhere.
    version (OSX)
        enum defaultOs = Os.macintosh;
    else version (iOS)
        enum defaultOs = Os.macintosh;
    else version (Posix)
        enum defaultOs = Os.unix;
    else version (Windows)
        enum defaultOs = Os.ntFs;
    else
        enum defaultOs = Os.unknown;

    /// Whether the content is believed to be text
    Flag!"text" text;

    // storing in unix format to avoid
    // negative numbers with SysTime.init
    private c_ulong _mtime;

    /// Modification time
    @property SysTime mtime() const @safe
    {
        return SysTime(unixTimeToStdTime(_mtime));
    }

    /// ditto
    @property void mtime(SysTime time) @safe
    {
        _mtime = stdTimeToUnixTime(time.stdTime);
    }

    /// Operating system that wrote the gz file
    Os os = defaultOs;

    /// Filename to be included in the header
    string filename;

    /// Comment to be included in the header
    string comment;

    // Size of the buffers receiving the name and the comment
    // when a header is read back from a stream.
    private enum bufSize = 256;

    // Decode a Latin-1 C string read from a gzip header into a D string.
    private string fromLatin1z(const(byte)* ptr) @system
    {
        // ptr points to a buffer of bufSize characters.
        // End of string is a null character or end of buffer.
        // Encoding is latin 1.
        import std.encoding : Latin1Char, transcode;

        const len = strnlen(ptr, bufSize);
        auto str = cast(const(Latin1Char)[]) ptr[0 .. len];

        string res;
        transcode(str, res);
        return res;
    }

    // Encode a D string into a newly allocated, null-terminated Latin-1 C string.
    private byte* toLatin1z(string str) @trusted
    {
        import std.encoding : Latin1Char, transcode;

        Latin1Char[] l1;
        transcode(str, l1);
        auto res = (cast(byte[]) l1) ~ 0;
        return res.ptr;
    }

    // Build from the native zlib header structure.
    // The name and comment are only read if their pointer is set.
    private this(gz_headerp gzh) @system
    {
        text = gzh.text ? Yes.text : No.text;
        _mtime = gzh.time;
        os = cast(Os) gzh.os;
        if (gzh.name)
            filename = fromLatin1z(gzh.name);
        if (gzh.comment)
            comment = fromLatin1z(gzh.comment);
    }

    // Convert to a newly allocated native zlib header structure.
    // The name and comment are left unset if the corresponding field is null.
    private gz_headerp toZlib() @safe
    {
        auto gzh = new gz_header;
        gzh.text = text ? 1 : 0;
        gzh.time = _mtime;
        gzh.os = cast(int) os;
        if (filename)
            gzh.name = toLatin1z(filename);
        if (comment)
            gzh.comment = toLatin1z(comment);
        return gzh;
    }
}
716 
/// Type of delegate to use as callback for Inflate.gzHeaderDg.
/// The callback receives the header read from the stream.
alias GzHeaderDg = void delegate(GzHeader header);
719 
/// Helper to set GzHeader.text
/// Returns Yes.text if every byte of data is either a line feed (0x0a),
/// a carriage return (0x0d) or a printable ascii character (0x20 to 0x7e).
/// Empty data yields Yes.text.
Flag!"text" isText(const(ubyte)[] data)
{
    foreach (const b; data)
    {
        const lineBreak = b == 0x0a || b == 0x0d;
        const printable = 0x20 <= b && b <= 0x7e;

        // the first offending byte settles the answer
        if (!lineBreak && !printable)
            return No.text;
    }
    return Yes.text;
}
730 
/// Base class of the streams of the Zlib algorithms.
/// Wraps a native `z_stream` and exposes it through the SquizStream interface.
class ZlibStream : SquizStream
{
    // `input` and `output` properties, backed by the `strm` field (of type z_stream)
    mixin ZlibLikeStreamImpl!z_stream;
    // `totalInput` and `totalOutput` properties
    mixin ZlibLikeTotalInOutImpl!();

    private this() @safe
    {
        // Install the allocation callbacks used by zlib for its internal state.
        // NOTE(review): gcAlloc and gcFree are declared outside of this file,
        // presumably garbage collector backed -- confirm in squiz_box.priv.
        strm.zalloc = &(gcAlloc!uint);
        strm.zfree = &gcFree;
    }
}
742 
/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is wrapped by Zlib header and trailer.
auto deflate(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    // default parameters: zlib format
    Deflate algo;
    return squiz(input, algo, chunkSize);
}
749 
/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is wrapped by Gzip header and trailer.
/// Supplying a header is entirely optional. Zlib produces a default header if not supplied.
/// The default header has text false, mtime zero, unknown os, and
/// no name or comment.
auto deflateGz(I)(I input, GzHeader header, size_t chunkSize = defaultChunkSize)
        if (isByteRange!I)
{
    Deflate algo;
    algo.gzHeader = header;
    algo.format = ZlibFormat.gz;
    return squiz(input, algo, chunkSize);
}
763 
/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is wrapped by Gzip header and trailer.
/// No header is supplied: Zlib produces its default header.
auto deflateGz(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    Deflate algo;
    algo.format = ZlibFormat.gz;
    return squiz(input, algo, chunkSize);
}
771 
/// Returns an InputRange containing the input data processed through Zlib's deflate algorithm.
/// The produced stream of data is raw: it is not wrapped by any header or trailer.
auto deflateRaw(I)(I input, size_t chunkSize = defaultChunkSize) if (isByteRange!I)
{
    Deflate algo;
    algo.format = ZlibFormat.raw;
    return squiz(input, algo, chunkSize);
}
780 
/// Zlib's deflate algorithm
struct Deflate
{
    static assert(isSquizAlgo!Deflate);
    static assert(is(StreamType!Deflate == Stream));

    /// Which format to use for the deflated stream.
    /// In case ZlibFormat.gz, the gzHeader field will be used if supplied,
    /// otherwise default values will be used.
    /// ZlibFormat.autoDetect is not valid for Deflate.
    ZlibFormat format;

    /// Compression level from 1 (fastest) to 9 (best compression).
    int level = 6;

    /// The GzHeader to be used with ZlibFormat.gz.
    Nullable!GzHeader gzHeader;

    /// Advanced parameters
    /// See zlib's documentation of `deflateInit2`.
    /// windowBits must be between 9 and 15 included
    /// and is adjusted according to the chosen format.
    int windowBits = 15;
    /// ditto
    int memLevel = 8;
    /// ditto
    int strategy = Z_DEFAULT_STRATEGY;

    /// The stream type of the Deflate algorithm.
    static final class Stream : ZlibStream
    {
    }

    /// Create and initialize a new deflate stream with the parameters
    /// of this algorithm.
    /// Throws: Exception if format is ZlibFormat.autoDetect
    /// or if zlib fails to initialize the stream.
    Stream initialize() @safe
    {
        assert(
            windowBits >= 9 && windowBits <= 15,
            "inconsistent windowBits"
        );

        // the format is passed to deflateInit2 through the windowBits argument
        int effectiveBits;
        final switch (format)
        {
        case ZlibFormat.raw:
            effectiveBits = -windowBits;
            break;
        case ZlibFormat.gz:
            effectiveBits = windowBits + 16;
            break;
        case ZlibFormat.zlib:
            effectiveBits = windowBits;
            break;
        case ZlibFormat.autoDetect:
            throw new Exception("invalid ZlibFormat for Deflate");
        }

        auto created = new Stream();

        const res = (() @trusted {
            return deflateInit2(
                &created.strm, level, Z_DEFLATED,
                effectiveBits, memLevel, strategy,
            );
        })();

        enforce(
            res == Z_OK,
            "Could not initialize Zlib deflate stream: " ~ zResultToString(res)
        );

        // a custom header only makes sense for the gz format
        const customHeader = format == ZlibFormat.gz && !gzHeader.isNull;
        if (customHeader)
        {
            auto nativeHeader = gzHeader.get.toZlib();
            (() @trusted => deflateSetHeader(&created.strm, nativeHeader))();
        }

        return created;
    }

    /// Run deflate on the stream.
    /// The stream is asked to finish when lastChunk is set.
    /// Returns: Yes.streamEnded once zlib reports the end of the stream.
    /// Throws: Exception if zlib reports an error.
    Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
    {
        auto flush = Z_NO_FLUSH;
        if (lastChunk)
            flush = Z_FINISH;

        const res = (() @trusted => squiz_box.c.zlib.deflate(&stream.strm, flush))();
        const finished = res == Z_STREAM_END;

        enforce(
            finished || res == Z_OK,
            "Zlib deflate failed with code: " ~ zResultToString(res)
        );

        return finished ? Yes.streamEnded : No.streamEnded;
    }

    /// Reset the stream in order to process another data stream.
    void reset(Stream stream) @trusted
    {
        deflateReset(&stream.strm);
    }

    /// Release the resources held by zlib for this stream.
    void end(Stream stream) @trusted
    {
        deflateEnd(&stream.strm);
    }
}
877 
/// Returns an InputRange streaming over data inflated with Zlib.
/// The input data must be deflated with a zlib format.
auto inflate(I)(I input, size_t chunkSize = defaultChunkSize)
{
    // default parameters: zlib format
    Inflate algo;
    return squiz(input, algo, chunkSize);
}
884 
/// Returns an InputRange streaming over data inflated with Zlib.
/// The input data must be deflated with a gz format.
/// If headerDg is not null, it will be called
/// as soon as the header is read from the stream.
auto inflateGz(I)(I input, GzHeaderDg headerDg, size_t chunkSize = defaultChunkSize)
{
    Inflate algo;
    algo.gzHeaderDg = headerDg;
    algo.format = ZlibFormat.gz;
    return squiz(input, algo, chunkSize);
}
896 
/// Returns an InputRange streaming over data inflated with Zlib.
/// The input data must be deflated with a gz format.
/// The header read from the stream is not reported.
auto inflateGz(I)(I input, size_t chunkSize = defaultChunkSize)
{
    GzHeaderDg noCallback = null;
    return inflateGz(input, noCallback, chunkSize);
}
902 
/// Returns an InputRange streaming over data inflated with Zlib.
/// The input must be raw deflated data (no header, no trailer).
auto inflateRaw(I)(I input, size_t chunkSize = defaultChunkSize)
{
    Inflate algo;
    algo.format = ZlibFormat.raw;
    return squiz(input, algo, chunkSize);
}
911 
/// Zlib's inflate algorithm
struct Inflate
{
    static assert(isSquizAlgo!Inflate);

    /// Which format to expect for the deflated input stream.
    /// In case ZlibFormat.gz, the header read from the stream is
    /// passed to gzHeaderDg if set.
    ZlibFormat format;

    /// If set, will be called with the Gz header once it is known
    GzHeaderDg gzHeaderDg;

    /// Advanced parameters
    /// See zlib's documentation of `inflateInit2`.
    /// windowBits can be 0 if format is ZlibFormat.zlib.
    /// Otherwise it must be between 9 and 15 included.
    int windowBits = 15;

    // Storage handed to zlib to receive the Gz header, together with the
    // delegate to notify and a flag ensuring it is notified only once per stream.
    private static final class Gzh
    {
        private gz_header gzh;
        private byte[GzHeader.bufSize] nameBuf;
        private byte[GzHeader.bufSize] commentBuf;

        private GzHeaderDg dg;
        private bool dgCalled;

        this(GzHeaderDg dg) @safe
        {
            this.dg = dg;
            rearm();
        }

        // (Re)binds the zlib header to the name and comment buffers and clears
        // the notification flag. Needed before each new stream because zlib
        // may null the name/comment pointers when the header read lacks them.
        private void rearm() @safe
        {
            gzh.name = &nameBuf[0];
            gzh.name_max = cast(uint) nameBuf.length;
            gzh.comment = &commentBuf[0];
            gzh.comm_max = cast(uint) commentBuf.length;

            dgCalled = false;
        }
    }

    /// State of an inflate job
    static final class Stream : ZlibStream
    {
        // Only allocated when a Gz header delegate was supplied.
        Gzh gzh;
    }

    /// Creates and initializes the zlib stream for this set of parameters.
    /// Throws: Exception if zlib fails to initialize the stream.
    Stream initialize() @safe
    {
        assert(
            (windowBits == 0 && format == ZlibFormat.zlib) ||
                (9 <= windowBits && windowBits <= 15),
                "inconsistent windowBits"
        );

        // zlib encodes the expected wrapper in the windowBits argument:
        // +16 for gz, +32 for automatic zlib/gz detection, negative for raw deflate.
        int wb = windowBits;
        final switch (format)
        {
        case ZlibFormat.zlib:
            break;
        case ZlibFormat.gz:
            wb += 16;
            break;
        case ZlibFormat.autoDetect:
            wb += 32;
            break;
        case ZlibFormat.raw:
            wb = -wb;
            break;
        }

        auto stream = new Stream();

        const res = (() @trusted => inflateInit2(&stream.strm, wb))();

        enforce(
            res == Z_OK,
            "Could not initialize Zlib's inflate stream: " ~ zResultToString(res)
        );

        if (gzHeaderDg)
        {
            stream.gzh = new Gzh(gzHeaderDg);
            // NOTE(review): the result is deliberately left unchecked, as before.
            // zlib documents that this call fails when the stream does not decode
            // a gzip wrapper; the delegate is then simply never invoked.
            (() @trusted => inflateGetHeader(&stream.strm, &stream.gzh.gzh))();
        }

        return stream;
    }

    /// Runs one inflate step on the stream and notifies the Gz header
    /// delegate (once) as soon as zlib reports the header as complete.
    /// Returns: Yes.streamEnded once zlib reports the end of the deflated stream.
    /// Throws: DataException if zlib reports a data error,
    /// Exception for any other zlib failure.
    package Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" /+ lastChunk +/ )
    {
        const res = (() @trusted => squiz_box.c.zlib.inflate(&stream.strm, Z_NO_FLUSH))();

        if (res == Z_DATA_ERROR)
            throw new DataException("Improper data given to inflate");

        enforce(
            res == Z_OK || res == Z_STREAM_END,
            "Zlib inflate failed with code: " ~ zResultToString(res)
        );

        auto gzh = stream.gzh;
        if (gzh && !gzh.dgCalled && gzh.gzh.done)
        {
            auto head = (() @trusted => GzHeader(&gzh.gzh))();
            gzh.dg(head);
            gzh.dgCalled = true;
        }

        return cast(Flag!"streamEnded")(res == Z_STREAM_END);
    }

    /// Resets the stream so that it can inflate a new input.
    package void reset(Stream stream) @trusted
    {
        inflateReset(&stream.strm);

        // inflateReset drops the header request made with inflateGetHeader,
        // so it must be issued again, otherwise the delegate would only ever
        // be called for the first input processed with this stream.
        if (stream.gzh)
        {
            stream.gzh.rearm();
            inflateGetHeader(&stream.strm, &stream.gzh.gzh);
        }
    }

    /// Releases the zlib resources of the stream.
    package void end(Stream stream) @trusted
    {
        inflateEnd(&stream.strm);
    }
}
1029 
1030 ///
1031 @("Deflate / Inflate")
1032 unittest
1033 {
1034     import test.util;
1035     import std.array : join;
1036 
1037     auto def = Deflate.init;
1038     auto inf = Inflate.init;
1039 
1040     const len = 100_000;
1041     const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
1042     const input = generateRepetitiveData(len, phrase).join();
1043 
1044     // deflating
1045     const squized = only(input).squiz(def).join();
1046 
1047     // re-inflating
1048     const output = only(squized).squiz(inf).join();
1049 
1050     assert(squized.length < input.length);
1051     assert(output == input);
1052 
1053     // for such long and repetitive data, ratio is around 0.3%
1054     const ratio = cast(double) squized.length / cast(double) input.length;
1055     assert(ratio < 0.004);
1056 }
1057 
1058 ///
1059 @("Deflate / Inflate in Gz format and custom header")
1060 unittest
1061 {
1062     import test.util;
1063     import std.array : join;
1064 
1065     const len = 100_000;
1066     const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
1067     const input = generateRepetitiveData(len, phrase).join();
1068 
1069     GzHeader inHead;
1070     inHead.mtime = Clock.currTime;
1071     inHead.os = GzHeader.Os.fatFs;
1072     inHead.text = Yes.text;
1073     inHead.filename = "boring.txt";
1074     inHead.comment = "A very boring file";
1075 
1076     // deflating
1077     const squized = only(input)
1078         .deflateGz(inHead)
1079         .join();
1080 
1081     // re-inflating
1082     GzHeader outHead;
1083     int numCalls;
1084     void setOutHead(GzHeader gzh)
1085     {
1086         outHead = gzh;
1087         numCalls++;
1088     }
1089 
1090     const output = only(squized)
1091         .inflateGz(&setOutHead)
1092         .join();
1093 
1094     assert(squized.length < input.length);
1095     assert(output == input);
1096     assert(inHead == outHead);
1097     assert(numCalls == 1);
1098 }
1099 
1100 ///
1101 @("Deflate / Inflate in raw format")
1102 unittest
1103 {
1104     import test.util;
1105     import std.array : join;
1106 
1107     const len = 100_000;
1108     const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
1109     const input = generateRepetitiveData(len, phrase).join();
1110 
1111     // deflating
1112     const squized = only(input)
1113         .deflateRaw()
1114         .join();
1115 
1116     // re-inflating
1117     const output = only(squized)
1118         .inflateRaw()
1119         .join();
1120 
1121     assert(squized.length < input.length);
1122     assert(output == input);
1123 }
1124 
// Gives the name of a zlib result code, without its `Z_` prefix.
// Codes that are not listed map to "(Unknown result)".
package string zResultToString(int res) @safe pure nothrow @nogc
{
    switch (res)
    {
    case Z_OK: return "OK";
    case Z_STREAM_END: return "STREAM_END";
    case Z_NEED_DICT: return "NEED_DICT";
    case Z_ERRNO: return "ERRNO";
    case Z_STREAM_ERROR: return "STREAM_ERROR";
    case Z_DATA_ERROR: return "DATA_ERROR";
    case Z_MEM_ERROR: return "MEM_ERROR";
    case Z_BUF_ERROR: return "BUF_ERROR";
    case Z_VERSION_ERROR: return "VERSION_ERROR";
    default: return "(Unknown result)";
    }
}
1151 
// Gives the name of a zlib flush mode, without its `Z_` prefix.
// Values that are not listed map to "(Unknown flush)".
package string zFlushToString(int flush) @safe pure nothrow @nogc
{
    switch (flush)
    {
    case Z_NO_FLUSH: return "NO_FLUSH";
    case Z_PARTIAL_FLUSH: return "PARTIAL_FLUSH";
    case Z_SYNC_FLUSH: return "SYNC_FLUSH";
    case Z_FULL_FLUSH: return "FULL_FLUSH";
    case Z_FINISH: return "FINISH";
    case Z_BLOCK: return "BLOCK";
    case Z_TREES: return "TREES";
    default: return "(Unknown flush)";
    }
}
1174 
1175 version (HaveSquizBzip2)
1176 {
1177 
1178     /// Returns an InputRange containing the input data processed through Bzip2 compression.
1179     auto compressBzip2(I)(I input, size_t chunkSize = defaultChunkSize)
1180             if (isByteRange!I)
1181     {
1182         return squiz(input, CompressBzip2.init, chunkSize);
1183     }
1184 
1185     final class Bz2Stream : SquizStream
1186     {
1187         mixin ZlibLikeStreamImpl!(bz_stream);
1188 
1189         @property size_t totalInput() const @safe
1190         {
1191             ulong hi = strm.total_in_hi32;
1192             return cast(size_t)(
1193                 (hi << 32) | strm.total_in_lo32
1194             );
1195         }
1196 
1197         @property size_t totalOutput() const @safe
1198         {
1199             ulong hi = strm.total_out_hi32;
1200             return cast(size_t)(
1201                 (hi << 32) | strm.total_out_lo32
1202             );
1203         }
1204 
1205         this() @safe
1206         {
1207             strm.bzalloc = &(gcAlloc!int);
1208             strm.bzfree = &gcFree;
1209         }
1210     }
1211 
1212     /// Compression with the Bzip2 algorithm.
1213     ///
1214     /// Although having better compression capabilities than Zlib (deflate),
1215     /// Bzip2 has poor latenty when it comes to streaming.
1216     /// I.e. it can swallow several Mb of data before starting to produce output.
1217     /// If streaming latenty is an important factor, deflate/inflate
1218     /// should be the favorite algorithm.
1219     ///
1220     /// This algorithm does not support resource reuse, so calling reset
1221     /// is equivalent to a call to end followed by initialize.
1222     /// (but the same instance of stream is kept).
1223     struct CompressBzip2
1224     {
1225         static assert(isSquizAlgo!CompressBzip2);
1226 
1227         /// Advanced Bzip2 parameters
1228         /// See Bzip2 documentation
1229         /// https://www.sourceware.org/bzip2/manual/manual.html#bzcompress-init
1230         int blockSize100k = 9;
1231         /// ditto
1232         int verbosity = 0;
1233         /// ditto
1234         int workFactor = 30;
1235 
1236         alias Stream = Bz2Stream;
1237 
1238         Stream initialize() @safe
1239         {
1240             auto stream = new Stream;
1241 
1242             const res = (() @trusted => BZ2_bzCompressInit(
1243                     &stream.strm, blockSize100k, verbosity, workFactor,
1244             ))();
1245             enforce(
1246                 res == BZ_OK,
1247                 "Could not initialize Bzip2 compressor: " ~ bzResultToString(res)
1248             );
1249             return stream;
1250         }
1251 
1252         Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
1253         {
1254             const action = lastChunk ? BZ_FINISH : BZ_RUN;
1255             const res = (() @trusted => BZ2_bzCompress(&stream.strm, action))();
1256 
1257             if (res == BZ_STREAM_END)
1258                 return Yes.streamEnded;
1259 
1260             enforce(
1261                 (action == BZ_RUN && res == BZ_RUN_OK) ||
1262                     (action == BZ_FINISH && res == BZ_FINISH_OK),
1263                     "Bzip2 compress failed with code: " ~ bzResultToString(res)
1264             );
1265 
1266             return No.streamEnded;
1267         }
1268 
1269         void reset(Stream stream) @safe
1270         {
1271             (() @trusted => BZ2_bzCompressEnd(&stream.strm))();
1272 
1273             stream.strm = bz_stream.init;
1274             stream.strm.bzalloc = &(gcAlloc!int);
1275             stream.strm.bzfree = &gcFree;
1276 
1277             const res = (() @trusted => BZ2_bzCompressInit(
1278                     &stream.strm, blockSize100k, verbosity, workFactor,
1279             ))();
1280             enforce(
1281                 res == BZ_OK,
1282                 "Could not initialize Bzip2 compressor: " ~ bzResultToString(res)
1283             );
1284         }
1285 
1286         void end(Stream stream) @trusted
1287         {
1288             BZ2_bzCompressEnd(&stream.strm);
1289         }
1290     }
1291 
1292     /// Returns an InputRange streaming over data decompressed with Bzip2.
1293     auto decompressBzip2(I)(I input, size_t chunkSize = defaultChunkSize)
1294             if (isByteRange!I)
1295     {
1296         return squiz(input, DecompressBzip2.init, chunkSize);
1297     }
1298 
1299     /// Decompression of data encoded with Bzip2.
1300     ///
1301     /// This algorithm does not support resource reuse, so calling reset
1302     /// is equivalent to a call to end followed by initialize.
1303     /// (but the same instance of stream is kept).
1304     struct DecompressBzip2
1305     {
1306         static assert(isSquizAlgo!DecompressBzip2);
1307 
1308         /// Advanced Bzip2 parameters
1309         /// See Bzip2 documentation
1310         /// https://www.sourceware.org/bzip2/manual/manual.html#bzDecompress-init
1311         int verbosity;
1312         /// ditto
1313         bool small;
1314 
1315         alias Stream = Bz2Stream;
1316 
1317         Stream initialize() @safe
1318         {
1319             auto stream = new Stream;
1320 
1321             const res = (() @trusted => BZ2_bzDecompressInit(
1322                     &stream.strm, verbosity, small ? 1 : 0,
1323             ))();
1324             enforce(
1325                 res == BZ_OK,
1326                 "Could not initialize Bzip2 decompressor: " ~ bzResultToString(res)
1327             );
1328             return stream;
1329         }
1330 
1331         Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe
1332         {
1333             const res = (() @trusted => BZ2_bzDecompress(&stream.strm))();
1334 
1335             if (res == BZ_DATA_ERROR)
1336                 throw new DataException("Input data was not compressed with Bzip2");
1337 
1338             enforce(
1339                 res == BZ_OK || res == BZ_STREAM_END,
1340                 "Bzip2 decompress failed with code: " ~ bzResultToString(res)
1341             );
1342 
1343             return cast(Flag!"streamEnded")(res == BZ_STREAM_END);
1344         }
1345 
1346         void reset(Stream stream) @safe
1347         {
1348             (() @trusted => BZ2_bzDecompressEnd(&stream.strm))();
1349 
1350             stream.strm = bz_stream.init;
1351             stream.strm.bzalloc = &(gcAlloc!int);
1352             stream.strm.bzfree = &gcFree;
1353 
1354             const res = (() @trusted => BZ2_bzDecompressInit(
1355                     &stream.strm, verbosity, small ? 1 : 0,
1356             ))();
1357             enforce(
1358                 res == BZ_OK,
1359                 "Could not initialize Bzip2 decompressor: " ~ bzResultToString(res)
1360             );
1361         }
1362 
1363         void end(Stream stream) @trusted
1364         {
1365             BZ2_bzDecompressEnd(&stream.strm);
1366         }
1367     }
1368 
1369     ///
1370     @("Compress / Decompress Bzip2")
1371     unittest
1372     {
1373         import test.util;
1374         import std.array : join;
1375 
1376         const len = 100_000;
1377         const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
1378         const input = generateRepetitiveData(len, phrase).join();
1379 
1380         const squized = only(input)
1381             .compressBzip2()
1382             .join();
1383 
1384         const output = only(squized)
1385             .decompressBzip2()
1386             .join();
1387 
1388         assert(squized.length < input.length);
1389         assert(output == input);
1390 
1391         // for such long and repetitive data, ratio is around 0.12%
1392         const ratio = cast(double) squized.length / cast(double) input.length;
1393         assert(ratio < 0.002);
1394     }
1395 
1396     private string bzActionToString(int action) @safe pure nothrow @nogc
1397     {
1398         switch (action)
1399         {
1400         case BZ_RUN:
1401             return "RUN";
1402         case BZ_FLUSH:
1403             return "FLUSH";
1404         case BZ_FINISH:
1405             return "FINISH";
1406         default:
1407             return "(Unknown result)";
1408         }
1409     }
1410 
1411     private string bzResultToString(int res) @safe pure nothrow @nogc
1412     {
1413         switch (res)
1414         {
1415         case BZ_OK:
1416             return "OK";
1417         case BZ_RUN_OK:
1418             return "RUN_OK";
1419         case BZ_FLUSH_OK:
1420             return "FLUSH_OK";
1421         case BZ_FINISH_OK:
1422             return "FINISH_OK";
1423         case BZ_STREAM_END:
1424             return "STREAM_END";
1425         case BZ_SEQUENCE_ERROR:
1426             return "SEQUENCE_ERROR";
1427         case BZ_PARAM_ERROR:
1428             return "PARAM_ERROR";
1429         case BZ_MEM_ERROR:
1430             return "MEM_ERROR";
1431         case BZ_DATA_ERROR:
1432             return "DATA_ERROR";
1433         case BZ_DATA_ERROR_MAGIC:
1434             return "DATA_ERROR_MAGIC";
1435         case BZ_IO_ERROR:
1436             return "IO_ERROR";
1437         case BZ_UNEXPECTED_EOF:
1438             return "UNEXPECTED_EOF";
1439         case BZ_OUTBUFF_FULL:
1440             return "OUTBUFF_FULL";
1441         case BZ_CONFIG_ERROR:
1442             return "CONFIG_ERROR";
1443         default:
1444             return "(Unknown result)";
1445         }
1446     }
1447 }
1448 
1449 version (HaveSquizLzma)
1450 {
1451     final class LzmaStream : SquizStream
1452     {
1453         mixin ZlibLikeStreamImpl!(lzma_stream);
1454         mixin ZlibLikeTotalInOutImpl!();
1455 
1456         private lzma_allocator alloc;
1457         private lzma_options_delta optsDelta;
1458         private lzma_options_lzma optsLzma;
1459         private lzma_filter[] filterChain;
1460 
1461         this() @safe
1462         {
1463             alloc.alloc = &(gcAlloc!size_t);
1464             alloc.free = &gcFree;
1465             strm.allocator = &alloc;
1466         }
1467 
1468         private lzma_filter[] buildFilterChain(LzmaFormat format, LzmaFilter[] filters,
1469             uint preset, uint deltaDist) @safe
1470         {
1471             lzma_filter[] res;
1472             foreach (f; filters)
1473             {
1474                 final switch (f)
1475                 {
1476                 case LzmaFilter.delta:
1477                     optsDelta.dist = deltaDist;
1478                     res ~= lzma_filter(LZMA_FILTER_DELTA, cast(void*)&optsDelta);
1479                     break;
1480                 case LzmaFilter.bcjX86:
1481                     res ~= lzma_filter(LZMA_FILTER_X86, null);
1482                     break;
1483                 case LzmaFilter.bcjPowerPc:
1484                     res ~= lzma_filter(LZMA_FILTER_POWERPC, null);
1485                     break;
1486                 case LzmaFilter.bcjIa64:
1487                     res ~= lzma_filter(LZMA_FILTER_IA64, null);
1488                     break;
1489                 case LzmaFilter.bcjArm:
1490                     res ~= lzma_filter(LZMA_FILTER_ARM, null);
1491                     break;
1492                 case LzmaFilter.bcjArmThumb:
1493                     res ~= lzma_filter(LZMA_FILTER_ARMTHUMB, null);
1494                     break;
1495                 case LzmaFilter.bcjSparc:
1496                     res ~= lzma_filter(LZMA_FILTER_SPARC, null);
1497                     break;
1498                 }
1499             }
1500 
1501             enforce(res.length <= 3, "Too many filters supplied");
1502 
1503             if (format != LzmaFormat.rawCopy)
1504             {
1505                 (() @trusted => lzma_lzma_preset(&optsLzma, preset))();
1506                 const compFilter = format.isLegacy ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
1507                 res ~= lzma_filter(compFilter, cast(void*)&optsLzma);
1508             }
1509 
1510             res ~= lzma_filter(LZMA_VLI_UNKNOWN, null); // end marker
1511 
1512             filterChain = res;
1513             return res;
1514         }
1515 
1516     }
1517 
    /// Header/trailer format for Lzma compression.
    ///
    /// The three raw formats are declared last: `isRaw` relies on this order.
    enum LzmaFormat
    {
        /// Lzma with Xz format, suitable to write *.xz files
        xz,
        /// LZMA1 encoding and format, suitable for legacy *.lzma files.
        /// This format doesn't support filters.
        legacy,
        /// Raw LZMA2 compression, without header/trailer.
        /// Use this to include compressed LZMA data in
        /// a container defined externally (e.g. this is used
        /// for the *.7z archives)
        raw,
        /// Raw LZMA1 compression, without header/trailer.
        /// This one is still found in some *.7z files.
        rawLegacy,
        /// Just copy bytes out.
        /// You may use this in combination with a filter to observe its
        /// effect, but it has otherwise no use.
        rawCopy,
    }
1539 
1540     /// Whether this is a legacy format
1541     bool isLegacy(LzmaFormat format) @safe pure nothrow @nogc
1542     {
1543         return format == LzmaFormat.legacy || format == LzmaFormat.rawLegacy;
1544     }
1545 
1546     /// Whether this is a raw format
1547     bool isRaw(LzmaFormat format) @safe pure nothrow @nogc
1548     {
1549         return cast(int) format >= cast(int) LzmaFormat.raw;
1550     }
1551 
    /// Filters to use with the LZMA compression.
    ///
    /// Up to 3 filters can be used from this list.
    /// These filters transform the input to increase
    /// redundancy of the data supplied to the LZMA compression.
    enum LzmaFilter
    {
        /// Delta filter, which stores differences between bytes
        /// to produce more repetitive data in some circumstances.
        /// Works with the `deltaDist` parameter of `CompressLzma`.
        delta,

        /// BCJ (Branch/Call/Jump) filters aim to optimize machine code
        /// compression by converting relative branches, calls and jumps
        /// to absolute addresses. This increases redundancy, which can be
        /// exploited by the LZMA compression.
        ///
        /// BCJ filters are available for a set of CPU architectures.
        /// Use one (or two) of them when compressing compiled binaries.
        bcjX86,
        /// ditto
        bcjPowerPc,
        /// ditto
        bcjIa64,
        /// ditto
        bcjArm,
        /// ditto
        bcjArmThumb,
        /// ditto
        bcjSparc,
    }
1583 
    /// Integrity check to include in the compressed data
    /// (only for the Xz format).
    /// Default for xz is CRC-64.
    enum LzmaCheck
    {
        /// No integrity check included
        none,
        /// CRC-32 integrity check
        crc32,
        /// CRC-64 integrity check
        crc64,
        /// SHA-256 integrity check
        sha256,
    }
1598 
1599     private lzma_check toLzma(LzmaCheck check) @safe pure nothrow @nogc
1600     {
1601         final switch (check)
1602         {
1603         case LzmaCheck.none:
1604             return lzma_check.NONE;
1605         case LzmaCheck.crc32:
1606             return lzma_check.CRC32;
1607         case LzmaCheck.crc64:
1608             return lzma_check.CRC64;
1609         case LzmaCheck.sha256:
1610             return lzma_check.SHA256;
1611         }
1612     }
1613 
1614     auto compressXz(I)(I input, size_t chunkSize = defaultChunkSize)
1615     {
1616         return squiz(input, CompressLzma.init, chunkSize);
1617     }
1618 
1619     auto compressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize)
1620     {
1621         CompressLzma algo;
1622         algo.format = LzmaFormat.raw;
1623         return squiz(input, algo, chunkSize);
1624     }
1625 
1626     struct CompressLzma
1627     {
1628         import std.conv : to;
1629 
1630         static assert(isSquizAlgo!CompressLzma);
1631 
1632         /// The format of the compressed stream
1633         LzmaFormat format;
1634 
1635         /// The integrity check to include in compressed stream.
1636         /// Only used with XZ format.
1637         LzmaCheck check = LzmaCheck.crc64;
1638 
1639         /// The compression preset between 0 (fast) to 9 (higher compression).
1640         /// The default is 6.
1641         uint preset = 6;
1642 
1643         /// Makes the encoding significantly slower for marginal compression
1644         /// improvement. Only useful if you don't mind about CPU time at all.
1645         Flag!"extreme" extreme;
1646 
1647         /// Filters to include in the encoding.
1648         /// Maximum three filters can be provided.
1649         /// For most input, no filtering is necessary.
1650         LzmaFilter[] filters;
1651 
1652         /// Number of bytes between 1 and 256 to use for the Delta filter.
1653         /// For example for 16bit PCM stero audio, you should use 4.
1654         /// For RGB data 8bit per channel, you should use 3.
1655         uint deltaDist;
1656 
1657         alias Stream = LzmaStream;
1658 
1659         private void initStream(Stream stream) @trusted
1660         {
1661             uint pres = preset;
1662             if (extreme)
1663                 pres |= LZMA_PRESET_EXTREME;
1664 
1665             lzma_ret res;
1666             final switch (format)
1667             {
1668             case LzmaFormat.xz:
1669                 const chain = stream.buildFilterChain(format, filters, pres, deltaDist);
1670                 res = lzma_stream_encoder(&stream.strm, chain.ptr, check.toLzma());
1671                 break;
1672             case LzmaFormat.legacy:
1673                 enforce(filters.length == 0, "Filters are not supported with the legacy format");
1674                 lzma_lzma_preset(&stream.optsLzma, preset);
1675                 res = lzma_alone_encoder(&stream.strm, &stream.optsLzma);
1676                 break;
1677             case LzmaFormat.raw:
1678             case LzmaFormat.rawLegacy:
1679             case LzmaFormat.rawCopy:
1680                 const chain = stream.buildFilterChain(format, filters, pres, deltaDist);
1681                 res = lzma_raw_encoder(&stream.strm, chain.ptr);
1682                 break;
1683             }
1684 
1685             enforce(res == lzma_ret.OK, "Could not initialize LZMA encoder: ", res.to!string);
1686         }
1687 
1688         Stream initialize() @safe
1689         {
1690             auto stream = new LzmaStream;
1691             initStream(stream);
1692             return stream;
1693         }
1694 
1695         Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
1696         {
1697             return lzmaCode(stream, lastChunk);
1698         }
1699 
1700         void reset(Stream stream) @safe
1701         {
1702             // Lzma supports reset out of the box by recalling initialization
1703             // function without calling lzma_end.
1704 
1705             initStream(stream);
1706         }
1707 
1708         void end(Stream stream) @trusted
1709         {
1710             lzma_end(&stream.strm);
1711         }
1712     }
1713 
1714     auto decompressXz(I)(I input, size_t chunkSize = defaultChunkSize)
1715     {
1716         return squiz(input, DecompressLzma.init, chunkSize);
1717     }
1718 
1719     auto decompressLzmaRaw(I)(I input, size_t chunkSize = defaultChunkSize)
1720     {
1721         DecompressLzma algo;
1722         algo.format = LzmaFormat.raw;
1723         return squiz(input, algo, chunkSize);
1724     }
1725 
1726     struct DecompressLzma
1727     {
1728         import std.conv : to;
1729 
1730         static assert(isSquizAlgo!DecompressLzma);
1731 
1732         /// The format of the compressed stream
1733         LzmaFormat format;
1734 
1735         /// The memory usage limit in bytes.
1736         /// by default no limit is enforced
1737         size_t memLimit = size_t.max;
1738 
1739         /// Parameters for the raw decompression.
1740         /// They are the same than for the compression.
1741         /// As there is no header to tell Lzma what filters were used during
1742         /// compression, it is the responsibility of the programmer to
1743         /// correctly ensure that the same options are used for decompression.
1744         /// All these options are ignored when decompressing .xz stream.
1745         uint preset = 6;
1746         /// ditto
1747         Flag!"extreme" extreme;
1748         /// ditto
1749         LzmaFilter[] filters;
1750         /// ditto
1751         uint deltaDist;
1752 
1753         alias Stream = LzmaStream;
1754 
1755         this(LzmaFormat format) @safe
1756         {
1757             this.format = format;
1758         }
1759 
1760         /// convenience constructor to copy parameters of the compression
1761         /// for the decompression. Especially useful for the raw decompression,
1762         /// to ensure that the parameters fit the ones used for compression.
1763         this(CompressLzma compress) @safe
1764         {
1765             format = compress.format;
1766             preset = compress.preset;
1767             extreme = compress.extreme;
1768             filters = compress.filters;
1769             deltaDist = compress.deltaDist;
1770         }
1771 
1772         private void initStream(Stream stream) @trusted
1773         {
1774             ulong memlim = memLimit;
1775             if (memLimit == size_t.max)
1776                 memlim = ulong.max;
1777 
1778             lzma_ret res;
1779 
1780             final switch (format)
1781             {
1782             case LzmaFormat.xz:
1783                 res = lzma_stream_decoder(&stream.strm, memlim, 0);
1784                 break;
1785             case LzmaFormat.legacy:
1786                 res = lzma_alone_decoder(&stream.strm, memlim);
1787                 break;
1788             case LzmaFormat.raw:
1789             case LzmaFormat.rawLegacy:
1790             case LzmaFormat.rawCopy:
1791                 uint pres = preset;
1792                 if (extreme)
1793                     pres |= LZMA_PRESET_EXTREME;
1794 
1795                 const chain = stream.buildFilterChain(format, filters, pres, deltaDist);
1796 
1797                 res = lzma_raw_decoder(&stream.strm, chain.ptr);
1798             }
1799             enforce(res == lzma_ret.OK, "Could not initialize LZMA encoder: ", res.to!string);
1800         }
1801 
1802         Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
1803         {
1804             return lzmaCode(stream, lastChunk);
1805         }
1806 
1807         Stream initialize() @safe
1808         {
1809             auto stream = new LzmaStream;
1810             initStream(stream);
1811             return stream;
1812         }
1813 
1814         void reset(Stream stream) @safe
1815         {
1816             // Lzma supports reset out of the box by recalling initialization
1817             // function without calling lzma_end.
1818 
1819             initStream(stream);
1820         }
1821 
1822         void end(Stream stream) @trusted
1823         {
1824             lzma_end(&stream.strm);
1825         }
1826     }
1827 
1828     private Flag!"streamEnded" lzmaCode(LzmaStream stream, Flag!"lastChunk" lastChunk) @safe
1829     {
1830         import std.conv : to;
1831 
1832         const action = lastChunk ? lzma_action.FINISH : lzma_action.RUN;
1833         const res = (() @trusted => lzma_code(&stream.strm, action))();
1834 
1835         enforce(
1836             res == lzma_ret.OK || res == lzma_ret.STREAM_END,
1837             "LZMA encoding failed with code: " ~ res.to!string
1838         );
1839 
1840         return cast(Flag!"streamEnded")(res == lzma_ret.STREAM_END);
1841     }
1842 
1843     ///
1844     @("Compress / Decompress XZ")
1845     unittest
1846     {
1847         import test.util;
1848         import std.array : join;
1849 
1850         const len = 100_000;
1851         const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
1852         const input = generateRepetitiveData(len, phrase).join();
1853 
1854         const squized = only(input)
1855             .compressXz()
1856             .join();
1857 
1858         const output = only(squized)
1859             .decompressXz()
1860             .join();
1861 
1862         assert(squized.length < input.length);
1863         assert(output == input);
1864 
1865         // for such long and repetitive data, ratio is around 0.2%
1866         const ratio = cast(double) squized.length / cast(double) input.length;
1867         assert(ratio < 0.003);
1868     }
1869 
1870     ///
1871     @("Integrity check XZ")
1872     unittest
1873     {
1874         import test.util;
1875         import std.array : join;
1876 
1877         const len = 100_000;
1878         const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
1879         const input = generateRepetitiveData(len, phrase).join();
1880 
1881         auto squized = only(input)
1882             .compressXz()
1883             .join()
1884             .dup; // dup because const(ubyte)[] is returned
1885 
1886         squized[squized.length / 2] += 1;
1887 
1888         assertThrown(
1889             only(squized)
1890                 .decompressXz()
1891                 .join()
1892         );
1893     }
1894 
1895     ///
1896     @("Compress / Decompress XZ with filter")
1897     unittest
1898     {
1899         import test.util;
1900         import std.array : join;
1901 
1902         const len = 100_000;
1903         const input = generateSequentialData(len, 1245, 27).join();
1904 
1905         const reference = only(input)
1906             .compressXz()
1907             .join();
1908 
1909         CompressLzma comp;
1910         comp.filters ~= LzmaFilter.delta;
1911         comp.deltaDist = 8; // sequential data of 8 byte integers
1912 
1913         const withDelta = only(input)
1914             .squiz(comp)
1915             .join();
1916 
1917         const output = only(withDelta)
1918             .decompressXz()
1919             .join();
1920 
1921         assert(output == input);
1922         // < 20% compression without filter (sequential data is tough)
1923         // < 0.5% compression with delta (peace of cake)
1924         assert(input.length > reference.length * 5);
1925         assert(input.length > withDelta.length * 200);
1926     }
1927 
1928     ///
1929     @("Compress / Decompress Lzma Raw")
1930     unittest
1931     {
1932         import test.util;
1933         import std.array : join;
1934 
1935         const len = 100_000;
1936         const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
1937         const input = generateRepetitiveData(len, phrase).join();
1938 
1939         const reference = only(input)
1940             .compressXz()
1941             .join();
1942 
1943         const squized = only(input)
1944             .compressLzmaRaw()
1945             .join();
1946 
1947         const output = only(squized)
1948             .decompressLzmaRaw()
1949             .join();
1950 
1951         assert(output == input);
1952         assert(squized.length < input.length);
1953         assert(squized.length < reference.length); // win header/trailer space
1954 
1955         // for such repetitive data, ratio is around 1.13%
1956         // also generally better than zlib, bzip2 struggles a lot for repetitive data
1957         const ratio = cast(double) squized.length / cast(double) input.length;
1958         assert(ratio < 0.003);
1959     }
1960 
1961     ///
1962     @("Compress / Decompress Lzma Raw with filter")
1963     unittest
1964     {
1965         import test.util;
1966         import std.array : join;
1967 
1968         const len = 100_000;
1969         const input = generateSequentialData(len, 1245, 27).join();
1970 
1971         const reference = only(input)
1972             .compressLzmaRaw()
1973             .join();
1974 
1975         CompressLzma comp;
1976         comp.format = LzmaFormat.raw;
1977         comp.filters ~= LzmaFilter.delta;
1978         comp.deltaDist = 8; // sequential data of 8 byte integers
1979 
1980         const withDelta = only(input)
1981             .squiz(comp)
1982             .join();
1983 
1984         const output = only(withDelta) // using compression parameters for decompression
1985         .squiz(DecompressLzma(comp))
1986             .join();
1987 
1988         assert(output == input);
1989         // < 20% compression without filter (sequential data is tough)
1990         // < 0.4% compression with delta (peace of cake)
1991         assert(input.length > reference.length * 5);
1992         assert(input.length > withDelta.length * 250);
1993     }
1994 
1995     ///
1996     @("Compress / Decompress Lzma Legacy")
1997     unittest
1998     {
1999         import test.util;
2000         import std.array : join;
2001 
2002         const len = 100_000;
2003         const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
2004         const input = generateRepetitiveData(len, phrase).join();
2005 
2006         auto comp = CompressLzma(LzmaFormat.legacy);
2007         auto decomp = DecompressLzma(comp);
2008 
2009         const squized = only(input)
2010             .squiz(comp)
2011             .join();
2012 
2013         const output = only(squized)
2014             .squiz(decomp)
2015             .join();
2016 
2017         assert(squized.length < input.length);
2018         assert(output == input);
2019 
2020         // for such repetitive data, ratio is around 1.13%
2021         // also generally better than zlib, bzip2 struggles a lot for repetitive data
2022         const ratio = cast(double) squized.length / cast(double) input.length;
2023         assert(ratio < 0.003);
2024     }
2025 
2026     ///
2027     @("Compress / Decompress Lzma Raw Legacy")
2028     unittest
2029     {
2030         import test.util;
2031         import std.array : join;
2032 
2033         const len = 100_000;
2034         const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
2035         const input = generateRepetitiveData(len, phrase).join();
2036 
2037         auto comp = CompressLzma(LzmaFormat.rawLegacy);
2038         auto decomp = DecompressLzma(comp);
2039 
2040         const squized = only(input)
2041             .squiz(comp)
2042             .join();
2043 
2044         const output = only(squized)
2045             .squiz(decomp)
2046             .join();
2047 
2048         assert(squized.length < input.length);
2049         assert(output == input);
2050 
2051         // for such repetitive data, ratio is around 1.13%
2052         // also generally better than zlib, bzip2 struggles a lot for repetitive data
2053         const ratio = cast(double) squized.length / cast(double) input.length;
2054         assert(ratio < 0.003);
2055     }
2056 
2057     ///
2058     @("Compress / Decompress Lzma rawLegacy with filter")
2059     unittest
2060     {
2061         import test.util;
2062         import std.array : join;
2063 
2064         const len = 100_000;
2065         const input = generateSequentialData(len, 1245, 27).join();
2066 
2067         const reference = only(input)
2068             .squiz(CompressLzma(LzmaFormat.legacy))
2069             .join();
2070 
2071         CompressLzma comp;
2072         comp.format = LzmaFormat.rawLegacy;
2073         comp.filters ~= LzmaFilter.delta;
2074         comp.deltaDist = 8; // sequential data of 8 byte integers
2075 
2076         auto decomp = DecompressLzma(comp);
2077 
2078         const withDelta = only(input)
2079             .squiz(comp)
2080             .join();
2081 
2082         const output = only(withDelta)
2083             .squiz(decomp)
2084             .join();
2085 
2086         assert(output == input);
2087         // < 20% compression without filter (sequential data is tough)
2088         // < 0.4% compression with delta (peace of cake)
2089         assert(input.length > reference.length * 5);
2090         assert(input.length > withDelta.length * 250);
2091     }
2092 }
2093 
2094 version (HaveSquizZstandard)
2095 {
2096     auto compressZstd(I)(I input, size_t chunkSize = defaultChunkSize)
2097     {
2098         return squiz(input, CompressZstd.init, chunkSize);
2099     }
2100 
2101     auto decompressZstd(I)(I input, size_t chunkSize = defaultChunkSize)
2102     {
2103         return squiz(input, DecompressZstd.init, chunkSize);
2104     }
2105 
2106     class ZstdStream : SquizStream
2107     {
2108         private ZSTD_inBuffer inBuf;
2109         private ZSTD_outBuffer outBuf;
2110         private size_t totalIn;
2111         private size_t totalOut;
2112 
2113         @property const(ubyte)[] input() const @trusted
2114         {
2115             auto ptr = cast(const(ubyte)*) inBuf.src;
2116             return ptr[inBuf.pos .. inBuf.size];
2117         }
2118 
2119         @property void input(const(ubyte)[] inp) @trusted
2120         {
2121             totalIn += inBuf.pos;
2122             inBuf.pos = 0;
2123             inBuf.src = cast(const(void)*) inp.ptr;
2124             inBuf.size = inp.length;
2125         }
2126 
2127         @property size_t totalInput() const @safe
2128         {
2129             return totalIn + inBuf.pos;
2130         }
2131 
2132         @property inout(ubyte)[] output() inout @trusted
2133         {
2134             auto ptr = cast(inout(ubyte)*) outBuf.dst;
2135             return ptr[outBuf.pos .. outBuf.size];
2136         }
2137 
2138         @property void output(ubyte[] outp) @trusted
2139         {
2140             totalOut += outBuf.pos;
2141             outBuf.pos = 0;
2142             outBuf.dst = cast(void*) outp.ptr;
2143             outBuf.size = outp.length;
2144         }
2145 
2146         @property size_t totalOutput() const @safe
2147         {
2148             return totalOut + outBuf.pos;
2149         }
2150 
2151         override string toString() const @safe
2152         {
2153             import std.format : format;
2154 
2155             string res;
2156             res ~= "ZstdStream:\n";
2157             res ~= "  Input:\n";
2158             res ~= format!"    start 0x%016x\n"(inBuf.src);
2159             res ~= format!"    pos %s\n"(inBuf.pos);
2160             res ~= format!"    size %s\n"(inBuf.size);
2161             res ~= format!"    total %s\n"(totalInput);
2162             res ~= "  Output:\n";
2163             res ~= format!"    start 0x%016x\n"(outBuf.dst);
2164             res ~= format!"    pos %s\n"(outBuf.pos);
2165             res ~= format!"    size %s\n"(outBuf.size);
2166             res ~= format!"    total %s"(totalOutput);
2167 
2168             return res;
2169         }
2170     }
2171 
2172     private string zstdSetCParam(string name)
2173     {
2174         return "if (" ~ name ~ ") " ~
2175             "ZSTD_CCtx_setParameter(cctx, ZSTD_cParameter." ~ name ~ ", " ~ name ~ ");";
2176     }
2177 
2178     private void zstdError(size_t code, string desc) @trusted
2179     {
2180         import std.string : fromStringz;
2181 
2182         if (ZSTD_isError(code))
2183         {
2184             const msg = fromStringz(ZSTD_getErrorName(code));
2185             throw new Exception((desc ~ ": " ~ msg).idup);
2186         }
2187     }
2188 
2189     /// Zstandard is a fast compression algorithm designed for streaming.
2190     /// See zstd.h (enum ZSTD_cParameter) for details.
2191     struct CompressZstd
2192     {
2193         static assert(isSquizAlgo!CompressZstd);
2194 
2195         /// Common paramters.
2196         /// A value of zero indicates that the default should be used.
2197         int compressionLevel;
2198         /// ditto
2199         int windowLog;
2200         /// ditto
2201         int hashLog;
2202         /// ditto
2203         int chainLog;
2204         /// ditto
2205         int searchLog;
2206         /// ditto
2207         int minMatch;
2208         /// ditto
2209         int targetLength;
2210         /// ditto
2211         int strategy;
2212 
2213         /// Long distance matching parameters (LDM)
2214         /// Can be activated for large inputs to improve the compression ratio.
2215         /// Increases memory usage and the window size
2216         /// A value of zero indicate that the default should be used.
2217         bool enableLongDistanceMatching;
2218         /// ditto
2219         int ldmHashLog;
2220         /// ditto
2221         int ldmMinMatch;
2222         /// ditto
2223         int ldmBucketSizeLog;
2224         /// ditto
2225         int ldmHashRateLog;
2226 
2227         // frame parameters
2228 
2229         /// If input data content size is known, before
2230         /// start of streaming, set contentSize to its value.
2231         /// It will enable the size to be written in the header
2232         /// and checked after decompression.
2233         ulong contentSize = ulong.max;
2234         /// Include a checksum of the content in the trailer.
2235         bool checksumFlag = false;
2236         /// When applicable, dictionary's ID is written in the header
2237         bool dictIdFlag = true;
2238 
2239         /// Multi-threading parameters
2240         int nbWorkers;
2241         /// ditto
2242         int jobSize;
2243         /// ditto
2244         int overlapLog;
2245 
2246         static final class Stream : ZstdStream
2247         {
2248             private ZSTD_CStream* strm;
2249         }
2250 
2251         private void setParams(Stream stream) @trusted
2252         {
2253             auto cctx = cast(ZSTD_CCtx*) stream.strm;
2254 
2255             mixin(zstdSetCParam("compressionLevel"));
2256             mixin(zstdSetCParam("windowLog"));
2257             mixin(zstdSetCParam("hashLog"));
2258             mixin(zstdSetCParam("chainLog"));
2259             mixin(zstdSetCParam("searchLog"));
2260             mixin(zstdSetCParam("minMatch"));
2261             mixin(zstdSetCParam("targetLength"));
2262             mixin(zstdSetCParam("strategy"));
2263 
2264             if (enableLongDistanceMatching)
2265             {
2266                 ZSTD_CCtx_setParameter(cctx,
2267                     ZSTD_cParameter.enableLongDistanceMatching,
2268                     1
2269                 );
2270 
2271                 mixin(zstdSetCParam("ldmHashLog"));
2272                 mixin(zstdSetCParam("ldmMinMatch"));
2273                 mixin(zstdSetCParam("ldmBucketSizeLog"));
2274                 mixin(zstdSetCParam("ldmHashRateLog"));
2275             }
2276 
2277             if (contentSize != size_t.max)
2278                 ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize);
2279             if (checksumFlag)
2280                 ZSTD_CCtx_setParameter(
2281                     cctx,
2282                     ZSTD_cParameter.checksumFlag,
2283                     1
2284                 );
2285             if (!dictIdFlag)
2286                 ZSTD_CCtx_setParameter(
2287                     cctx,
2288                     ZSTD_cParameter.checksumFlag,
2289                     0
2290                 );
2291 
2292             mixin(zstdSetCParam("nbWorkers"));
2293             mixin(zstdSetCParam("jobSize"));
2294             mixin(zstdSetCParam("overlapLog"));
2295         }
2296 
2297         Stream initialize() @trusted
2298         {
2299             auto stream = new Stream;
2300 
2301             stream.strm = ZSTD_createCStream();
2302 
2303             setParams(stream);
2304 
2305             return stream;
2306         }
2307 
2308         Flag!"streamEnded" process(Stream stream, Flag!"lastChunk" lastChunk) @safe
2309         {
2310             auto cctx = cast(ZSTD_CCtx*) stream.strm;
2311             const directive = lastChunk ? ZSTD_EndDirective.end : ZSTD_EndDirective._continue;
2312 
2313             const res = (() @trusted => ZSTD_compressStream2(cctx, &stream.outBuf, &stream.inBuf, directive))();
2314 
2315             zstdError(res, "Could not compress data with Zstandard");
2316             return cast(Flag!"streamEnded")(lastChunk && res == 0);
2317         }
2318 
2319         void reset(Stream stream) @trusted
2320         {
2321             auto cctx = cast(ZSTD_CCtx*) stream.strm;
2322             ZSTD_CCtx_reset(cctx, ZSTD_ResetDirective.session_only);
2323 
2324             if (contentSize != size_t.max)
2325                 ZSTD_CCtx_setPledgedSrcSize(cctx, contentSize);
2326 
2327             stream.inBuf = ZSTD_inBuffer.init;
2328             stream.outBuf = ZSTD_outBuffer.init;
2329             stream.totalIn = 0;
2330             stream.totalOut = 0;
2331         }
2332 
2333         void end(Stream stream) @trusted
2334         {
2335             ZSTD_freeCStream(stream.strm);
2336         }
2337     }
2338 
2339     struct DecompressZstd
2340     {
2341         static assert(isSquizAlgo!DecompressZstd);
2342 
2343         int windowLogMax;
2344 
2345         static final class Stream : ZstdStream
2346         {
2347             private ZSTD_DStream* strm;
2348         }
2349 
2350         private void setParams(Stream stream) @trusted
2351         {
2352             auto dctx = cast(ZSTD_DCtx*) stream.strm;
2353 
2354             if (windowLogMax)
2355                 ZSTD_DCtx_setParameter(dctx,
2356                     ZSTD_dParameter.windowLogMax, windowLogMax);
2357         }
2358 
2359         Stream initialize() @trusted
2360         {
2361             auto stream = new Stream;
2362 
2363             stream.strm = ZSTD_createDStream();
2364 
2365             setParams(stream);
2366 
2367             return stream;
2368         }
2369 
2370         Flag!"streamEnded" process(Stream stream, Flag!"lastChunk") @safe
2371         {
2372             const res = (() @trusted => ZSTD_decompressStream(stream.strm, &stream.outBuf, &stream
2373                     .inBuf))();
2374 
2375             zstdError(res, "Could not decompress data with Zstandard");
2376             return cast(Flag!"streamEnded")(res == 0);
2377         }
2378 
2379         void reset(Stream stream) @trusted
2380         {
2381             auto dctx = cast(ZSTD_DCtx*) stream.strm;
2382             ZSTD_DCtx_reset(dctx, ZSTD_ResetDirective.session_only);
2383         }
2384 
2385         void end(Stream stream) @trusted
2386         {
2387             ZSTD_freeDStream(stream.strm);
2388         }
2389     }
2390 
2391     ///
2392     @("Compress / Decompress Zstandard")
2393     unittest
2394     {
2395         import test.util;
2396         import std.array : join;
2397 
2398         const len = 100_000;
2399         const phrase = cast(const(ubyte)[]) "Some very repetitive phrase.\n";
2400         const input = generateRepetitiveData(len, phrase).join();
2401 
2402         const squized = only(input)
2403             .compressZstd()
2404             .join();
2405 
2406         const output = only(squized)
2407             .decompressZstd()
2408             .join();
2409 
2410         assert(squized.length < input.length);
2411         assert(output == input);
2412 
2413         // for such long and repetitive data, ratio is around 0.047%
2414         const ratio = cast(double) squized.length / cast(double) input.length;
2415         assert(ratio < 0.0005);
2416     }
2417 
2418 }