1 module squiz_box.priv;
2 
3 package(squiz_box):
4 
5 import squiz_box.squiz : ByteChunk, defaultChunkSize, isByteRange;
6 
7 import std.datetime.systime;
8 import std.exception;
9 import std.traits : isDynamicArray, isIntegral;
10 
11 extern (C) void* gcAlloc(T)(void* opaque, T n, T m) nothrow if (isIntegral!T)
12 {
13     import core.memory : GC;
14 
15     return GC.malloc(n * m);
16 }
17 
18 extern (C) void gcFree(void* opaque, void* addr) nothrow
19 {
20     import core.memory : GC;
21 
22     GC.free(addr);
23 }
24 
25 /// A forward only data input iterator
26 /// This is used either as adapter to File reading
27 /// or as adapter to by-chunk data where arbitrary read length
28 /// ease the implementation of an algorithm
29 interface Cursor
30 {
31     /// The name of the data source
32     /// For FileCursor, this will be the filename,
33     /// otherwise a description of the type of data.
34     @property string name();
35 
36     /// Position in the stream (how many bytes read so far)
37     @property ulong pos();
38 
39     /// Whether end-of-input was reached
40     @property bool eoi();
41 
42     /// Fast-forward and discard dist bytes.
43     /// Passing size_t.max will exhaust the input.
44     void ffw(ulong dist);
45 
46     /// Reads a single byte
47     ubyte get()
48     in (!eoi);
49 
50     /// Get a value of type T.
51     /// Is a convenience over readValue.
52     T getValue(T)()
53     {
54         T val = void;
55         readValue(&val);
56         return val;
57     }
58 
59     /// Read up to buffer.length bytes into buffer and return what was read.
60     /// Returns a smaller slice only if EOI was reached.
61     ubyte[] read(ubyte[] buffer);
62 
63     /// Read T.sizeof data and returns it as a T.
64     /// Similar to get!T but the value is passed as pointer to be filled in.
65     /// Prefer this form for greater values (e.g. dozens of bytes)
66     void readValue(T)(T* val) if (!isDynamicArray!T)
67     {
68         import std.exception : enforce;
69 
70         auto ptr = cast(ubyte*) val;
71         auto buf = ptr[0 .. T.sizeof];
72         auto res = read(buf);
73         enforce(res.length == T.sizeof, "Could not read enough bytes for " ~ T.stringof);
74     }
75 
76     T[] read(T)(T[] buffer)
77     {
78         auto ptr = cast(ubyte)&buffer[0];
79         auto arr = ptr[0 .. buffer.length * T.sizeof];
80         auto res = read(arr);
81         enforce(res.length % T.sizeof == 0, "Could not read aligned bytes for " ~ T.stringof);
82         if (!res.length)
83             return [];
84         auto resptr = cast(T*)&res[0];
85         return resptr[0 .. res.length / T.sizeof];
86     }
87 }
88 
89 /// A Cursor whose size is known and that can seek to arbitrary positions,
90 /// including backwards.
91 interface SearchableCursor : Cursor
92 {
93     /// Complete size of the data
94     @property ulong size();
95 
96     /// Seek to a new position (relative to beginning)
97     void seek(ulong pos);
98 }
99 
100 /// Range based data input
101 final class ByteRangeCursor(BR) : Cursor if (isByteRange!BR)
102 {
103     private BR _input;
104     private string _name;
105     private ulong _pos;
106     private ByteChunk _chunk;
107 
108     this(BR input, string name = "Byte Range")
109     {
110         _input = input;
111         _name = name;
112 
113         if (!_input.empty)
114             _chunk = _input.front;
115     }
116 
117     @property string name()
118     {
119         return _name;
120     }
121 
122     @property ulong pos()
123     {
124         return _pos;
125     }
126 
127     @property bool eoi()
128     {
129         return _chunk.length == 0;
130     }
131 
132     void ffw(ulong dist)
133     {
134         import std.algorithm : min;
135 
136         while (dist > 0 && _chunk.length)
137         {
138             const len = cast(size_t) min(_chunk.length, dist);
139             _chunk = _chunk[len .. $];
140             _pos += len;
141             dist -= len;
142 
143             if (_chunk.length == 0)
144             {
145                 _input.popFront();
146                 if (!_input.empty)
147                     _chunk = _input.front;
148                 else
149                     break;
150             }
151         }
152     }
153 
154     ubyte get()
155     {
156         const res = _chunk[0];
157         _chunk = _chunk[1 .. $];
158         if (_chunk.length == 0)
159         {
160             _input.popFront();
161             if (!_input.empty)
162                 _chunk = _input.front;
163         }
164         return res;
165     }
166 
167     ubyte[] read(ubyte[] buffer)
168     {
169         import std.algorithm : min;
170 
171         size_t filled;
172 
173         while (_chunk.length && filled != buffer.length)
174         {
175             const len = min(_chunk.length, buffer.length - filled);
176             buffer[filled .. filled + len] = _chunk[0 .. len];
177 
178             _pos += len;
179 
180             filled += len;
181             _chunk = _chunk[len .. $];
182 
183             if (_chunk.length == 0)
184             {
185                 _input.popFront();
186                 if (!_input.empty)
187                     _chunk = _input.front;
188                 else
189                     break;
190             }
191         }
192         return buffer[0 .. filled];
193     }
194 }
195 
196 final class ArrayCursor : SearchableCursor
197 {
198     private const(ubyte)[] _array;
199     private size_t _pos;
200     private string _name;
201 
202     this(const(ubyte)[] array, string name = "Byte Array")
203     {
204         _array = array;
205         _name = name;
206     }
207 
208     @property string name()
209     {
210         return _name;
211     }
212 
213     @property ulong pos()
214     {
215         return _pos;
216     }
217 
218     @property bool eoi()
219     {
220         return _pos == _array.length;
221     }
222 
223     @property ulong size()
224     {
225         return _array.length;
226     }
227 
228     void seek(ulong pos)
229     {
230         import std.algorithm : min;
231 
232         _pos = cast(size_t) min(pos, _array.length);
233     }
234 
235     void ffw(ulong dist)
236     {
237         seek(pos + dist);
238     }
239 
240     ubyte get()
241     {
242         enforce(_pos < _array.length, "No more bytes");
243         return _array[_pos++];
244     }
245 
246     ubyte[] read(ubyte[] buffer)
247     {
248         import std.algorithm : min;
249 
250         const l = min(buffer.length, _array.length - _pos);
251         buffer[0 .. l] = _array[_pos .. _pos + l];
252         _pos += l;
253         return buffer[0 .. l];
254     }
255 
256     /// Read up to len bytes from the inner array and return what was read.
257     /// Returns an array smaller than len only if EOI was reached.
258     const(ubyte)[] readInner(size_t len)
259     {
260         import std.algorithm : min;
261 
262         const l = min(len, _array.length - _pos);
263         const p = _pos;
264         _pos += l;
265         return _array[p .. p + l];
266     }
267 }
268 
269 /// The cursor MUST have exclusive access on the file
270 final class FileCursor : SearchableCursor
271 {
272     import std.stdio : File;
273 
274     File _file;
275     ulong _pos;
276     ulong _start;
277     ulong _end;
278     ubyte[] _buffer;
279 
280     this(File file, size_t bufferSize = defaultChunkSize, ulong start = 0, ulong end = ulong.max)
281     in (start <= end)
282     {
283         import std.algorithm : min;
284         import std.exception : enforce;
285         import std.stdio : LockType;
286 
287         enforce(file.isOpen, "File is not open");
288         file.lock(LockType.read);
289         _file = file;
290         _start = start;
291 
292         const fs = file.size;
293         enforce(fs < ulong.max, "File is not searchable");
294         _end = min(fs, end);
295         enforce(_start <= _end, "Bounds out of File range");
296 
297         const bufSize = min(bufferSize, _end - _start);
298         if (bufSize > 0)
299             _buffer = new ubyte[bufSize];
300     }
301 
302     @property string name()
303     {
304         return _file.name;
305     }
306 
307     @property ulong pos()
308     {
309         return _pos;
310     }
311 
312     @property bool eoi()
313     {
314         return _pos == _end;
315     }
316 
317     @property ulong size()
318     {
319         return _end - _start;
320     }
321 
322     void seek(ulong pos)
323     {
324         _pos = pos;
325         _file.seek(pos);
326     }
327 
328     void ffw(ulong dist)
329     {
330         seek(pos + dist);
331     }
332 
333     ubyte get()
334     {
335         // for efficiency we use getc
336         import core.stdc.stdio : getc;
337 
338         assert(_file.tell == _pos);
339 
340         _pos++;
341         enforce(_pos <= _end, "No more bytes");
342         return cast(ubyte) getc(_file.getFP());
343     }
344 
345     ubyte[] read(ubyte[] buffer)
346     {
347         import std.algorithm : min;
348 
349         assert(_file.tell == _pos);
350 
351         const len = cast(size_t) min(buffer.length, _end - _pos);
352         auto result = _file.rawRead(buffer[0 .. len]);
353         _pos += result.length;
354         assert(_pos <= _end);
355         return result;
356     }
357 }
358 
359 auto cursorByteRange(C)(C cursor, ulong len = ulong.max, size_t chunkSize = defaultChunkSize)
360         if (is(C : Cursor))
361 {
362     import std.algorithm : min;
363     import std.range : only;
364 
365     static if (is(C : ArrayCursor))
366     {
367         const l = cast(size_t) min(len, cursor._array.length - cursor._pos);
368         return only(cursor._array[cursor._pos .. cursor._pos + l]);
369     }
370     else
371     {
372         return CursorByteRange!C(cursor, len, cast(size_t) min(len, chunkSize));
373     }
374 }
375 
376 /// ByteRange that takes its data from Cursor.
377 /// Optionally stopping before data is exhausted.
378 struct CursorByteRange(C) if (is(C : Cursor))
379 {
380     private C _input;
381     private ulong _len;
382     private ubyte[] _buffer;
383     private ubyte[] _chunk;
384 
385     this(C input, ulong len = ulong.max, size_t chunkSize = 4096)
386     {
387         _input = input;
388         _len = len;
389         _buffer = new ubyte[chunkSize];
390         if (!_input.eoi)
391             prime();
392     }
393 
394     private void prime()
395     {
396         import std.algorithm : min;
397 
398         const len = cast(size_t) min(_buffer.length, _len);
399         if (len == 0)
400             _chunk = null;
401         else
402             _chunk = _input.read(_buffer[0 .. len]);
403         _len -= len;
404     }
405 
406     @property bool empty()
407     {
408         return (_input.eoi || _len == 0) && _chunk.length == 0;
409     }
410 
411     @property ByteChunk front()
412     {
413         return _chunk;
414     }
415 
416     void popFront()
417     {
418         if (!_input.eoi)
419             prime();
420         else
421             _chunk = null;
422     }
423 }
424 
425 // Common algorithm for all compression/decompression functions.
426 // I is a byte input range
427 // P is a stream processor
428 // This common struct is made possible thanks to the great job of the zlib, bzip2 and lzma authors.
429 // Zlib authors have defined an extremly versatile stream interface that bzip2 and lzma authors have reused.
430 struct CompressDecompressAlgo(I, P)
431 {
432     /// Byte input range (by chunks)
433     I input;
434     /// Processed stream (with common interface by zlib, bzip2 and lzma)
435     P.Stream* stream;
436 
437     /// Byte chunk from the input, provided to the stream
438     ByteChunk inChunk;
439 
440     /// Buffer used to read from stream
441     ubyte[] outBuffer;
442     /// Slice of the buffer that is valid for read out
443     ByteChunk outChunk;
444 
445     /// Whether the end of stream was reported by the Policy
446     bool ended;
447 
448     this(I input, P.Stream* stream, ubyte[] outBuffer)
449     {
450         this.input = input;
451         this.stream = stream;
452         this.outBuffer = outBuffer;
453         prime();
454     }
455 
456     @property bool empty()
457     {
458         return outChunk.length == 0;
459     }
460 
461     @property ByteChunk front()
462     {
463         return outChunk;
464     }
465 
466     void popFront()
467     {
468         outChunk = null;
469         if (!ended)
470             prime();
471     }
472 
473     private void prime()
474     {
475         while (outChunk.length < outBuffer.length)
476         {
477             if (inChunk.length == 0 && !input.empty)
478                 inChunk = input.front;
479 
480             stream.next_in = inChunk.ptr;
481             stream.avail_in = cast(typeof(stream.avail_in)) inChunk.length;
482 
483             stream.next_out = outBuffer.ptr + outChunk.length;
484             stream.avail_out = cast(typeof(stream.avail_out))(outBuffer.length - outChunk.length);
485 
486             const streamEnded = P.process(stream, input.empty);
487 
488             const readIn = inChunk.length - stream.avail_in;
489             inChunk = inChunk[readIn .. $];
490 
491             const outEnd = outBuffer.length - stream.avail_out;
492             outChunk = outBuffer[0 .. outEnd];
493 
494             // popFront must be called at the end because it invalidates inChunk
495             if (inChunk.length == 0 && !input.empty)
496                 input.popFront();
497 
498             if (streamEnded)
499             {
500                 P.end(stream);
501                 ended = true;
502                 break;
503             }
504         }
505     }
506 }
507 
508 struct LittleEndian(size_t sz) if (sz == 2 || sz == 4 || sz == 8)
509 {
510     static if (sz == 2)
511     {
512         alias T = ushort;
513     }
514     static if (sz == 4)
515     {
516         alias T = uint;
517     }
518     static if (sz == 8)
519     {
520         alias T = ulong;
521     }
522 
523     ubyte[sz] data;
524 
525     this(T val) pure @safe @nogc nothrow
526     {
527         import std.bitmanip : nativeToLittleEndian;
528 
529         data = nativeToLittleEndian(val);
530     }
531 
532     @property void val(T val) pure @safe @nogc nothrow
533     {
534         import std.bitmanip : nativeToLittleEndian;
535 
536         data = nativeToLittleEndian(val);
537     }
538 
539     @property T val() const pure @safe @nogc nothrow
540     {
541         import std.bitmanip : littleEndianToNative;
542 
543         return littleEndianToNative!(T, sz)(data);
544     }
545 
546     auto opAssign(T value)
547     {
548         val = value;
549         return this;
550     }
551 
552     bool opEquals(const T rhs) const
553     {
554         return val == rhs;
555     }
556 
557     size_t toHash() const @nogc @safe pure nothrow
558     {
559         return val.hashOf();
560     }
561 
562     int opCmp(const T rhs) const
563     {
564         const lhs = val;
565         if (lhs < rhs)
566             return -1;
567         if (lhs > rhs)
568             return 1;
569         return 0;
570     }
571 }
572 
573 static assert((LittleEndian!2).sizeof == 2);
574 static assert((LittleEndian!4).sizeof == 4);
575 static assert((LittleEndian!8).sizeof == 8);
576 
577 /// same as std.stdio.File.byChunk but returns const(ubyte)[]
578 struct ByChunkImpl
579 {
580     import std.stdio : File;
581 
582 private:
583     File    file_;
584     ubyte[] chunk_;
585 
586     void prime()
587     {
588         chunk_ = file_.rawRead(chunk_);
589         if (chunk_.length == 0)
590             file_.detach();
591     }
592 
593 public:
594     this(File file, size_t size)
595     {
596         this(file, new ubyte[](size));
597     }
598 
599     this(File file, ubyte[] buffer)
600     {
601         import std.exception : enforce;
602         enforce(buffer.length, "size must be larger than 0");
603         file_ = file;
604         chunk_ = buffer;
605         prime();
606     }
607 
608     // `ByChunk`'s input range primitive operations.
609     @property nothrow
610     bool empty() const
611     {
612         return !file_.isOpen;
613     }
614 
615     /// Ditto
616     @property nothrow
617     const(ubyte)[] front()
618     {
619         version (assert)
620         {
621             import core.exception : RangeError;
622             if (empty)
623                 throw new RangeError();
624         }
625         return chunk_;
626     }
627 
628     /// Ditto
629     void popFront()
630     {
631         version (assert)
632         {
633             import core.exception : RangeError;
634             if (empty)
635                 throw new RangeError();
636         }
637         prime();
638     }
639 }