1 module squiz_box.priv; 2 3 package(squiz_box): 4 5 import squiz_box.squiz : ByteChunk, defaultChunkSize, isByteRange; 6 7 import std.datetime.systime; 8 import std.exception; 9 import std.traits : isDynamicArray, isIntegral; 10 11 extern (C) void* gcAlloc(T)(void* opaque, T n, T m) nothrow if (isIntegral!T) 12 { 13 import core.memory : GC; 14 15 return GC.malloc(n * m); 16 } 17 18 extern (C) void gcFree(void* opaque, void* addr) nothrow 19 { 20 import core.memory : GC; 21 22 GC.free(addr); 23 } 24 25 /// A forward only data input iterator 26 /// This is used either as adapter to File reading 27 /// or as adapter to by-chunk data where arbitrary read length 28 /// ease the implementation of an algorithm 29 interface Cursor 30 { 31 /// The name of the data source 32 /// For FileCursor, this will be the filename, 33 /// otherwise a description of the type of data. 34 @property string name(); 35 36 /// Position in the stream (how many bytes read so far) 37 @property ulong pos(); 38 39 /// Whether end-of-input was reached 40 @property bool eoi(); 41 42 /// Fast-forward and discard dist bytes. 43 /// Passing size_t.max will exhaust the input. 44 void ffw(ulong dist); 45 46 /// Reads a single byte 47 ubyte get() 48 in (!eoi); 49 50 /// Get a value of type T. 51 /// Is a convenience over readValue. 52 T getValue(T)() 53 { 54 T val = void; 55 readValue(&val); 56 return val; 57 } 58 59 /// Read up to buffer.length bytes into buffer and return what was read. 60 /// Returns a smaller slice only if EOI was reached. 61 ubyte[] read(ubyte[] buffer); 62 63 /// Read T.sizeof data and returns it as a T. 64 /// Similar to get!T but the value is passed as pointer to be filled in. 65 /// Prefer this form for greater values (e.g. dozens of bytes) 66 void readValue(T)(T* val) if (!isDynamicArray!T) 67 { 68 import std.exception : enforce; 69 70 auto ptr = cast(ubyte*) val; 71 auto buf = ptr[0 .. T.sizeof]; 72 auto res = read(buf); 73 enforce(res.length == T.sizeof, "Could not read enough bytes for " ~ T.stringof); 74 } 75 76 T[] read(T)(T[] buffer) 77 { 78 auto ptr = cast(ubyte)&buffer[0]; 79 auto arr = ptr[0 .. buffer.length * T.sizeof]; 80 auto res = read(arr); 81 enforce(res.length % T.sizeof == 0, "Could not read aligned bytes for " ~ T.stringof); 82 if (!res.length) 83 return []; 84 auto resptr = cast(T*)&res[0]; 85 return resptr[0 .. res.length / T.sizeof]; 86 } 87 } 88 89 /// A Cursor whose size is known and that can seek to arbitrary positions, 90 /// including backwards. 91 interface SearchableCursor : Cursor 92 { 93 /// Complete size of the data 94 @property ulong size(); 95 96 /// Seek to a new position (relative to beginning) 97 void seek(ulong pos); 98 } 99 100 /// Range based data input 101 final class ByteRangeCursor(BR) : Cursor if (isByteRange!BR) 102 { 103 private BR _input; 104 private string _name; 105 private ulong _pos; 106 private ByteChunk _chunk; 107 108 this(BR input, string name = "Byte Range") 109 { 110 _input = input; 111 _name = name; 112 113 if (!_input.empty) 114 _chunk = _input.front; 115 } 116 117 @property string name() 118 { 119 return _name; 120 } 121 122 @property ulong pos() 123 { 124 return _pos; 125 } 126 127 @property bool eoi() 128 { 129 return _chunk.length == 0; 130 } 131 132 void ffw(ulong dist) 133 { 134 import std.algorithm : min; 135 136 while (dist > 0 && _chunk.length) 137 { 138 const len = cast(size_t) min(_chunk.length, dist); 139 _chunk = _chunk[len .. $]; 140 _pos += len; 141 dist -= len; 142 143 if (_chunk.length == 0) 144 { 145 _input.popFront(); 146 if (!_input.empty) 147 _chunk = _input.front; 148 else 149 break; 150 } 151 } 152 } 153 154 ubyte get() 155 { 156 const res = _chunk[0]; 157 _chunk = _chunk[1 .. $]; 158 if (_chunk.length == 0) 159 { 160 _input.popFront(); 161 if (!_input.empty) 162 _chunk = _input.front; 163 } 164 return res; 165 } 166 167 ubyte[] read(ubyte[] buffer) 168 { 169 import std.algorithm : min; 170 171 size_t filled; 172 173 while (_chunk.length && filled != buffer.length) 174 { 175 const len = min(_chunk.length, buffer.length - filled); 176 buffer[filled .. filled + len] = _chunk[0 .. len]; 177 178 _pos += len; 179 180 filled += len; 181 _chunk = _chunk[len .. $]; 182 183 if (_chunk.length == 0) 184 { 185 _input.popFront(); 186 if (!_input.empty) 187 _chunk = _input.front; 188 else 189 break; 190 } 191 } 192 return buffer[0 .. filled]; 193 } 194 } 195 196 final class ArrayCursor : SearchableCursor 197 { 198 private const(ubyte)[] _array; 199 private size_t _pos; 200 private string _name; 201 202 this(const(ubyte)[] array, string name = "Byte Array") 203 { 204 _array = array; 205 _name = name; 206 } 207 208 @property string name() 209 { 210 return _name; 211 } 212 213 @property ulong pos() 214 { 215 return _pos; 216 } 217 218 @property bool eoi() 219 { 220 return _pos == _array.length; 221 } 222 223 @property ulong size() 224 { 225 return _array.length; 226 } 227 228 void seek(ulong pos) 229 { 230 import std.algorithm : min; 231 232 _pos = cast(size_t) min(pos, _array.length); 233 } 234 235 void ffw(ulong dist) 236 { 237 seek(pos + dist); 238 } 239 240 ubyte get() 241 { 242 enforce(_pos < _array.length, "No more bytes"); 243 return _array[_pos++]; 244 } 245 246 ubyte[] read(ubyte[] buffer) 247 { 248 import std.algorithm : min; 249 250 const l = min(buffer.length, _array.length - _pos); 251 buffer[0 .. l] = _array[_pos .. _pos + l]; 252 _pos += l; 253 return buffer[0 .. l]; 254 } 255 256 /// Read up to len bytes from the inner array and return what was read. 257 /// Returns an array smaller than len only if EOI was reached. 258 const(ubyte)[] readInner(size_t len) 259 { 260 import std.algorithm : min; 261 262 const l = min(len, _array.length - _pos); 263 const p = _pos; 264 _pos += l; 265 return _array[p .. p + l]; 266 } 267 } 268 269 /// The cursor MUST have exclusive access on the file 270 final class FileCursor : SearchableCursor 271 { 272 import std.stdio : File; 273 274 File _file; 275 ulong _pos; 276 ulong _start; 277 ulong _end; 278 ubyte[] _buffer; 279 280 this(File file, size_t bufferSize = defaultChunkSize, ulong start = 0, ulong end = ulong.max) 281 in (start <= end) 282 { 283 import std.algorithm : min; 284 import std.exception : enforce; 285 import std.stdio : LockType; 286 287 enforce(file.isOpen, "File is not open"); 288 file.lock(LockType.read); 289 _file = file; 290 _start = start; 291 292 const fs = file.size; 293 enforce(fs < ulong.max, "File is not searchable"); 294 _end = min(fs, end); 295 enforce(_start <= _end, "Bounds out of File range"); 296 297 const bufSize = min(bufferSize, _end - _start); 298 if (bufSize > 0) 299 _buffer = new ubyte[bufSize]; 300 } 301 302 @property string name() 303 { 304 return _file.name; 305 } 306 307 @property ulong pos() 308 { 309 return _pos; 310 } 311 312 @property bool eoi() 313 { 314 return _pos == _end; 315 } 316 317 @property ulong size() 318 { 319 return _end - _start; 320 } 321 322 void seek(ulong pos) 323 { 324 _pos = pos; 325 _file.seek(pos); 326 } 327 328 void ffw(ulong dist) 329 { 330 seek(pos + dist); 331 } 332 333 ubyte get() 334 { 335 // for efficiency we use getc 336 import core.stdc.stdio : getc; 337 338 assert(_file.tell == _pos); 339 340 _pos++; 341 enforce(_pos <= _end, "No more bytes"); 342 return cast(ubyte) getc(_file.getFP()); 343 } 344 345 ubyte[] read(ubyte[] buffer) 346 { 347 import std.algorithm : min; 348 349 assert(_file.tell == _pos); 350 351 const len = cast(size_t) min(buffer.length, _end - _pos); 352 auto result = _file.rawRead(buffer[0 .. len]); 353 _pos += result.length; 354 assert(_pos <= _end); 355 return result; 356 } 357 } 358 359 auto cursorByteRange(C)(C cursor, ulong len = ulong.max, size_t chunkSize = defaultChunkSize) 360 if (is(C : Cursor)) 361 { 362 import std.algorithm : min; 363 import std.range : only; 364 365 static if (is(C : ArrayCursor)) 366 { 367 const l = cast(size_t) min(len, cursor._array.length - cursor._pos); 368 return only(cursor._array[cursor._pos .. cursor._pos + l]); 369 } 370 else 371 { 372 return CursorByteRange!C(cursor, len, cast(size_t) min(len, chunkSize)); 373 } 374 } 375 376 /// ByteRange that takes its data from Cursor. 377 /// Optionally stopping before data is exhausted. 378 struct CursorByteRange(C) if (is(C : Cursor)) 379 { 380 private C _input; 381 private ulong _len; 382 private ubyte[] _buffer; 383 private ubyte[] _chunk; 384 385 this(C input, ulong len = ulong.max, size_t chunkSize = 4096) 386 { 387 _input = input; 388 _len = len; 389 _buffer = new ubyte[chunkSize]; 390 if (!_input.eoi) 391 prime(); 392 } 393 394 private void prime() 395 { 396 import std.algorithm : min; 397 398 const len = cast(size_t) min(_buffer.length, _len); 399 if (len == 0) 400 _chunk = null; 401 else 402 _chunk = _input.read(_buffer[0 .. len]); 403 _len -= len; 404 } 405 406 @property bool empty() 407 { 408 return (_input.eoi || _len == 0) && _chunk.length == 0; 409 } 410 411 @property ByteChunk front() 412 { 413 return _chunk; 414 } 415 416 void popFront() 417 { 418 if (!_input.eoi) 419 prime(); 420 else 421 _chunk = null; 422 } 423 } 424 425 // Common algorithm for all compression/decompression functions. 426 // I is a byte input range 427 // P is a stream processor 428 // This common struct is made possible thanks to the great job of the zlib, bzip2 and lzma authors. 429 // Zlib authors have defined an extremly versatile stream interface that bzip2 and lzma authors have reused. 430 struct CompressDecompressAlgo(I, P) 431 { 432 /// Byte input range (by chunks) 433 I input; 434 /// Processed stream (with common interface by zlib, bzip2 and lzma) 435 P.Stream* stream; 436 437 /// Byte chunk from the input, provided to the stream 438 ByteChunk inChunk; 439 440 /// Buffer used to read from stream 441 ubyte[] outBuffer; 442 /// Slice of the buffer that is valid for read out 443 ByteChunk outChunk; 444 445 /// Whether the end of stream was reported by the Policy 446 bool ended; 447 448 this(I input, P.Stream* stream, ubyte[] outBuffer) 449 { 450 this.input = input; 451 this.stream = stream; 452 this.outBuffer = outBuffer; 453 prime(); 454 } 455 456 @property bool empty() 457 { 458 return outChunk.length == 0; 459 } 460 461 @property ByteChunk front() 462 { 463 return outChunk; 464 } 465 466 void popFront() 467 { 468 outChunk = null; 469 if (!ended) 470 prime(); 471 } 472 473 private void prime() 474 { 475 while (outChunk.length < outBuffer.length) 476 { 477 if (inChunk.length == 0 && !input.empty) 478 inChunk = input.front; 479 480 stream.next_in = inChunk.ptr; 481 stream.avail_in = cast(typeof(stream.avail_in)) inChunk.length; 482 483 stream.next_out = outBuffer.ptr + outChunk.length; 484 stream.avail_out = cast(typeof(stream.avail_out))(outBuffer.length - outChunk.length); 485 486 const streamEnded = P.process(stream, input.empty); 487 488 const readIn = inChunk.length - stream.avail_in; 489 inChunk = inChunk[readIn .. $]; 490 491 const outEnd = outBuffer.length - stream.avail_out; 492 outChunk = outBuffer[0 .. outEnd]; 493 494 // popFront must be called at the end because it invalidates inChunk 495 if (inChunk.length == 0 && !input.empty) 496 input.popFront(); 497 498 if (streamEnded) 499 { 500 P.end(stream); 501 ended = true; 502 break; 503 } 504 } 505 } 506 } 507 508 struct LittleEndian(size_t sz) if (sz == 2 || sz == 4 || sz == 8) 509 { 510 static if (sz == 2) 511 { 512 alias T = ushort; 513 } 514 static if (sz == 4) 515 { 516 alias T = uint; 517 } 518 static if (sz == 8) 519 { 520 alias T = ulong; 521 } 522 523 ubyte[sz] data; 524 525 this(T val) pure @safe @nogc nothrow 526 { 527 import std.bitmanip : nativeToLittleEndian; 528 529 data = nativeToLittleEndian(val); 530 } 531 532 @property void val(T val) pure @safe @nogc nothrow 533 { 534 import std.bitmanip : nativeToLittleEndian; 535 536 data = nativeToLittleEndian(val); 537 } 538 539 @property T val() const pure @safe @nogc nothrow 540 { 541 import std.bitmanip : littleEndianToNative; 542 543 return littleEndianToNative!(T, sz)(data); 544 } 545 546 auto opAssign(T value) 547 { 548 val = value; 549 return this; 550 } 551 552 bool opEquals(const T rhs) const 553 { 554 return val == rhs; 555 } 556 557 size_t toHash() const @nogc @safe pure nothrow 558 { 559 return val.hashOf(); 560 } 561 562 int opCmp(const T rhs) const 563 { 564 const lhs = val; 565 if (lhs < rhs) 566 return -1; 567 if (lhs > rhs) 568 return 1; 569 return 0; 570 } 571 } 572 573 static assert((LittleEndian!2).sizeof == 2); 574 static assert((LittleEndian!4).sizeof == 4); 575 static assert((LittleEndian!8).sizeof == 8); 576 577 /// same as std.stdio.File.byChunk but returns const(ubyte)[] 578 struct ByChunkImpl 579 { 580 import std.stdio : File; 581 582 private: 583 File file_; 584 ubyte[] chunk_; 585 586 void prime() 587 { 588 chunk_ = file_.rawRead(chunk_); 589 if (chunk_.length == 0) 590 file_.detach(); 591 } 592 593 public: 594 this(File file, size_t size) 595 { 596 this(file, new ubyte[](size)); 597 } 598 599 this(File file, ubyte[] buffer) 600 { 601 import std.exception : enforce; 602 enforce(buffer.length, "size must be larger than 0"); 603 file_ = file; 604 chunk_ = buffer; 605 prime(); 606 } 607 608 // `ByChunk`'s input range primitive operations. 609 @property nothrow 610 bool empty() const 611 { 612 return !file_.isOpen; 613 } 614 615 /// Ditto 616 @property nothrow 617 const(ubyte)[] front() 618 { 619 version (assert) 620 { 621 import core.exception : RangeError; 622 if (empty) 623 throw new RangeError(); 624 } 625 return chunk_; 626 } 627 628 /// Ditto 629 void popFront() 630 { 631 version (assert) 632 { 633 import core.exception : RangeError; 634 if (empty) 635 throw new RangeError(); 636 } 637 prime(); 638 } 639 }