/// Creation and extraction of TAR (ustar) archives expressed as ranges.
module squiz_box.box.tar;

import squiz_box.box;
import squiz_box.priv;
import squiz_box.squiz;

import std.datetime.systime;
import std.exception;
import std.path;
import std.range.primitives;

/// Returns a Tar archive as a byte range
/// corresponding to the entries in input.
/// chunkSize must be a non-zero multiple of 512.
auto createTarArchive(I)(I entries, size_t chunkSize = defaultChunkSize)
        if (isCreateEntryRange!I)
in (chunkSize >= 512 && chunkSize % 512 == 0)
{
    return TarArchiveCreate!I(entries, chunkSize);
}

/// Input range of byte chunks producing a TAR stream from a range of entries.
/// Each `front` is a slice of an internal buffer that is reused by `popFront`.
private struct TarArchiveCreate(I)
{
    // init data
    I entriesInput;
    ubyte[] buffer;

    // current chunk (front data)
    ubyte[] chunk; // data ready
    ubyte[] avail; // space available in buffer (after chunk)

    // current entry being processed
    ArchiveCreateEntry entry;
    ByteRange entryChunks;

    // footer is two empty blocks
    size_t footer; // number of footer bytes emitted so far
    enum footerLen = 1024;

    /// Allocates the chunk buffer and primes the first chunk.
    /// Throws: Exception if chunkSize is zero or not a multiple of 512.
    this(I entries, size_t chunkSize)
    {
        // The `in` contract of createTarArchive is compiled out in release builds,
        // and a zero-sized buffer would make this range silently produce nothing.
        enforce(
            chunkSize >= 512 && chunkSize % 512 == 0,
            "chunk size must be a non-zero multiple of 512"
        );
        entriesInput = entries;
        buffer = new ubyte[chunkSize];
        avail = buffer;
        popFront();
    }

    /// True once all entries, their data and the footer have been consumed.
    @property bool empty()
    {
        // handle .init
        if (!buffer)
            return true;

        // more files to be processed
        if (!entriesInput.empty)
            return false;

        // current entry not exhausted
        if (hasEntryChunks())
            return false;

        // some unconsumed flying data
        if (chunk.length)
            return false;

        return true;
    }

    /// The current chunk of archive bytes (a slice of the internal buffer).
    @property ByteChunk front()
    {
        return chunk;
    }

    /// Fills the buffer with as many header/data blocks as fit,
    /// or with footer zeros once every entry has been written.
    void popFront()
    {
        if (!moreToRead())
        {
            if (footer >= footerLen)
            {
                // footer fully emitted: the range is now exhausted
                chunk = null;
            }
            else
            {
                import std.algorithm : min;

                // the footer may need several chunks when the buffer is 512 bytes
                const len = min(buffer.length, footerLen - footer);
                buffer[0 .. len] = 0;
                chunk = buffer[0 .. len];
                footer += len;
            }
            return;
        }

        while (avail.length && moreToRead)
        {
            nextBlock();
            chunk = buffer[0 .. $ - avail.length];
        }
        // the whole buffer is available again for the next call
        avail = buffer;
    }

    /// Whether the current entry still has data chunks to write.
    private bool hasEntryChunks()
    {
        return entryChunks && !entryChunks.empty;
    }

    /// Whether any header or entry data remains to be written (footer excluded).
    private bool moreToRead()
    {
        return !entriesInput.empty || hasEntryChunks();
    }

    /// Writes one 512 bytes block into `avail`: either the header of the next
    /// entry, or the next data chunk of the current entry (zero padded to the
    /// block boundary when it is the last one).
    // NOTE(review): relies on byChunk(512) yielding full 512 bytes chunks except
    // possibly for the last one - confirm against the entry implementations.
    private void nextBlock()
    in (avail.length >= 512)
    {
        if (!entry || !hasEntryChunks())
        {
            enforce(!entriesInput.empty);
            entry = entriesInput.front;
            entriesInput.popFront();
            avail = TarHeader.fillWith(entry, avail);
            entryChunks = entry.byChunk(512);
        }
        else
        {
            auto filled = entryChunks.front;
            avail[0 .. filled.length] = filled;
            avail = avail[filled.length .. $];
            entryChunks.popFront();
            if (entryChunks.empty)
            {
                // buffer length is a multiple of 512, so the remainder of
                // avail.length is the distance to the next block boundary
                const pad = avail.length % 512;
                avail[0 .. pad] = 0;
                avail = avail[pad .. $];
            }
        }
    }
}

static assert(isByteRange!(TarArchiveCreate!(ArchiveCreateEntry[])));

/// Return a range of entries from a Tar formatted byte range
auto readTarArchive(I)(I tarInput) if (isByteRange!I)
{
    auto dataInput = new ByteRangeCursor!I(tarInput);
    return ArchiveTarRead(dataInput);
}

/// Input range of extraction entries read sequentially from a TAR stream.
private struct ArchiveTarRead
{
    private Cursor _input;

    // current header data
    private size_t _next; // stream position of the header following the current entry
    private ubyte[] _block; // scratch buffer holding the current 512 bytes header
    private ArchiveExtractEntry _entry;

    /// Reads the first header unless the input is already exhausted.
    this(Cursor input)
    {
        _input = input;
        _block = new ubyte[512];

        // file with zero bytes is a valid tar file
        if (!_input.eoi)
            readHeaderBlock();
    }

    /// True when the input is exhausted (readHeaderBlock drains the input
    /// when it meets the end-of-archive marker).
    @property bool empty()
    {
        return _input.eoi;
    }

    /// The entry described by the most recently read header.
    @property ArchiveExtractEntry front()
    {
        return _entry;
    }

    /// Skips whatever is left of the current entry and reads the next header.
    void popFront()
    {
        assert(_input.pos <= _next);

        if (_input.pos < _next)
        {
            // the current entry was not fully read, we move the stream forward
            // up to the next header
            const dist = _next - _input.pos;
            _input.ffw(dist);
        }
        readHeaderBlock();
    }

    /// Reads and validates one header block and builds the matching entry.
    /// Throws: Exception on truncated input or checksum mismatch.
    private void readHeaderBlock()
    {
        import std.conv : to;

        enforce(_input.read(_block).length == 512, "Unexpected end of input");

        TarHeader* th = cast(TarHeader*) _block.ptr;

        const computed = th.unsignedChecksum();
        const checksum = parseOctalString(th.chksum);

        // 256 is the checksum of an all-zero block (chksum field counted as 8 spaces)
        if (computed == 256 && checksum == 0)
        {
            // this is an empty header (only zeros)
            // indicates end of archive

            while (!_input.eoi)
            {
                _input.ffw(512);
            }
            return;
        }

        enforce(
            checksum == computed,
            "Invalid TAR checksum at 0x" ~ (
                _input.pos - 512 + th.chksum.offsetof)
                .to!string(16) ~
                "\nExpected " ~ computed.to!string ~ " but found " ~ checksum.to!string,
        );

        TarEntryInfo info;
        // NOTE(review): ustar specifies prefix ~ "/" ~ name. This plain concatenation
        // mirrors TarHeader.fillWith, which splits long paths without a separator.
        info.path = (parseString(th.prefix) ~ parseString(th.name)).idup;
        info.type = toEntryType(th.typeflag);
        info.linkname = parseString(th.linkname).idup;
        info.size = parseOctalString!size_t(th.size);
        info.entrySize = 512 + next512(info.size);
        info.timeLastModified = SysTime(unixTimeToStdTime(parseOctalString!ulong(th.mtime)));
        version (Posix)
        {
            // tar mode contains stat.st_mode & 07777.
            // we have to add the missing flags corresponding to file type
            // (and by no way tar mode is meaningful on Windows)
            const filetype = posixModeFileType(th.typeflag);
            info.attributes = parseOctalString(th.mode) | filetype;
            info.ownerId = parseOctalString(th.uid);
            info.groupId = parseOctalString(th.gid);
        }

        _entry = new ArchiveTarExtractEntry(_input, info);

        // entry data is padded up to the next 512 bytes boundary
        _next = next512(_input.pos + info.size);
    }
}

static assert(isExtractEntryRange!ArchiveTarRead);

/// Metadata decoded from one TAR header.
struct TarEntryInfo
{
    string path;
    string linkname;
    EntryType type;
    ulong size; // size of the entry data in bytes
    ulong entrySize; // header block + data rounded up to 512 bytes
    SysTime timeLastModified;
    uint attributes;

    version (Posix)
    {
        int ownerId;
        int groupId;
    }
}

/// Extraction entry whose data is read lazily from the shared archive cursor.
private class ArchiveTarExtractEntry : ArchiveExtractEntry
{
    import std.stdio : File;

    private Cursor _input;
    private size_t _start; // cursor position of the first data byte
    private size_t _end; // cursor position one past the last data byte
    private TarEntryInfo _info;

    /// The cursor is expected to be positioned right after the entry header.
    this(Cursor input, TarEntryInfo info)
    {
        _input = input;
        _start = input.pos;
        _end = _start + info.size;
        _info = info;
    }

    @property EntryMode mode()
    {
        return EntryMode.extraction;
    }

    @property string path()
    {
        return _info.path;
    }

    @property EntryType type()
    {
        return _info.type;
    }

    @property string linkname()
    {
        return _info.linkname;
    }

    @property size_t size()
    {
        return _info.size;
    }

    @property size_t entrySize()
    {
        return _info.entrySize;
    }

    @property SysTime timeLastModified()
    {
        return _info.timeLastModified;
    }

    @property uint attributes()
    {
        return _info.attributes;
    }

    version (Posix)
    {
        @property int ownerId()
        {
            return _info.ownerId;
        }

        @property int groupId()
        {
            return _info.groupId;
        }
    }

    /// Returns the entry data as a range of chunks of at most chunkSize bytes.
    /// Throws: Exception if the shared cursor is no longer at the start of
    /// this entry's data (e.g. the archive range was advanced).
    ByteRange byChunk(size_t chunkSize)
    {
        import std.range.interfaces : inputRangeObject;

        enforce(
            _input.pos == _start,
            "Data cursor has moved, this entry is not valid anymore"
        );
        return inputRangeObject(cursorByteRange(_input, _end - _input.pos, chunkSize));
    }
}

/// Memory layout of a 512 bytes ustar header block.
private struct TarHeader
{
    // dfmt off
    char [100]  name;       //   0    0
    char [8]    mode;       // 100   64
    char [8]    uid;        // 108   6C
    char [8]    gid;        // 116   74
    char [12]   size;       // 124   7C
    char [12]   mtime;      // 136   88
    char [8]    chksum;     // 148   94
    Typeflag    typeflag;   // 156   9C
    char [100]  linkname;   // 157   9D
    char [6]    magic;      // 257  101
    char [2]    version_;   // 263  107
    char [32]   uname;      // 265  109
    char [32]   gname;      // 297  129
    char [8]    devmajor;   // 329  149
    char [8]    devminor;   // 337  151
    char [155]  prefix;     // 345  159
    char [12]   padding;    // 500  1F4
    //dfmt on

    /// Writes the header describing `file` in the first 512 bytes of `block`.
    /// Returns: the remainder of `block` after the header.
    /// Throws: Exception if the path does not fit in the header or if the
    /// user/group database lookup fails.
    private static ubyte[] fillWith(ArchiveEntry file, ubyte[] block)
    in (block.length >= 512)
    {
        import std.algorithm : min;
        import std.string : toStringz;

        version (Posix)
        {
            // scratch space for getpwuid_r / getgrgid_r
            char[512] buf;
        }

        block[0 .. 512] = 0;

        TarHeader* th = cast(TarHeader*)(&block[0]);

        // prefix and name
        const name = file.path;
        // without this check, an over-long path is a range violation on th.prefix
        enforce(
            name.length <= th.name.length + th.prefix.length,
            "Path is too long to be stored in a TAR header: " ~ name
        );
        // NOTE(review): ustar specifies that the path is split on a '/' which is
        // not stored. This split is only consistent with ArchiveTarRead.
        const prefLen = name.length > 100 ? cast(ptrdiff_t) name.length - 100 : 0;
        if (prefLen)
            th.prefix[0 .. prefLen] = name[0 .. prefLen];
        th.name[0 .. name.length - prefLen] = name[prefLen .. $];

        th.typeflag = toTypeflag(file.type);

        if (th.typeflag == Typeflag.symLink)
        {
            const lname = file.linkname;
            // keep one byte for the NUL terminator
            const len = min(lname.length, cast(ptrdiff_t) th.linkname.length - 1);
            th.linkname[0 .. len] = lname[0 .. len];
        }

        version (Posix)
        {
            import core.sys.posix.grp;
            import core.sys.posix.pwd;
            import core.stdc.string : strlen;
            import std.conv : octal;

            const uid = file.ownerId;
            const gid = file.groupId;

            toOctalString(file.attributes & octal!7777, th.mode[0 .. $ - 1]);
            toOctalString(uid, th.uid[0 .. $ - 1]);
            toOctalString(gid, th.gid[0 .. $ - 1]);

            if (uid != 0)
            {
                passwd pwdbuf;
                passwd* pwd;
                enforce(getpwuid_r(uid, &pwdbuf, buf.ptr, buf.length, &pwd) == 0, "Could not read user name");
                // getpwuid_r reports success with a null result when the uid has
                // no entry in the user database: leave uname empty in that case
                if (pwd !is null && pwd.pw_name !is null)
                {
                    // keep one byte for the NUL terminator
                    const urlen = min(strlen(pwd.pw_name), th.uname.length - 1);
                    th.uname[0 .. urlen] = pwd.pw_name[0 .. urlen];
                }
            }

            if (gid != 0)
            {
                group grpbuf;
                group* grp;
                enforce(getgrgid_r(gid, &grpbuf, buf.ptr, buf.length, &grp) == 0, "Could not read group name");
                // same as above: success with a null result means unknown gid
                if (grp !is null && grp.gr_name !is null)
                {
                    const grlen = min(strlen(grp.gr_name), th.gname.length - 1);
                    th.gname[0 .. grlen] = grp.gr_name[0 .. grlen];
                }
            }
        }
        else version (Windows)
        {
            // default to mode 644 which is the most common on UNIX
            th.mode[0 .. 7] = "0000644";

            // TODO: https://docs.microsoft.com/fr-fr/windows/win32/secauthz/finding-the-owner-of-a-file-object-in-c--
        }

        toOctalString(file.size, th.size[0 .. $ - 1]);
        const mtime = file.timeLastModified().toUnixTime!long();
        toOctalString(mtime, th.mtime[0 .. $ - 1]);

        th.magic = "ustar\0";
        th.version_ = "00";

        // must be computed last: it covers every other field of the header
        const chksum = th.unsignedChecksum();

        toOctalString(chksum, th.chksum[0 .. $ - 1]);

        return block[512 .. $];
    }

    /// Sum of all the header bytes taken as unsigned values,
    /// the chksum field itself being counted as 8 spaces.
    private uint unsignedChecksum()
    {
        uint sum = 0;
        sum += unsignedSum(name);
        sum += unsignedSum(mode);
        sum += unsignedSum(uid);
        sum += unsignedSum(gid);
        sum += unsignedSum(size);
        sum += unsignedSum(mtime);
        sum += 32 * 8; // chksum field counted as 8 spaces (0x20)
        sum += cast(uint) typeflag;
        sum += unsignedSum(linkname);
        sum += unsignedSum(magic);
        sum += unsignedSum(version_);
        sum += unsignedSum(uname);
        sum += unsignedSum(gname);
        sum += unsignedSum(devmajor);
        sum += unsignedSum(devminor);
        sum += unsignedSum(prefix);
        // the checksum covers the whole block, trailing bytes included
        // (always zero in the headers written by fillWith)
        sum += unsignedSum(padding);
        return sum;
    }
}

static assert(TarHeader.sizeof == 512);

/// Values of the typeflag header field handled by this module.
/// A header read from an archive may hold other values.
private enum Typeflag : ubyte
{
    normalNul = 0,
    normal = '0',
    hardLink = '1',
    symLink = '2',
    charSpecial = '3',
    blockSpecial = '4',
    directory = '5',
    fifo = '6',
    contiguousFile = '7',
}

/// Typeflag written in the header for the given entry type.
Typeflag toTypeflag(EntryType type)
{
    final switch (type)
    {
    case EntryType.regular:
        return Typeflag.normal;
    case EntryType.directory:
        return Typeflag.directory;
    case EntryType.symlink:
        return Typeflag.symLink;
    }
}

/// Entry type for the given typeflag.
/// Everything but directories and symbolic links is reported as regular.
EntryType toEntryType(Typeflag flag)
{
    switch (flag)
    {
    case Typeflag.directory:
        return EntryType.directory;
    case Typeflag.symLink:
        return EntryType.symlink;
    default:
        return EntryType.regular;
    }
}

version (Posix)
{
    /// stat.st_mode part corresponding to file type.
    /// The flag comes straight from the archive bytes and may therefore hold a
    /// value that is not a Typeflag member: such values are treated as regular
    /// files, consistently with toEntryType.
    uint posixModeFileType(Typeflag flag)
    {
        import std.conv : octal;

        switch (flag)
        {
        case Typeflag.symLink:
            return octal!120_000;
        case Typeflag.charSpecial:
            return octal!20_000;
        case Typeflag.blockSpecial:
            return octal!60_000;
        case Typeflag.directory:
            return octal!40_000;
        case Typeflag.fifo:
            return octal!10_000;
        case Typeflag.normalNul:
        case Typeflag.normal:
        case Typeflag.hardLink: // is regular file right for hard links?
        case Typeflag.contiguousFile: // is regular file right for contiguous files?
        default:
            return octal!100_000;
        }
    }
}

/// Sum of the bytes of buf taken as unsigned values.
private uint unsignedSum(const(char)[] buf)
{
    uint sum;
    foreach (ubyte b; cast(const(ubyte)[]) buf)
    {
        sum += cast(uint) b;
    }
    return sum;
}

/// Writes val in octal, zero padded on the left, over the whole of buf.
private void toOctalString(T)(T val, char[] buf)
{
    import std.format : sformat;

    sformat(buf, "%0*o", buf.length, val);
}

/// Parses a numeric header field written in octal.
/// Leading spaces and trailing spaces/NULs are padding and are ignored.
/// Returns: the parsed value, or 0 if the field is blank.
/// Throws: ConvException if the field does not start with an octal digit.
private T parseOctalString(T = uint)(const(char)[] octal)
{
    import std.conv : parse;

    // some writers pad numeric fields with leading spaces
    size_t start = 0;
    while (start < octal.length && octal[start] == ' ')
        start++;

    // fields are terminated by NUL and/or space
    size_t end = octal.length;
    while (end > start && (octal[end - 1] == '\0' || octal[end - 1] == ' '))
        end--;

    if (start == end)
        return 0;

    auto src = octal[start .. end];

    return parse!(T)(src, 8);
}

/// Returns the slice of chars up to (excluding) its first NUL,
/// or the whole of chars if it holds no NUL.
private char[] parseString(char[] chars)
{
    // A field that is completely filled has no terminator, so the search must
    // be bounded by the field length (strlen would run into the next field).
    foreach (i, c; chars)
    {
        if (c == '\0')
            return chars[0 .. i];
    }
    return chars;
}

/// Rounds off up to the next multiple of 512.
private size_t next512(size_t off)
{
    const rem = off % 512;
    if (rem == 0)
        return off;
    return off + 512 - rem;
}

@("next512")
unittest
{
    assert(next512(0) == 0);
    assert(next512(1) == 512);
    assert(next512(300) == 512);
    assert(next512(511) == 512);
    assert(next512(512) == 512);
    assert(next512(1024) == 1024);
    assert(next512(1025) == 1536);
    assert(next512(1225) == 1536);
    assert(next512(1535) == 1536);
    assert(next512(1536) == 1536);
}

@("parseString")
unittest
{
    char[4] full = "abcd";
    assert(parseString(full[]) == "abcd");
    char[4] terminated = "ab\0d";
    assert(parseString(terminated[]) == "ab");
    char[4] blank = "\0\0\0\0";
    assert(parseString(blank[]).length == 0);
}

@("parseOctalString")
unittest
{
    assert(parseOctalString("0000644\0") == 420);
    assert(parseOctalString("   644 \0") == 420);
    assert(parseOctalString("012345\0 ") == 5349);
    assert(parseOctalString("\0\0\0\0") == 0);
    assert(parseOctalString("        ") == 0);
}