module squiz_box.box;

import squiz_box.priv;
import squiz_box.squiz;
import squiz_box.util;

import std.datetime.systime;
import std.exception;
import std.range;

public import squiz_box.box.tar;
public import squiz_box.box.zip;

/// A dynamic range of BoxEntry
alias BoxEntryRange = InputRange!BoxEntry;

/// A dynamic range of UnboxEntry
alias UnboxEntryRange = InputRange!UnboxEntry;

/// A dynamic interface to boxing/unboxing algorithm
interface BoxAlgo
{
    /// Box the provided entries and return the associated byte range
    ByteRange box(BoxEntryRange entries, size_t chunkSize = defaultChunkSize);

    /// ditto
    ByteRange box(I)(I entries, size_t chunkSize = defaultChunkSize)
            if (isBoxEntryRange!I && !is(I == BoxEntryRange))
    {
        // wrap the static range into the dynamic interface and forward
        return box(inputRangeObject(entries), chunkSize);
    }

    /// Unbox the given byte range to a range of entries
    UnboxEntryRange unbox(ByteRange bytes);

    /// ditto
    UnboxEntryRange unbox(I)(I bytes)
            if (isByteRange!I && !is(I == ByteRange))
    {
        // wrap the static range into the dynamic interface and forward
        return unbox(inputRangeObject(bytes));
    }

    /// Select a boxing algorithm from the extension of filename.
    /// The match is done on the lower-cased base name and recognizes
    /// ".tar.xz", ".tar.gz", ".zip" and ".tar.bz2".
    /// Throws: Exception if the extension is not one of the above.
    /// Asserts (assert(false)) if the extension requires LZMA or Bzip2
    /// and the library was built without the corresponding version identifier.
    static BoxAlgo forFilename(string filename)
    {
        import std.string : endsWith, toLower;
        import std.path : baseName;

        const fn = baseName(filename).toLower();

        if (fn.endsWith(".tar.xz"))
        {
            version (HaveSquizLzma)
            {
                return new TarXzAlgo();
            }
            else
            {
                assert(false, "Squiz-Box built without LZMA support");
            }
        }
        else if (fn.endsWith(".tar.gz"))
        {
            return new TarGzAlgo();
        }
        else if (fn.endsWith(".zip"))
        {
            return new ZipAlgo();
        }
        else if (fn.endsWith(".tar.bz2"))
        {
            version (HaveSquizBzip2)
            {
                return new TarBzip2Algo();
            }
            else
            {
                assert(false, "Squiz-Box built without Bzip2 support");
            }
        }

        throw new Exception(fn ~ " has unsupported archive extension");
    }
}

/// Static check that a type is an InputRange of BoxEntry
template isBoxEntryRange(I)
{
    import std.range : ElementType, isInputRange;

    enum isBoxEntryRange = isInputRange!I && is(ElementType!I : BoxEntry);
}

static assert(isBoxEntryRange!(BoxEntry[]));

/// Static check that a type is an InputRange of UnboxEntry
template isUnboxEntryRange(I)
{
    import std.range : ElementType, isInputRange;

    enum isUnboxEntryRange = isInputRange!I && is(ElementType!I : UnboxEntry);
}

static assert(isUnboxEntryRange!(UnboxEntry[]));

/// Type of an archive entry
enum EntryType
{
    /// Regular file
    regular,
    /// Directory
    directory,
    /// Symlink
    symlink,
}

/// Describe in what archive mode an entry is for.
enum EntryMode
{
    /// Entry is used for archive creation
    creation,
    /// Entry is used for archive extraction
    extraction,
}

/// Common interface to archive entry.
/// Each type implementing ArchiveEntry is either for creation or for extraction, but not both.
/// Entries for archive creation implement BoxEntry.
/// Entries for archive extraction implement UnboxEntry.
///
/// Instances of BoxEntry are typically instantiated directly by the user or by thin helpers (e.g. FileBoxEntry)
/// Instances of UnboxEntry are instantiated by the extraction algorithm and their final type is hidden.
interface ArchiveEntry
{
    /// Tell whether the entry is used for creation (BoxEntry)
    /// or extraction (UnboxEntry)
    @property EntryMode mode();

    /// The path of the entry within the archive.
    /// Should always be a relative path, and never go backward (..)
    @property string path();

    /// The type of entry (directory, file, symlink)
    @property EntryType type();

    /// If symlink, this is the path pointed to by the link (relative to the symlink).
    /// For directories and regular file, returns null.
    @property string linkname();

    /// The size of the entry in bytes (returns zero for directories and symlink)
    /// This is the size of uncompressed, extracted data.
    @property ulong size();

    /// The timeLastModified of the entry
    @property SysTime timeLastModified();

    /// The file attributes (as returned std.file.getLinkAttributes)
    @property uint attributes();

    version (Posix)
    {
        /// The owner id of the entry
        @property int ownerId();
        /// The group id of the entry
        @property int groupId();
    }

    /// Check if the entry is a potential bomb.
    /// A bomb is typically an entry that may overwrite other files
    /// outside of the extraction directory.
    /// isBomb will return true if the path is absolute or rooted,
    /// or is a relative path going backwards (its first segment is '..' after normalization).
    /// In addition, a criteria of maximum allowed size can be provided (by default all sizes are accepted).
    /// Only `size` and `path` are inspected; `linkname` is not checked.
    final bool isBomb(ulong allowedSz = ulong.max)
    {
        import std.path : buildNormalizedPath, dirSeparator, isAbsolute, isRooted;
        import std.string : startsWith;

        if (allowedSz != ulong.max && size > allowedSz)
            return true;

        const p = path;

        // isRooted additionally catches Windows paths such as `\foo`, which are
        // not absolute (no drive) but still resolve to the root of the drive.
        if (isAbsolute(p) || isRooted(p))
            return true;

        // Only an exact leading ".." segment goes backward. A plain
        // startsWith("..") would also reject legitimate names like "..hidden".
        const normalized = buildNormalizedPath(p);
        return normalized == ".." || normalized.startsWith(".." ~ dirSeparator);
    }
}

/// Interface of ArchiveEntry used to create archives
interface BoxEntry : ArchiveEntry
{
    /// A byte range to the content of the entry.
    /// Only relevant for regular files.
    /// Other types of entry will return an empty range.
    ByteRange byChunk(size_t chunkSize = defaultChunkSize);

    /// Helper function that read the complete data of the entry (using byChunk).
    /// The returned buffer is allocated with `size` bytes and filled chunk by chunk.
    final ubyte[] readContent()
    {
        ubyte[] result = new ubyte[size];
        size_t offset;

        foreach (chunk; byChunk())
        {
            // the chunks must never add up to more than the announced size
            assert(offset + chunk.length <= result.length);
            result[offset .. offset + chunk.length] = chunk;
            offset += chunk.length;
        }

        return result;
    }
}

/// Interface of ArchiveEntry used for archive extraction
interface UnboxEntry : ArchiveEntry
{
    /// The size occupied by the entry in the archive.
    @property ulong entrySize();

    /// A byte range to the content of the entry.
    /// Only relevant for regular files.
    /// Other types of entry will return an empty range.
    ByteRange byChunk(size_t chunkSize = defaultChunkSize);

    /// Helper function that read the complete data of the entry (using byChunk).
    /// The returned buffer is allocated with `size` bytes and filled chunk by chunk.
    final ubyte[] readContent()
    {
        ubyte[] result = new ubyte[size];
        size_t offset;

        foreach (chunk; byChunk())
        {
            // the chunks must never add up to more than the announced size
            assert(offset + chunk.length <= result.length);
            result[offset .. offset + chunk.length] = chunk;
            offset += chunk.length;
        }

        return result;
    }

    /// Extract the entry to a file under the given base directory.
    /// baseDirectory must exist and be a directory (checked with assert).
    /// Throws: Exception if isBomb returns true for this entry.
    final void extractTo(string baseDirectory)
    {
        import std.file : exists, isDir, mkdirRecurse, setAttributes, setTimes;
        import std.path : buildNormalizedPath, dirName;

        assert(exists(baseDirectory) && isDir(baseDirectory));

        enforce(
            !this.isBomb,
            "archive bomb detected! Extraction aborted (entry will extract to " ~
                this.path ~ " - outside of extraction directory).",
        );

        const extractPath = buildNormalizedPath(baseDirectory, this.path);

        final switch (this.type)
        {
        case EntryType.directory:
            mkdirRecurse(extractPath);
            break;
        case EntryType.symlink:
            mkdirRecurse(dirName(extractPath));
            version (Posix)
            {
                import core.sys.posix.unistd : lchown;
                import std.file : symlink;
                import std.string : toStringz;

                symlink(this.linkname, extractPath);
                // the return value of lchown is not checked
                lchown(toStringz(extractPath), this.ownerId, this.groupId);
            }
            else version (Windows)
            {
                import core.sys.windows.winbase : CreateSymbolicLinkW, SYMBOLIC_LINK_FLAG_DIRECTORY;
                import core.sys.windows.windows : DWORD;
                import std.utf : toUTF16z;

                DWORD flags;
                // The directory flag depends on what the link points to, so the
                // target is inspected (resolved relative to the link location),
                // not the link path itself, which does not exist before creation.
                // if not exists (yet - we don't control order of extraction)
                // regular file is assumed
                const target = buildNormalizedPath(dirName(extractPath), this.linkname);
                if (exists(target) && isDir(target))
                {
                    flags = SYMBOLIC_LINK_FLAG_DIRECTORY;
                }
                // the return value of CreateSymbolicLinkW is not checked
                CreateSymbolicLinkW(extractPath.toUTF16z, this.linkname.toUTF16z, flags);
            }
            break;
        case EntryType.regular:
            mkdirRecurse(dirName(extractPath));

            writeBinaryFile(this.byChunk(), extractPath);

            // access time is set to now, modification time comes from the entry
            setTimes(extractPath, Clock.currTime, this.timeLastModified);

            // zero attributes are not applied
            const attrs = this.attributes;
            if (attrs != 0)
            {
                setAttributes(extractPath, attrs);
            }

            version (Posix)
            {
                import core.sys.posix.unistd : chown;
                import std.string : toStringz;

                // the return value of chown is not checked
                chown(toStringz(extractPath), this.ownerId, this.groupId);
            }
            break;
        }
    }
}

/// Create a file entry from a file path, relative to a base.
/// archiveBase must be a parent path from filename,
/// such that the path of the entry is filename, relative to archiveBase.
/// prefix is prepended to the name of the file in the archive.
/// Throws: Exception if filename is not located under archiveBase,
/// or if the resulting archive path is rejected by FileBoxEntry.
BoxEntry fileEntry(string filename, string archiveBase, string prefix = null)
{
    import std.path : absolutePath, buildNormalizedPath, dirSeparator, relativePath;
    import std.string : startsWith;

    const fn = buildNormalizedPath(absolutePath(filename));
    const ab = buildNormalizedPath(absolutePath(archiveBase));

    enforce(fn.startsWith(ab), "archiveBase is not a parent of filename");

    auto pathInArchive = relativePath(fn, ab);

    // The textual prefix test above also accepts siblings such as "/a/bc" for
    // base "/a/b". Their relative path goes backward, so reject them here
    // with the accurate message.
    enforce(
        pathInArchive != ".." && !pathInArchive.startsWith(".." ~ dirSeparator),
        "archiveBase is not a parent of filename"
    );

    if (prefix)
        pathInArchive = prefix ~ pathInArchive;

    return new FileBoxEntry(filename, pathInArchive);
}

/// File based implementation of BoxEntry.
/// Used to create archives from files in the file system.
/// Symbolic links are described by the link itself (type, attributes, owner),
/// not by the file they point to.
class FileBoxEntry : BoxEntry
{
    /// Path of the file in the file system
    string filePath;
    /// Path of the entry within the archive
    string pathInArchive;

    /// Build an entry for the file at filePath, stored as pathInArchive.
    /// If pathInArchive is null, filePath is used as the archive path.
    /// Throws: Exception if filePath does not exist, or if pathInArchive
    /// is absolute or contains "..".
    this(string filePath, string pathInArchive)
    {
        import std.algorithm : canFind;
        import std.file : exists;
        import std.path : isAbsolute;

        enforce(exists(filePath), filePath ~ ": No such file or directory");
        enforce(!isAbsolute(pathInArchive) && !pathInArchive.canFind(".."), "Potential archive bomb");

        // NOTE(review): the fallback below is applied after the validation above,
        // so a null pathInArchive lets an absolute or backward filePath through
        // unchecked. Confirm whether this is intended before tightening.
        if (!pathInArchive)
        {
            pathInArchive = filePath;
        }
        this.filePath = filePath;
        this.pathInArchive = pathInArchive;
    }

    /// Always EntryMode.creation
    @property EntryMode mode()
    {
        return EntryMode.creation;
    }

    /// The path of the entry within the archive
    @property string path()
    {
        return pathInArchive;
    }

    /// The type of the file at filePath, queried from the file system on each call
    @property EntryType type()
    {
        import std.file : isDir, isSymlink;

        version (Posix)
        {
            // isDir follows symbolic links, so the link test must come first.
            // Otherwise a symlink to a directory is reported as a directory
            // while linkname still returns the link target.
            if (isSymlink(filePath))
                return EntryType.symlink;
            if (isDir(filePath))
                return EntryType.directory;
        }
        else
        {
            // linkname is only implemented on Posix; keep the original order here
            if (isDir(filePath))
                return EntryType.directory;
            if (isSymlink(filePath))
                return EntryType.symlink;
        }
        return EntryType.regular;
    }

    /// On Posix, the target of the link if filePath is a symlink. Null otherwise.
    @property string linkname()
    {
        version (Posix)
        {
            import std.file : isSymlink, readLink;

            if (isSymlink(filePath))
                return readLink(filePath);
        }
        return null;
    }

    /// The size of the file in bytes, zero for directories and symlinks
    @property ulong size()
    {
        import std.file : getSize;

        // the ArchiveEntry contract is zero for anything but regular files
        if (type != EntryType.regular)
            return 0;

        return getSize(filePath);
    }

    /// The last modification time of the file
    @property SysTime timeLastModified()
    {
        import std.file : stdmtime = timeLastModified;

        return stdmtime(filePath);
    }

    /// The file attributes, as returned by std.file.getLinkAttributes
    /// (symbolic links are not followed)
    @property uint attributes()
    {
        import std.file : getLinkAttributes;

        return getLinkAttributes(filePath);
    }

    version (Posix)
    {
        import core.sys.posix.sys.stat : stat_t, lstat;

        /// Cached result of lstat, valid once statFetched is true
        stat_t statStruct;
        /// Whether statStruct has been filled
        bool statFetched;

        /// Fill statStruct on first use.
        /// lstat is used so that the owner of a symlink is the one of the link
        /// itself, which is what UnboxEntry.extractTo applies with lchown.
        private void ensureStat()
        {
            import std.string : toStringz;

            if (!statFetched)
            {
                errnoEnforce(
                    lstat(toStringz(filePath), &statStruct) == 0,
                    "Could not retrieve file stat of " ~ filePath
                );
                statFetched = true;
            }
        }

        /// The owner id of the file
        @property int ownerId()
        {
            ensureStat();

            return statStruct.st_uid;
        }

        /// The group id of the file
        @property int groupId()
        {
            ensureStat();

            return statStruct.st_gid;
        }
    }

    /// A byte range reading the file at filePath in binary mode, by chunks of chunkSize.
    /// The default argument mirrors the one declared by BoxEntry, so that
    /// byChunk() can also be called through a FileBoxEntry reference.
    ByteRange byChunk(size_t chunkSize = defaultChunkSize)
    {
        import std.stdio : File;

        return inputRangeObject(ByChunkImpl(File(filePath, "rb"), chunkSize));
    }
}