1 module squiz_box.box;
2 
3 import squiz_box.priv;
4 import squiz_box.squiz;
5 import squiz_box.util;
6 
7 import std.datetime.systime;
8 import std.exception;
9 import std.range;
10 
11 public import squiz_box.box.tar;
12 public import squiz_box.box.zip;
13 
/// A dynamic range of BoxEntry.
/// This is the interface-based (type-erased) input range type that
/// `BoxAlgo.box` takes as its entries argument.
alias BoxEntryRange = InputRange!BoxEntry;

/// A dynamic range of UnboxEntry.
/// This is the interface-based (type-erased) input range type that
/// `BoxAlgo.unbox` returns.
alias UnboxEntryRange = InputRange!UnboxEntry;
19 
/// A dynamic interface to boxing/unboxing algorithm.
///
/// Implementations turn a range of BoxEntry into archive bytes (`box`)
/// and archive bytes back into a range of UnboxEntry (`unbox`).
/// Use `forFilename` to pick an implementation from an archive file name.
interface BoxAlgo
{
    /// Box the provided entries and return the associated byte range.
    /// Params:
    ///   entries   = the entries to write into the archive
    ///   chunkSize = chunk size forwarded to the implementation
    ByteRange box(BoxEntryRange entries, size_t chunkSize = defaultChunkSize);

    /// ditto
    ///
    /// Convenience overload accepting any input range whose elements convert
    /// to BoxEntry (e.g. `FileBoxEntry[]`). The range is wrapped in a
    /// BoxEntryRange and forwarded to the virtual overload.
    ByteRange box(I)(I entries, size_t chunkSize = defaultChunkSize)
    if (isBoxEntryRange!I && !is(I == BoxEntryRange))
    {
        import std.range : ElementType;

        // Bind the wrapped range to a variable typed exactly BoxEntryRange so
        // that the call below can only resolve to the non-template overload
        // (this template rejects BoxEntryRange in its constraint). Forwarding
        // the wrapper object directly could re-select this very template and
        // recurse without end.
        static if (is(ElementType!I == BoxEntry))
        {
            BoxEntryRange erased = inputRangeObject(entries);
        }
        else
        {
            import std.algorithm.iteration : map;

            // Elements are of a type merely convertible to BoxEntry: convert
            // them one by one, because InputRange!Derived does not convert to
            // InputRange!BoxEntry.
            BoxEntryRange erased = inputRangeObject(
                entries.map!((ElementType!I e) { BoxEntry be = e; return be; })
            );
        }

        return box(erased, chunkSize);
    }

    /// Unbox the given byte range to a range of entries
    UnboxEntryRange unbox(ByteRange bytes);

    /// ditto
    ///
    /// Convenience overload wrapping any byte range with `inputRangeObject`
    /// before forwarding.
    UnboxEntryRange unbox(I)(I bytes)
    if (isByteRange!I && !is(I == ByteRange))
    {
        return unbox(inputRangeObject(bytes));
    }

    /// Select the algorithm matching the extension of `filename`.
    ///
    /// Only the base name is considered and the comparison is case
    /// insensitive. Recognized extensions are `.tar.xz`, `.tar.gz`, `.zip`
    /// and `.tar.bz2`.
    ///
    /// Throws: Exception if the extension is not recognized, or if it needs
    /// a compression backend (LZMA, Bzip2) that this build does not include.
    static BoxAlgo forFilename(string filename)
    {
        import std.string : endsWith, toLower;
        import std.path : baseName;

        const fn = baseName(filename).toLower();

        if (fn.endsWith(".tar.xz"))
        {
            version (HaveSquizLzma)
            {
                return new TarXzAlgo();
            }
            else
            {
                // The file name is runtime input: report a catchable error
                // rather than aborting the program with an assertion.
                throw new Exception("Squiz-Box built without LZMA support");
            }
        }
        else if (fn.endsWith(".tar.gz"))
        {
            return new TarGzAlgo();
        }
        else if (fn.endsWith(".zip"))
        {
            return new ZipAlgo();
        }
        else if (fn.endsWith(".tar.bz2"))
        {
            version (HaveSquizBzip2)
            {
                return new TarBzip2Algo();
            }
            else
            {
                throw new Exception("Squiz-Box built without Bzip2 support");
            }
        }

        throw new Exception(fn ~ " has unsupported archive extension");
    }
}
82 
/// Compile-time predicate: true when `I` is an input range whose element
/// type implicitly converts to BoxEntry, false otherwise.
template isBoxEntryRange(I)
{
    import std.range : ElementType, isInputRange;

    static if (isInputRange!I)
        enum bool isBoxEntryRange = is(ElementType!I : BoxEntry);
    else
        enum bool isBoxEntryRange = false;
}

static assert(isBoxEntryRange!(BoxEntry[]));
92 
/// Compile-time predicate: true when `I` is an input range whose element
/// type implicitly converts to UnboxEntry, false otherwise.
template isUnboxEntryRange(I)
{
    import std.range : ElementType, isInputRange;

    static if (isInputRange!I)
        enum bool isUnboxEntryRange = is(ElementType!I : UnboxEntry);
    else
        enum bool isUnboxEntryRange = false;
}

static assert(isUnboxEntryRange!(UnboxEntry[]));
102 
/// Type of an archive entry.
/// This is the value returned by `ArchiveEntry.type`; `UnboxEntry.extractTo`
/// switches on it to decide how the entry is written to the file system.
enum EntryType
{
    /// Regular file
    regular,
    /// Directory
    directory,
    /// Symlink
    symlink,
}
113 
/// Describe in what archive mode an entry is for.
/// This is the value returned by `ArchiveEntry.mode`.
enum EntryMode
{
    /// Entry is used for archive creation (see BoxEntry)
    creation,
    /// Entry is used for archive extraction (see UnboxEntry)
    extraction,
}
122 
/// Common interface to archive entry.
/// Each type implementing ArchiveEntry is either for creation or for extraction, but not both.
/// Entries for archive creation implement BoxEntry.
/// Entries for archive extraction implement UnboxEntry.
///
/// Instances of BoxEntry are typically instantiated directly by the user or by thin helpers (e.g. FileBoxEntry)
/// Instances of UnboxEntry are instantiated by the extraction algorithm and their final type is hidden.
interface ArchiveEntry
{
    /// Tell whether the entry is used for creation (BoxEntry)
    /// or extraction (UnboxEntry)
    @property EntryMode mode();

    /// The path of the entry within the archive.
    /// Should always be a relative path, and never go backward (..)
    @property string path();

    /// The type of entry (directory, file, symlink)
    @property EntryType type();

    /// If symlink, this is the path pointed to by the link (relative to the symlink).
    /// For directories and regular file, returns null.
    @property string linkname();

    /// The size of the entry in bytes (returns zero for directories and symlink)
    /// This is the size of uncompressed, extracted data.
    @property ulong size();

    /// The timeLastModified of the entry
    @property SysTime timeLastModified();

    /// The file attributes (as returned std.file.getLinkAttributes)
    @property uint attributes();

    version (Posix)
    {
        /// The owner id of the entry
        @property int ownerId();
        /// The group id of the entry
        @property int groupId();
    }

    /// Check if the entry is a potential bomb.
    /// A bomb is typically an entry that may overwrite other files
    /// outside of the extraction directory.
    /// isBomb will return true if the path is rooted (absolute, or starting
    /// at the root of a drive on Windows) or is a relative path going
    /// backwards (its first segment is '..' after normalization).
    /// A file name that merely begins with two dots (e.g. "..hidden") is not
    /// considered a bomb.
    /// In addition, a criteria of maximum allowed size can be provided (by default all sizes are accepted).
    ///
    /// Params:
    ///   allowedSz = maximum accepted `size` in bytes; `ulong.max` disables
    ///               the size check (and `size` is then not queried)
    /// Returns: true if the entry should not be extracted
    final bool isBomb(ulong allowedSz = ulong.max)
    {
        import std.path : buildNormalizedPath, dirSeparator, isRooted;
        import std.string : startsWith;

        if (allowedSz != ulong.max && size > allowedSz)
            return true;

        const p = path;

        // isRooted equals isAbsolute on Posix. On Windows it also catches
        // paths such as `\foo`, which are not absolute but still replace the
        // base directory when joined to it.
        if (isRooted(p))
            return true;

        // After normalization, any remaining ".." can only be leading.
        // Compare whole segments so that names like "..foo" are accepted.
        const normalized = buildNormalizedPath(p);
        return normalized == ".." || normalized.startsWith(".." ~ dirSeparator);
    }
}
184 
/// Interface of ArchiveEntry used to create archives
interface BoxEntry : ArchiveEntry
{
    /// A byte range to the content of the entry.
    /// Only relevant for regular files.
    /// Other types of entry will return an empty range.
    ByteRange byChunk(size_t chunkSize = defaultChunkSize);

    /// Helper function that read the complete data of the entry (using byChunk).
    ///
    /// A buffer of `size` bytes is allocated and filled with the chunks
    /// produced by `byChunk()`.
    ///
    /// Returns: the bytes actually read (at most `size` bytes).
    /// Throws: Exception if `byChunk` yields more data than `size` announces;
    /// std.conv.ConvOverflowException if `size` does not fit in `size_t`.
    final ubyte[] readContent()
    {
        import std.conv : to;

        // `size` is a ulong: convert with a checked conversion so that an
        // oversized entry is reported instead of silently truncated.
        auto result = new ubyte[to!size_t(size)];
        size_t offset;

        foreach (chunk; byChunk())
        {
            // Checked with enforce rather than assert: asserts (and array
            // bounds checks in @system code) are removed in release builds,
            // which would turn a size mismatch into a buffer overrun.
            enforce(
                chunk.length <= result.length - offset,
                "entry " ~ path ~ " yields more data than its declared size"
            );
            result[offset .. offset + chunk.length] = chunk;
            offset += chunk.length;
        }

        // Only hand out what was really read.
        return result[0 .. offset];
    }
}
209 
/// Interface of ArchiveEntry used for archive extraction
interface UnboxEntry : ArchiveEntry
{
    /// The size occupied by the entry in the archive.
    @property ulong entrySize();

    /// A byte range to the content of the entry.
    /// Only relevant for regular files.
    /// Other types of entry will return an empty range.
    ByteRange byChunk(size_t chunkSize = defaultChunkSize);

    /// Helper function that read the complete data of the entry (using byChunk).
    ///
    /// A buffer of `size` bytes is allocated and filled with the chunks
    /// produced by `byChunk()`.
    ///
    /// Returns: the bytes actually read (at most `size` bytes).
    /// Throws: Exception if `byChunk` yields more data than `size` announces;
    /// std.conv.ConvOverflowException if `size` does not fit in `size_t`.
    final ubyte[] readContent()
    {
        import std.conv : to;

        // `size` is a ulong: convert with a checked conversion so that an
        // oversized entry is reported instead of silently truncated.
        auto result = new ubyte[to!size_t(size)];
        size_t offset;

        foreach (chunk; byChunk())
        {
            // Checked with enforce rather than assert: asserts (and array
            // bounds checks in @system code) are removed in release builds,
            // which would turn a size mismatch into a buffer overrun.
            enforce(
                chunk.length <= result.length - offset,
                "entry " ~ path ~ " yields more data than its declared size"
            );
            result[offset .. offset + chunk.length] = chunk;
            offset += chunk.length;
        }

        // Only hand out what was really read.
        return result[0 .. offset];
    }

    /// Extract the entry to a file under the given base directory.
    ///
    /// Directories are created recursively. Symlinks are created pointing to
    /// `linkname`. Regular files are written from `byChunk()`, then receive
    /// the entry modification time and, when non-zero, its attributes.
    /// On Posix the owner and group of files and symlinks are set as well.
    ///
    /// Params:
    ///   baseDirectory = an existing directory under which the entry is created
    /// Throws: Exception if `isBomb` reports the entry as unsafe.
    final void extractTo(string baseDirectory)
    {
        import std.file : exists, isDir, mkdirRecurse, setAttributes, setTimes;
        import std.path : buildNormalizedPath, dirName;

        // Caller contract: the destination directory must already exist.
        assert(exists(baseDirectory) && isDir(baseDirectory));

        enforce(
            !this.isBomb,
            "archive bomb detected! Extraction aborted (entry will extract to " ~
                this.path ~ " - outside of extraction directory).",
        );

        // NOTE(review): only the entry path itself is validated here; a path
        // that goes through a previously extracted symlink is not checked.
        const extractPath = buildNormalizedPath(baseDirectory, this.path);

        final switch (this.type)
        {
        case EntryType.directory:
            mkdirRecurse(extractPath);
            break;
        case EntryType.symlink:
            mkdirRecurse(dirName(extractPath));
            version (Posix)
            {
                import core.sys.posix.unistd : lchown;
                import std.file : symlink;
                import std.string : toStringz;

                symlink(this.linkname, extractPath);
                // Return value is not checked: ownership change is attempted
                // but a failure does not abort the extraction.
                lchown(toStringz(extractPath), this.ownerId, this.groupId);
            }
            else version (Windows)
            {
                import core.sys.windows.winbase : CreateSymbolicLinkW, SYMBOLIC_LINK_FLAG_DIRECTORY;
                import core.sys.windows.windows : DWORD;
                import std.utf : toUTF16z;

                DWORD flags;
                // if not exists (yet - we don't control order of extraction)
                // regular file is assumed
                if (exists(extractPath) && isDir(extractPath))
                {
                    flags = SYMBOLIC_LINK_FLAG_DIRECTORY;
                }
                CreateSymbolicLinkW(extractPath.toUTF16z, this.linkname.toUTF16z, flags);
            }
            break;
        case EntryType.regular:
            mkdirRecurse(dirName(extractPath));

            writeBinaryFile(this.byChunk(), extractPath);

            // Access time is set to now, modification time to the entry's.
            setTimes(extractPath, Clock.currTime, this.timeLastModified);

            const attrs = this.attributes;
            if (attrs != 0)
            {
                setAttributes(extractPath, attrs);
            }

            version (Posix)
            {
                import core.sys.posix.unistd : chown;
                import std.string : toStringz;

                // Return value is not checked: ownership change is attempted
                // but a failure does not abort the extraction.
                chown(toStringz(extractPath), this.ownerId, this.groupId);
            }
            break;
        }
    }
}
310 
/// Create a file entry from a file path, relative to a base.
/// archiveBase must be a parent path from filename,
/// such as the path of the entry is filename, relative to archiveBase.
/// prefix is prepended to the name of the file in the archive
/// (plain concatenation: no path separator is inserted).
///
/// Params:
///   filename    = path of the file on the file system
///   archiveBase = directory that the path in archive is made relative to
///   prefix      = optional text prepended to the path in archive
/// Returns: a FileBoxEntry for `filename`
/// Throws: Exception if archiveBase is not a parent of filename, or if
/// the FileBoxEntry constructor rejects its arguments.
BoxEntry fileEntry(string filename, string archiveBase, string prefix = null)
{
    import std.path : absolutePath, buildNormalizedPath, dirSeparator, isAbsolute, relativePath;
    import std.string : startsWith;

    const fn = buildNormalizedPath(absolutePath(filename));
    const ab = buildNormalizedPath(absolutePath(archiveBase));

    auto pathInArchive = relativePath(fn, ab);

    // Validate the relative path instead of comparing string prefixes:
    // "/foo/barbaz" starts with "/foo/bar" without being below it, and the
    // relative path follows the platform's case-sensitivity rules.
    // An absolute result means no relative path exists (e.g. other drive).
    enforce(
        !isAbsolute(pathInArchive)
            && pathInArchive != ".."
            && !pathInArchive.startsWith(".." ~ dirSeparator),
        "archiveBase is not a parent of filename"
    );

    if (prefix)
        pathInArchive = prefix ~ pathInArchive;

    return new FileBoxEntry(filename, pathInArchive);
}
331 
/// File based implementation of BoxEntry.
/// Used to create archives from files in the file system.
/// Metadata (type, size, times, attributes, ownership) is read from the
/// file system when the corresponding property is queried.
class FileBoxEntry : BoxEntry
{
    /// Path of the file on the file system
    string filePath;
    /// Path of the entry within the archive
    string pathInArchive;

    /// Build an entry for the file at `filePath`.
    /// Params:
    ///   filePath      = path of an existing file, directory or symlink
    ///   pathInArchive = path of the entry in the archive;
    ///                   if null, `filePath` is used
    /// Throws: Exception if `filePath` does not exist, or if the effective
    /// path in archive is absolute or contains a ".." segment.
    this(string filePath, string pathInArchive)
    {
        import std.algorithm : any;
        import std.file : exists;
        import std.path : isAbsolute, pathSplitter;

        enforce(exists(filePath), filePath ~ ": No such file or directory");

        // Apply the fallback first so that the path really stored in the
        // archive is the one being validated.
        if (!pathInArchive)
        {
            pathInArchive = filePath;
        }

        // Compare whole segments: a name such as "a..b" is legitimate.
        enforce(
            !isAbsolute(pathInArchive) && !pathSplitter(pathInArchive).any!(seg => seg == ".."),
            "Potential archive bomb"
        );

        this.filePath = filePath;
        this.pathInArchive = pathInArchive;
    }

    /// Always EntryMode.creation
    @property EntryMode mode()
    {
        return EntryMode.creation;
    }

    /// The path of the entry within the archive
    @property string path()
    {
        return pathInArchive;
    }

    /// The type of the file at `filePath`
    @property EntryType type()
    {
        import std.file : isDir, isSymlink;

        // isSymlink must be tested first: isDir follows symbolic links and
        // would report a link to a directory as a directory.
        if (isSymlink(filePath))
            return EntryType.symlink;
        if (isDir(filePath))
            return EntryType.directory;
        return EntryType.regular;
    }

    /// On Posix, the target of the link if `filePath` is a symlink.
    /// Returns null otherwise.
    @property string linkname()
    {
        version (Posix)
        {
            import std.file : isSymlink, readLink;

            if (isSymlink(filePath))
                return readLink(filePath);
        }
        return null;
    }

    /// The size of the file in bytes, zero for directories and symlinks
    /// (as specified by ArchiveEntry.size).
    @property ulong size()
    {
        import std.file : getSize;

        if (type != EntryType.regular)
            return 0;

        return getSize(filePath);
    }

    /// The last modification time reported by std.file.timeLastModified
    @property SysTime timeLastModified()
    {
        import std.file : stdmtime = timeLastModified;

        return stdmtime(filePath);
    }

    /// The file attributes, as returned by std.file.getLinkAttributes
    /// (symbolic links are not followed).
    @property uint attributes()
    {
        import std.file : getLinkAttributes;

        return getLinkAttributes(filePath);
    }

    version (Posix)
    {
        import core.sys.posix.sys.stat : stat_t, stat, lstat;

        /// Cached result of the stat call, valid once statFetched is true
        stat_t statStruct;
        /// Whether statStruct was filled
        bool statFetched;

        /// Fill statStruct on first use.
        /// lstat is used so that a symlink reports its own owner and group,
        /// matching the lchown call done at extraction.
        private void ensureStat()
        {
            import std.string : toStringz;

            if (!statFetched)
            {
                errnoEnforce(
                    lstat(toStringz(filePath), &statStruct) == 0,
                    "Could not retrieve file stat of " ~ filePath
                );
                statFetched = true;
            }
        }

        /// The owner id of the file
        @property int ownerId()
        {
            ensureStat();

            return statStruct.st_uid;
        }

        /// The group id of the file
        @property int groupId()
        {
            ensureStat();

            return statStruct.st_gid;
        }
    }

    /// A byte range over the content of the file, read by chunks of
    /// `chunkSize` bytes.
    /// NOTE(review): the file is opened whatever the entry type is;
    /// confirm that callers only read the content of regular entries.
    ByteRange byChunk(size_t chunkSize = defaultChunkSize)
    {
        import std.stdio : File;

        return inputRangeObject(ByChunkImpl(File(filePath, "rb"), chunkSize));
    }
}