**** WARNING **** PHP has an internal 2GB file size limitation for fseek()! Use the Ruby version below for images larger than 2GB!
Originally built to copy a block device (a Mac mini in FireWire mass-storage mode) to an image file, skipping all-null blocks so that the output is a sparse file and saves space.
Also contains example functions for thousands-separator number formatting (e.g. 10,000,000) and human-readable magnitude formatting (e.g. 10.0GB).
#!/usr/bin/env php
<?php
/**
 * Copy a block device to an image file in fixed-size chunks, skipping
 * all-null chunks so the output becomes a sparse file.
 *
 * Progress statistics are printed once per second from a SIGALRM handler.
 * Requires the pcntl and bcmath extensions.
 */
declare(ticks=1);
bcscale(3);

// Source device, destination image, and the expected geometry of the source.
$infile  = "/dev/sde";
$outfile = "/media/recover/MacMini-whole-sparse.img";
$blksz   = 4096;
$blkct   = 19537686;
$blkct_b = bcmul($blkct, $blksz, 0);

// Counters read by stats(). They are initialised before the alarm is armed
// so the handler never sees undefined globals.
$data      = 0;   // blocks written as real data
$sparse    = 0;   // blocks skipped because they were all NUL
$old_count = 0;   // total blocks processed at the previous stats() call

/**
 * Format a number with thousands separators, e.g. 10000000 -> "10,000,000".
 *
 * @param int|string $number Value to format; it is treated as a digit string.
 * @return string The digits with a comma inserted every three characters,
 *                counted from the right.
 */
function ksep($number) {
    $number = (string) $number;
    $max = strlen($number);
    $recip = $max;
    $out = "";
    for ($i = 0; $i < $max; ++$i) {
        --$recip;
        if ($i && !($i % 3)) $out = "," . $out;
        // Square-bracket offsets: the curly-brace form is gone in PHP 8.
        $out = $number[$recip] . $out;
    }
    return $out;
}

/**
 * Format a byte count as a human-readable magnitude, e.g. "10.0GB".
 *
 * The number of decimals shrinks as the integer part grows, so the result
 * keeps roughly four significant digits.
 *
 * @param int|string $bytes Byte count (a bcmath numeric string is accepted).
 * @return string The scaled value followed by its unit ("B", "kB", "MB", ...).
 */
function prettyGigs($bytes) {
    $powers = " kMGTPEZY";
    $power = 0;
    $mod = 1;
    while (bcdiv($bytes, $mod) > 1024) {
        $mod = bcmul($mod, 1024);
        ++$power;
    }
    $num = bcdiv($bytes, $mod);

    // Position of the decimal point == number of integer digits. Whole
    // values are printed without decimals.
    $digits = ((int) $num != $num) ? strpos($num, ".") : 0;
    switch ($digits) {
        case 0:
            $fmt = "%0.0f%s";
            break;
        case 1:
            $fmt = "%0.3f%s";
            break;
        case 2:
            $fmt = "%0.2f%s";
            break;
        case 3:
        default:
            $fmt = "%0.1f%s";
            break;
    }

    $unit = $power ? $powers[$power] . "B" : "B";
    return sprintf($fmt, $num, $unit);
}

/**
 * SIGALRM handler: print one progress line and re-arm the alarm.
 *
 * Reads the global counters ($data, $sparse, $old_count) and the configured
 * geometry ($blksz, $blkct, $blkct_b); updates $old_count.
 */
function stats() {
    pcntl_alarm(1);
    $blksz    = $GLOBALS['blksz'];
    $blkct    = $GLOBALS['blkct'];
    $blkct_b  = $GLOBALS['blkct_b'];
    $data     = $GLOBALS['data'];
    $data_b   = bcmul($data, $blksz, 0);
    $sparse   = $GLOBALS['sparse'];
    $sparse_b = bcmul($sparse, $blksz, 0);
    $count    = bcadd($data, $sparse, 0);
    $count_b  = bcmul($count, $blksz, 0);
    // Scale 4 matches the %0.4f used to print the percentage.
    $pct      = bcmul(bcdiv($count, $blkct, 10), "100", 4);
    // Blocks processed since the previous one-second tick, in bytes.
    $speed    = bcmul(bcsub($count, $GLOBALS['old_count']), $blksz);
    printf("\rD: %ss (%s) S: %ss (%s) +: %ss (%s) T: %ss (%s) %%: %0.4f %s/sec ",
        ksep($data), prettyGigs($data_b),
        ksep($sparse), prettyGigs($sparse_b),
        ksep($count), prettyGigs($count_b),
        ksep($blkct), prettyGigs($blkct_b),
        $pct, prettyGigs($speed));
    $GLOBALS['old_count'] = $count;
}

pcntl_signal(SIGALRM, "stats", true);
pcntl_alarm(1);

echo "\n\nPrecaching...";

$input  = fopen($infile, "rb");
$output = fopen($outfile, "wb");
if (!is_resource($input))  die(sprintf("%s failed!\n", $infile));
if (!is_resource($output)) die(sprintf("%s failed!\n", $outfile));

while (!feof($input)) {
    $bufr = fread($input, $blksz);
    // feof() only turns true after a read hits the end, so the last
    // iteration can return nothing; that must not count as a data block.
    if ($bufr === false || $bufr === "") break;
    $len = strlen($bufr);

    // count_chars(..., 3) returns the distinct bytes in the buffer; a lone
    // "\0" means the block is entirely NUL. The original notes measured this
    // at roughly half the cost of a crc32 comparison.
    if (count_chars($bufr, 3) === "\0") {
        // Nul block: leave a hole. A relative seek avoids building an
        // absolute offset from ftell(), and uses the real length so a short
        // final block does not overshoot.
        if (fseek($output, $len, SEEK_CUR) !== 0) {
            die(sprintf("\nseek failed in %s\n", $outfile));
        }
        $sparse++;
        continue;
    }

    if (fwrite($output, $bufr, $len) !== $len) {
        die(sprintf("\nwrite failed in %s\n", $outfile));
    }
    $data++;
}

// Stop the progress timer before tearing down.
pcntl_alarm(0);

// Seeking past the end does not grow the file by itself. If the source ends
// in null blocks, extend the image explicitly to the final position so it
// has the same size as the source.
$end = ftell($output);
if ($end !== false && !ftruncate($output, $end)) {
    fwrite(STDERR, sprintf("\ncould not extend %s to %s bytes\n", $outfile, $end));
}

fclose($input);
fclose($output);
echo "\n\n";
?>
Ruby version:
#!/usr/bin/env ruby
#
# Copy a block device to an image file in fixed-size chunks, skipping
# all-null chunks so the output becomes a sparse file. Progress statistics
# are printed once per second from a SIGALRM handler.

# Kludge: Ruby has no built-in wrapper for alarm(2), so import it from libc.
# NOTE(review): 'dl/import' only exists on old Ruby releases (DL was removed
# from the standard library in Ruby 2.2; Fiddle is its replacement) --
# confirm the target interpreter before running.
require 'dl/import'
module Alarm
  extend DL::Importable
  dlload "libc.so.6"
  extern "unsigned int alarm(unsigned int)"
end

# Notes from the original author:
# * $stdout is buffered; call $stdout.flush() to show output immediately.
# * Global variables start with $, instance variables with @, local
#   variables with _ or a lowercase letter, constants with an uppercase one.
# * Chr to ord: "x"[0] (access by index). Ord to chr: 120.chr.
# * In a for loop, start..end includes end, start...end excludes it.
# * $data is, apparently, a reserved word (hence $isdata below).

# Expected geometry of the source device.
$blksz = 4096
$blkct = 19537686
$blkct_b = $blkct * $blksz

src_file = "/dev/sde"
out_file = "/media/recover/MacMini-whole-sparse.img"

# Binary mode so the device contents are read as raw bytes.
src = File.new(src_file, "rb")
out = File.new(out_file, File::CREAT|File::TRUNC|File::RDWR, 0600)

# Format a number with thousands separators, e.g. 10000000 -> "10,000,000".
# kSep_num is converted with to_s; returns a new String.
def kSep(kSep_num)
  kSep_num = kSep_num.to_s
  max = kSep_num.length
  recip = max
  out = ""
  for cur in 0...max
    recip -= 1
    if (cur != 0 && (cur % 3) == 0)
      out = "," << out
    end
    # .chr keeps this working whether String#[] yields a code or a String.
    out = kSep_num[recip].chr << out
  end
  return out
end

# Format a byte count as a human-readable magnitude without the trailing
# "B" (callers append it), e.g. 10737418240 -> "10.00G". The number of
# decimals shrinks as the integer part grows.
def pMag(bytes)
  bytes = bytes.to_f
  powers = " kMGTPEZY"
  power = 0
  mod = 1
  while ((bytes / mod) > 1024)
    mod = mod * 1024
    power += 1
  end
  num = bytes / mod

  # Position of the decimal point == number of integer digits.
  fmt = case (num.to_s.index("."))
        when nil, 0 then "%0.0f%s"
        when 1 then "%0.3f%s"
        when 2 then "%0.2f%s"
        when 3 then "%0.1f%s"
        # Four or more integer digits: no decimals. (This branch used to
        # read "$0.0f%s", which printed a literal "$0.0f" and lost the unit.)
        else "%0.0f%s"
        end
  return sprintf(fmt, num, powers[power].chr)
end

# SIGALRM handler: re-arm the alarm, then format and display the stats.
trap("ALRM") do
  Alarm.alarm(1)
  isdata_b = $isdata * $blksz
  issparse_b = $issparse * $blksz
  sum = $isdata + $issparse
  sum_b = sum * $blksz
  pct = (sum.to_f / $blkct) * 100
  # Bytes processed since the previous one-second tick.
  perf = sum_b - (@old_sum_b ? @old_sum_b : 0)
  $stdout.printf("\rD: %ss (%sB) S: %ss (%sB) +: %ss (%sB) T: %ss (%sB) %%: %0.3f%% %sB/sec ",
                 kSep($isdata), pMag(isdata_b),
                 kSep($issparse), pMag(issparse_b),
                 kSep(sum), pMag(sum_b),
                 kSep($blkct), pMag($blkct_b),
                 pct, pMag(perf))
  @old_sum_b = sum_b
  $stdout.flush()
end

# Control vars: counts of data blocks written and null blocks skipped.
$isdata = 0
$issparse = 0
buf = ""

# Start display loop
Alarm.alarm(1)
$stdout.printf("Precaching...")
$stdout.flush()

# Start processing loop. An earlier version scanned each block byte by byte;
# it was replaced by the regex test below.
while (!src.eof?)
  buf = src.read($blksz)
  break if buf.nil?

  # Ruby's regular expressions are different: ^ and $ match after/before
  # newlines, and \Z also matches just before a trailing newline. Use \A and
  # \z to anchor to the true start/end of the string; with \Z a block of
  # NULs ending in "\n" would be skipped and that byte lost.
  if (/\A\0+\z/ =~ buf)
    $issparse += 1
    # Skip by the number of bytes actually read so a short final block does
    # not overshoot.
    out.seek(buf.length, File::SEEK_CUR)
  else
    $isdata += 1
    out.write(buf)
  end
end

# Make everything nice and even: size the image to the expected device size
# so trailing holes are materialised.
out.truncate($blkct * $blksz)

src.close()
out.close()
$stdout.printf("\n\n")
exit(0)