**** WARNING **** PHP has an internal 2GB file size limitation for fseek()! Use the Ruby version below for images larger than 2GB!

Originally built to copy a block device (a mac mini in firewire mass storage mode) to an image file, excluding all-null blocks (sparse file) to save space.

Also contains example functions for thousands-separator number formatting (e.g. 10,000,000) and human-readable magnitude formatting (e.g. 10.0GB).

001#!/usr/bin/env php
002<?php
003 
// Tick processing is switched on for the whole script; presumably this is
// what lets the SIGALRM handler registered further down get dispatched.
declare(ticks=1);

// What to copy and where to put it.
$infile = "/dev/sde";
$outfile = "/media/recover/MacMini-whole-sparse.img";

// Geometry of the source: bytes per block and the expected number of blocks.
$blksz = 4096;
$blkct = 19537686;

// Expected total size in bytes, held as a bcmath integer string (scale 0).
$blkct_b = bcmul($blkct, $blksz, 0);

// Default number of decimals for every bcmath call that omits its own scale.
bcscale(3);
013 
/**
 * Insert thousands separators into a number, e.g. 10000000 -> "10,000,000".
 *
 * The work is done on the decimal string, so long bcmath results are never
 * pushed through a float.
 *
 * @param int|string $number Value to format; it is cast to a string first.
 * @return string The value with "," between each group of three integer
 *                digits. A leading sign and anything after the integer
 *                digits (such as ".500") are carried over untouched. Input
 *                that does not start with digits is returned unchanged.
 */
function ksep($number) {
        $number = (string) $number;
        // Split into optional sign, integer digits, and an untouched remainder.
        // Keeping the sign out of the grouping avoids results like "-,100".
        if (!preg_match('/^([+-]?)(\d+)(.*)$/s', $number, $parts))
                return $number;
        // Group from the right: reverse, cut into threes, join, reverse back.
        $grouped = strrev(implode(",", str_split(strrev($parts[2]), 3)));
        return $parts[1] . $grouped . $parts[3];
}
026 
/**
 * Format a byte count as a short human-readable magnitude, e.g. "1.500kB".
 *
 * The value is divided by 1024 until it is below 1024 or the largest known
 * prefix ("Y") has been reached. Whole results are printed without decimals;
 * otherwise 1, 2 or 3+ integer digits get 3, 2 or 1 decimals respectively.
 *
 * @param int|string $bytes Byte count, as an int or a bcmath numeric string.
 * @return string The scaled number followed by its unit ("B", "kB", "MB", ...).
 */
function prettyGigs($bytes) {
        $powers = " kMGTPEZY";
        $last = strlen($powers) - 1;
        $bytes = (string) $bytes;
        $power = 0;
        $mod = "1";
        // Every bcmath call passes its scale explicitly so the result does not
        // depend on whatever bcscale() the caller has configured.
        // ">= 1024" lets exact powers roll over (1024 -> "1kB", not "1024B");
        // the $last bound keeps the prefix lookup inside $powers.
        while ($power < $last && bccomp(bcdiv($bytes, $mod, 3), "1024", 3) >= 0) {
                $mod = bcmul($mod, "1024", 0);
                ++$power;
        }
        $num = bcdiv($bytes, $mod, 3);

        // Only look at the decimal point when the fraction is non-zero.
        $whole = bcadd($num, "0", 0);
        $digits = (bccomp($num, $whole, 3) != 0) ? strpos($num, ".") : 0;
        switch ($digits) {
                case 0:
                        $fmt = "%0.0f%s";
                        break;
                case 1:
                        $fmt = "%0.3f%s";
                        break;
                case 2:
                        $fmt = "%0.2f%s";
                        break;
                default:
                        $fmt = "%0.1f%s";
                        break;
        }

        $unit = $power ? $powers[$power] . "B" : "B";
        return sprintf($fmt, $num, $unit);
}
056 
/**
 * SIGALRM handler: re-arm the one-second alarm and redraw the progress line.
 *
 * Reads the copy state from $GLOBALS (blksz, blkct, blkct_b, data, sparse,
 * old_count) and stores the current block total back into
 * $GLOBALS['old_count'] so the next call can report blocks handled since
 * this one. Takes no parameters and returns nothing.
 */
function stats() {
        // Re-arm first so the next report is scheduled before any formatting.
        pcntl_alarm(1);

        $blksz = (string) $GLOBALS['blksz'];
        $blkct = (string) $GLOBALS['blkct'];
        $blkct_b = $GLOBALS['blkct_b'];
        // The alarm can fire before the copy loop has created its counters,
        // and old_count does not exist until the first report has run.
        $data = isset($GLOBALS['data']) ? (string) $GLOBALS['data'] : "0";
        $sparse = isset($GLOBALS['sparse']) ? (string) $GLOBALS['sparse'] : "0";
        $old_count = isset($GLOBALS['old_count']) ? (string) $GLOBALS['old_count'] : "0";

        $data_b = bcmul($data, $blksz, 0);
        $sparse_b = bcmul($sparse, $blksz, 0);
        $count = bcadd($data, $sparse, 0);
        $count_b = bcmul($count, $blksz, 0);
        // Scale 4 matches the %0.4f below; guard against a zero block count.
        $pct = (bccomp($blkct, "0", 0) > 0)
                ? bcmul(bcdiv($count, $blkct, 10), "100", 4)
                : "0";
        // Bytes handled since the previous report.
        $speed = bcmul(bcsub($count, $old_count, 0), $blksz, 0);

        printf("\rD: %ss (%s) S: %ss (%s) +: %ss (%s) T: %ss (%s) %%: %0.4f %s/sec    ",
                ksep($data), prettyGigs($data_b), ksep($sparse), prettyGigs($sparse_b), ksep($count), prettyGigs($count_b),
                ksep($blkct), prettyGigs($blkct_b), $pct, prettyGigs($speed));

        // Older, shorter report format, kept for reference:
        //      printf("\rBlk: %s Spa: %s Sum: %s Tot: %s Pct: %0.4f    ",
        //              thousands($GLOBALS['data']), thousands($GLOBALS['sparse']),
        //              thousands($count), thousands($GLOBALS['blkct']), $pct);

        $GLOBALS['old_count'] = $count;
}
080 
// Progress display: stats() runs on SIGALRM and re-arms the alarm itself.
pcntl_signal(SIGALRM, "stats", true);
pcntl_alarm(1);
echo "\n\nPrecaching...";

$input = fopen($infile, "rb");
$output = fopen($outfile, "wb");

if (!is_resource($input))
        die(sprintf("%s failed!\n", $infile));
if (!is_resource($output))
        die(sprintf("%s failed!\n", $outfile));

// Block counters, read by stats() through $GLOBALS.
$data = 0;
$sparse = 0;

while (!feof($input)) {
        $bufr = fread($input, $blksz);
        if ($bufr === false)
                die(sprintf("\nRead from %s failed!\n", $infile));
        // feof() only turns true after a read has hit the end, so the last
        // pass can come back empty; let the loop condition decide rather than
        // counting a phantom block.
        if ($bufr === "")
                continue;
        // The final block may be short; always work with the real length.
        $len = strlen($bufr);

        // count_chars() mode 3 returns each distinct byte once, so "\0" alone
        // means the block holds nothing but NULs. (An earlier crc32 pre-check
        // was dropped: counting chars took roughly half the time.)
        if (count_chars($bufr, 3) === "\0") {
                // Nul block. Skip it with a relative seek, which avoids
                // building an absolute offset from ftell().
                fseek($output, $len, SEEK_CUR);
                $sparse++;
                continue;
        }

        if (fwrite($output, $bufr, $len) !== $len)
                die(sprintf("\nWrite to %s failed!\n", $outfile));
        $data++;
}

// Seeking past the end does not grow the file by itself: without this, NUL
// blocks at the very end of the source would be missing from the image.
ftruncate($output, ftell($output));

fclose($input);
fclose($output);

echo "\n\n";
129 
130?>

Ruby version:

001#!/usr/bin/env ruby
002 
# Ruby has no built-in binding for alarm(2), so it is imported straight from
# libc. Fiddle is the standard-library FFI; the DL library this script was
# first written against has since been removed from Ruby, so it is only tried
# as a fallback for old interpreters.
module Alarm
        begin
                require 'fiddle/import'
                extend Fiddle::Importer
        rescue LoadError
                require 'dl/import'
                extend DL::Importable
        end
        dlload "libc.so.6"
        # Exposed as Alarm.alarm(seconds).
        extern "unsigned int alarm(unsigned int)"
end
010 
011# $stdout (standard out) is buffered! $stdout.flush() if you want it to show up immediately!
012# Variables:
013# * global variables start with $
014# * instance variables start with @
015# * local variables start with _ or a lowercase
016# * constants start with an uppercase.
017# Chr to ord: "x"[0] <- Access by index.
018# Ord to chr: 120.chr <- Access chr method
019# In for, start..end is start through end, while start...end is start up to but not including end.
020# $data is, apparently, a reserved word.
021 
# Geometry of the source device. These are globals because the ALRM handler
# reads them as well as the copy loop.
$blksz = 4096;
$blkct = 19537686;
$blkct_b = $blksz * $blkct;

# Source device and destination image.
src_file = "/dev/sde";
out_file = "/media/recover/MacMini-whole-sparse.img";

# The source is opened with the default (read-only) mode; the image is
# created or emptied and opened read/write with owner-only permissions.
src = File.open(src_file);
out = File.open(out_file, File::RDWR | File::CREAT | File::TRUNC, 0600);
031 
032#$old_sum_b = 0;
033 
# Insert thousands separators into a number, e.g. 10000000 -> "10,000,000".
#
# kSep_num is converted with to_s. Only the integer digits are grouped: a
# leading sign and anything after those digits (such as ".5") are carried
# over untouched, and input that does not start with digits is returned as
# its plain string form.
def kSep(kSep_num)
        text = kSep_num.to_s;
        parts = /\A([+-]?)(\d+)(.*)\z/m.match(text);
        return text if parts.nil?;
        # Group from the right: reverse, take runs of up to three digits,
        # join them, and reverse back. The sign stays out of the grouping so
        # "-100" does not become "-,100".
        grouped = parts[2].reverse.scan(/\d{1,3}/).join(",").reverse;
        return parts[1] + grouped + parts[3];
end
048 
# Format a byte count as a short magnitude string such as "1.500k" or
# "10.00G". The caller appends the unit letter ("B").
#
# bytes is converted with to_f and divided by 1024 until it is below 1024 or
# the largest known prefix ("Y") is reached. One, two or three integer digits
# get 3, 2 or 1 decimals; anything else is printed without decimals. Values
# below 1024 get the blank first entry of the prefix table.
def pMag(bytes)
        bytes = bytes.to_f;
        powers = " kMGTPEZY";
        max_power = powers.length - 1;
        power = 0;
        mod = 1;
        # ">= 1024" lets exact powers roll over (1024 -> "1.000k"); the
        # max_power bound keeps the prefix lookup inside the table.
        while (power < max_power && (bytes / mod) >= 1024)
                mod = mod * 1024;
                power+= 1;
        end
        num = bytes / mod;
        # Position of the decimal point == number of integer digits.
        case (num.to_s.index("."))
                when 1 then fmt = "%0.3f%s";
                when 2 then fmt = "%0.2f%s";
                when 3 then fmt = "%0.1f%s";
                # Was "$0.0f%s": the stray "$" printed the format text itself.
                else fmt = "%0.0f%s";
        end
        return sprintf(fmt, num, powers[power].chr);
end
072 
# ALRM handler: schedule the next alarm, then redraw the one-line progress
# report from the global block counters.
trap("ALRM") do
        Alarm.alarm(1);

        # Block and byte totals: data blocks, sparse blocks, and both combined.
        blocks_done = $isdata + $issparse;
        data_bytes = $isdata * $blksz;
        sparse_bytes = $issparse * $blksz;
        done_bytes = blocks_done * $blksz;
        percent = (blocks_done.to_f / $blkct) * 100;
        # Bytes handled since the previous report; the first report has no
        # previous total and counts from zero.
        rate = done_bytes - (@old_sum_b || 0);

        line = sprintf("\rD: %ss (%sB) S: %ss (%sB) +: %ss (%sB) T: %ss (%sB) %%: %0.3f%% %sB/sec    ",
                kSep($isdata), pMag(data_bytes), kSep($issparse), pMag(sparse_bytes),
                kSep(blocks_done), pMag(done_bytes), kSep($blkct), pMag($blkct_b), percent, pMag(rate));
        $stdout.write(line);
        @old_sum_b = done_bytes;
        # $stdout is buffered; flush so the line shows up immediately.
        $stdout.flush();
end
089 
# Control vars: block counters read by the ALRM handler, plus the read buffer.
$isdata = 0;
$issparse = 0;
buf = "";

# Start display loop
Alarm.alarm(1);

$stdout.printf("Precaching...");
$stdout.flush();

# Start processing loop
while (!src.eof?)
        buf = src.read($blksz);

        # Whole-block NUL test. (This replaced an earlier byte-by-byte scan.)
        # !!!! Ruby's regular expressions are different! ^/$ match after/before
        # newlines, and \Z still matches before a trailing newline, which would
        # classify "NULs followed by \n" as empty and drop that byte.
        # Use \A/\z to match the true start/end of the string.
        if (/\A\0+\z/ =~ buf)
                $issparse+= 1;
                # Skip by what was actually read; the last block may be short.
                out.seek(buf.length, File::SEEK_CUR);
        else
                $isdata+= 1;
                out.write(buf);
        end
end

# Make everything nice and even: seeking past the end does not grow the file,
# so set the final length explicitly. Never cut below the current position,
# which would throw away data that was just copied.
out.truncate([$blkct * $blksz, out.pos].max);

src.close();
out.close();

$stdout.printf("\n\n");

exit(0);