Note: You are viewing an old version of this page. View the current version.

WARNING: PHP's fseek() has an internal 2GB file size limitation! Use the Ruby version below for images larger than 2GB!

Originally built to copy a block device (a Mac mini in FireWire mass storage mode) to an image file, skipping all-null blocks so that the image is written as a sparse file to save space.

Also contains example functions for thousands-separator number formatting (e.g. 10,000,000) and human-readable magnitude number formatting (e.g. 10.0GB).

#!/usr/bin/env php
<?php

// Tick-based dispatch, so that the SIGALRM handler registered with
// pcntl_signal() further down actually gets called while the copy loop runs.
declare(ticks=1);
// Default number of decimal places for bcmath calls that do not pass a scale.
bcscale(3);

// Read a file and process it in chunks.
// Source block device to read from.
$infile = "/dev/sde";
// Destination image file (written sparsely: all-NUL blocks are skipped).
$outfile = "/media/recover/MacMini-whole-sparse.img";
// Block size in bytes; the copy loop reads and classifies one block at a time.
$blksz = 4096;
// Expected number of blocks on the source; used for the progress percentage.
// NOTE(review): presumably specific to the original disk - confirm before reuse.
$blkct = 19537686;
// Expected total size in bytes, computed with bcmath at scale 0.
$blkct_b = bcmul($blkct, $blksz, 0);

/**
 * Format a whole number with thousands separators (10000000 -> "10,000,000").
 *
 * The value is cast to a string and grouped in threes counting from the
 * right. A leading minus sign is kept in front of the digits instead of being
 * counted as one of them (previously -100 came out as "-,100").
 *
 * @param int|string $number Whole number to format.
 * @return string The number with a "," between every group of three digits.
 */
function ksep($number) {
        $digits = (string) $number;
        $sign = "";
        if (strncmp($digits, "-", 1) === 0) {
                $sign = "-";
                // Cast because substr() returns false for "-" on PHP < 8.
                $digits = (string) substr($digits, 1);
        }
        // Reverse so that str_split() groups from the least significant digit.
        return $sign . strrev(implode(",", str_split(strrev($digits), 3)));
}

/**
 * Format a byte count as a human-readable magnitude (e.g. "1.500kB", "10GB").
 *
 * The value is divided by 1024 until it drops below 1024 or the largest known
 * unit (Y) is reached. Whole values are printed without decimals; otherwise
 * the number of decimals shrinks as the integer part grows (1 digit -> 3
 * decimals, 2 digits -> 2 decimals, 3 or more digits -> 1 decimal).
 *
 * All bcmath calls pass their scale explicitly, so the result does not depend
 * on a global bcscale() having been set elsewhere.
 *
 * @param int|string $bytes Non-negative byte count (a decimal string is fine).
 * @return string Scaled number followed by the unit, e.g. "512B" or "1.250MB".
 */
function prettyGigs($bytes) {
        $powers = " kMGTPEZY";
        $max_power = strlen($powers) - 1;
        $bytes = (string) $bytes;
        $power = 0;
        $mod = "1";
        // ">= 1024" so that exactly 1024 of a unit rolls over to the next one,
        // and stop at the last suffix instead of indexing past the table.
        while ($power < $max_power && bccomp(bcdiv($bytes, $mod, 3), "1024", 3) >= 0) {
                $mod = bcmul($mod, "1024", 0);
                ++$power;
        }
        $num = bcdiv($bytes, $mod, 3);

        if ((int) $num == $num) {
                // Whole number: no decimals needed.
                $fmt = "%0.0f%s";
        } else {
                // Position of the decimal point == number of integer digits.
                switch (strpos($num, ".")) {
                        case 1:
                                $fmt = "%0.3f%s";
                                break;
                        case 2:
                                $fmt = "%0.2f%s";
                                break;
                        default:
                                $fmt = "%0.1f%s";
                                break;
                }
        }

        // Square-bracket offsets: the {} string offset syntax was removed in PHP 8.
        $unit = $power ? $powers[$power] . "B" : "B";
        return sprintf($fmt, $num, $unit);
}

/**
 * SIGALRM handler: re-arms the one-second alarm and prints a progress line.
 *
 * Reads the globals $blksz, $blkct, $blkct_b, $data, $sparse and $old_count,
 * and stores the current block total back into $old_count so that the next
 * call can derive the throughput for the elapsed second.
 *
 * The line shows: data blocks (D), sparse blocks (S), their sum (+), the
 * expected total (T), percentage done and bytes per second.
 *
 * @return void
 */
function stats() {
        // Re-arm first so the display keeps refreshing once per second.
        pcntl_alarm(1);

        $blksz = $GLOBALS['blksz'];
        $blkct = $GLOBALS['blkct'];
        $blkct_b = $GLOBALS['blkct_b'];
        // The handler can fire before the main script has initialised its
        // counters (and $old_count is only ever set here), so default to 0.
        $data = isset($GLOBALS['data']) ? $GLOBALS['data'] : 0;
        $sparse = isset($GLOBALS['sparse']) ? $GLOBALS['sparse'] : 0;
        $old_count = isset($GLOBALS['old_count']) ? $GLOBALS['old_count'] : 0;

        $data_b = bcmul($data, $blksz, 0);
        $sparse_b = bcmul($sparse, $blksz, 0);
        $count = bcadd($data, $sparse, 0);
        $count_b = bcmul($count, $blksz, 0);
        // Scale 4 matches the %0.4f used in the output below.
        $pct = bcmul(bcdiv($count, $blkct, 10), "100", 4);
        // Blocks processed since the previous call, converted to bytes.
        $speed = bcmul(bcsub($count, $old_count, 0), $blksz, 0);

        printf("\rD: %ss (%s) S: %ss (%s) +: %ss (%s) T: %ss (%s) %%: %0.4f %s/sec    ",
                ksep($data), prettyGigs($data_b), ksep($sparse), prettyGigs($sparse_b), ksep($count), prettyGigs($count_b),
                ksep($blkct), prettyGigs($blkct_b), $pct, prettyGigs($speed));

        $GLOBALS['old_count'] = $count;
}

// Initialise the counters before the alarm is armed so that stats() never
// sees them unset if the first SIGALRM arrives while the files are opening.
$data = 0;
$sparse = 0;

pcntl_signal(SIGALRM, "stats", true);
pcntl_alarm(1);
echo "\n\nPrecaching...";

$input = fopen($infile, "rb");
$output = fopen($outfile, "wb");

if (!is_resource($input))
        die(sprintf("%s failed!\n", $infile));
if (!is_resource($output))
        die(sprintf("%s failed!\n", $outfile));

// All-NUL detection uses count_chars(): an earlier crc32() pre-check was
// dropped because counting chars takes roughly half the time of the crc32.
while (!feof($input)) {
        $bufr = fread($input, $blksz);
        // Nothing read (error or EOF reached exactly on a block boundary):
        // stop instead of counting an empty block as data.
        if ($bufr === false || $bufr === "")
                break;
        $len = strlen($bufr);

        // count_chars(..., 3) returns the distinct bytes used in the buffer,
        // so "\0" means the block consists of NUL bytes only.
        if (count_chars($bufr, 3) === "\0") {
                // Nul block. Skip it, advancing by the bytes actually read so
                // a short final block does not overshoot.
                fseek($output, $len, SEEK_CUR);
                $sparse++;
                continue;
        }

        fwrite($output, $bufr, $len);
        $data++;
}

// Seeking past the end does not grow the file by itself; without this, any
// trailing run of sparse blocks would be missing from the image.
ftruncate($output, ftell($output));

fclose($input);
fclose($output);

echo "\n\n";

?>

Ruby version:

#!/usr/bin/env ruby

# Workaround: Ruby has no built-in binding for alarm(2), so import it from libc.
# Uses Fiddle, the replacement for the DL library (DL was removed in Ruby 2.2,
# where `require 'dl/import'` raises LoadError).
require 'fiddle/import'
module Alarm
        extend Fiddle::Importer
        begin
                dlload "libc.so.6"
        rescue Fiddle::DLError
                # No glibc under that name: resolve alarm() from the symbols
                # already loaded into this process instead.
                dlload Fiddle::Handle::DEFAULT
        end
        # Alarm.alarm(seconds) schedules a SIGALRM after +seconds+ (0 cancels a
        # pending alarm) and returns the seconds left on any previous alarm.
        extern "unsigned int alarm(unsigned int)"
end

# $stdout (standard out) is buffered! $stdout.flush() if you want it to show up immediately!
# Variables:
# * global variables start with $
# * instance variables start with @
# * local variables start with _ or a lowercase
# * constants start with an uppercase.
# Chr to ord: "x"[0] <- Access by index.
# Ord to chr: 120.chr <- Access chr method
# In for, start..end is start through end, while start...end is start up to but not including end.
# $data is, apparently, a reserved word.

# Block size in bytes; the copy loop reads and classifies one block at a time.
$blksz = 4096;
# Expected number of blocks on the source; used for the progress percentage
# and for the final truncate of the output image.
# NOTE(review): presumably specific to the original disk - confirm before reuse.
$blkct = 19537686;
# Expected total size in bytes.
$blkct_b = $blkct * $blksz;

# Source block device and destination image file.
src_file = "/dev/sde";
out_file = "/media/recover/MacMini-whole-sparse.img";

# Source is opened with File.new's default mode.
src = File.new(src_file);
# Destination is created if missing, truncated if it exists, opened read/write
# with permissions 0600.
out = File.new(out_file, File::CREAT|File::TRUNC|File::RDWR, 0600);

#$old_sum_b = 0;

# Format a whole number with thousands separators (10000000 -> "10,000,000").
#
# The argument is converted with to_s and grouped in threes counting from the
# right. A leading minus sign is kept in front of the digits instead of being
# counted as one of them (previously -100 came out as "-,100").
#
# kSep_num:: whole number (or its string form) to format.
# Returns the formatted String.
def kSep(kSep_num)
        text = kSep_num.to_s
        sign = text.start_with?("-") ? "-" : ""
        digits = text[sign.length..-1]
        # Reverse so that the groups of three start at the least significant digit.
        sign + digits.reverse.scan(/.{1,3}/m).join(",").reverse
end

# Format a byte count as a human-readable magnitude, e.g. 1536 -> "1.500k".
#
# The value is divided by 1024 until it drops below 1024 or the largest known
# suffix (Y) is reached. The suffix for plain bytes is a single space; callers
# append the trailing "B" themselves. The number of decimals shrinks as the
# integer part grows: 1 digit -> 3 decimals, 2 -> 2, 3 -> 1, anything else -> 0.
#
# bytes:: byte count (anything responding to to_f).
# Returns the scaled number followed by the one-character suffix.
def pMag(bytes)
        bytes = bytes.to_f
        powers = " kMGTPEZY"
        power = 0
        mod = 1
        # ">= 1024" so that exactly 1024 of a unit rolls over to the next one;
        # the bound on power keeps the lookup below inside the suffix table.
        while (bytes / mod) >= 1024 && power < powers.length - 1
                mod *= 1024
                power += 1
        end
        num = bytes / mod
        # Position of the decimal point == number of integer digits.
        fmt = case num.to_s.index(".")
              when 1 then "%0.3f%s"
              when 2 then "%0.2f%s"
              when 3 then "%0.1f%s"
              else "%0.0f%s" # was "$0.0f%s", which printed the format text itself
              end
        sprintf(fmt, num, powers[power, 1])
end

# SIGALRM handler: re-arm the one-second alarm, then print a single progress
# line with data blocks (D), sparse blocks (S), their sum (+), the expected
# total (T), the percentage done and the throughput since the previous tick.
trap("ALRM") do
        Alarm.alarm(1)

        done_blocks = $isdata + $issparse
        done_bytes = done_blocks * $blksz
        percent = (done_blocks.to_f / $blkct) * 100
        # Bytes handled since the last tick; the first tick has no previous value.
        rate = done_bytes - (@old_sum_b || 0)

        fields = [
                kSep($isdata), pMag($isdata * $blksz),
                kSep($issparse), pMag($issparse * $blksz),
                kSep(done_blocks), pMag(done_bytes),
                kSep($blkct), pMag($blkct_b),
                percent, pMag(rate)
        ]
        $stdout.printf("\rD: %ss (%sB) S: %ss (%sB) +: %ss (%sB) T: %ss (%sB) %%: %0.3f%% %sB/sec    ", *fields)

        @old_sum_b = done_bytes
        $stdout.flush
end

# Control vars: running counts of data blocks written and sparse blocks skipped.
$isdata = 0;
$issparse = 0;
buf = "";

# Start display loop
Alarm.alarm(1);

$stdout.printf("Precaching...");
$stdout.flush();

# Start processing loop: read the source one block at a time; blocks made up
# entirely of NUL bytes are skipped with a seek (leaving a hole in the output
# file), everything else is written out.
while (!src.eof?)
        buf = src.read($blksz);

# Old byte-scanning section
#       notnull = false;
#       for byte in 0...$blksz
#               if (buf[byte] != 0)
#                       # Break out of the loop on non-null data
#                       notnull = true;
#                       break;
#               end
#       end
#       if (notnull)
#               $isdata+= 1;
#               out.write(buf);
#       else
#               $issparse+= 1;
#               out.seek($blksz, File::SEEK_CUR);
#       end

# New regex section
# !!!! Ruby's regular expressions are different! ^/$ match after/before newlines!
# Use \A/\z to match start/end of string. Do NOT use \Z here: it also matches
# just before a trailing newline, so a block of NULs ending in "\n" would be
# treated as sparse and that newline byte would be lost from the image.
        if (/\A\0+\z/ =~ buf)
                $issparse+= 1;
                # Advance by the bytes actually read so a short final block
                # does not overshoot.
                out.seek(buf.bytesize, File::SEEK_CUR);
        else
                $isdata+= 1;
                out.write(buf);
        end
end

# Make everything nice and even: extend (or cut) the image to the expected
# size, which also materialises any trailing run of sparse blocks.
out.truncate($blkct * $blksz);

src.close();
out.close();

$stdout.printf("\n\n");

exit(0);