Note: You are viewing an old version of this page. View the current version.

Differences between version 3 and previous revision of copysparse.php.

Other diffs: Previous Major Revision, Previous Author

Newer page: version 3 Last edited on Saturday, 28 June 2008 20:51:17 by CyberLeo Revert
Older page: version 2 Last edited on Saturday, 28 June 2008 2:46:06 by CyberLeo Revert
@@ -132,5 +132,139 @@
  
 echo "\n\n"; 
  
 ?> 
+</verbatim>  
+  
+Ruby version:  
+<verbatim>  
+#!/usr/bin/env ruby  
+  
+# Fucking kludge.  
+require 'dl/import'  
+module Alarm  
+ extend DL::Importable  
+ dlload "libc.so.6"  
+ extern "unsigned int alarm(unsigned int)"  
+end  
+  
+# $stdout (standard out) is buffered! $stdout.flush() if you want it to show up immediately!  
+# Variables:  
+# * global variables start with $  
+# * instance variables start with @  
+# * local variables start with _ or a lowercase  
+# * constants start with an uppercase.  
+# Chr to ord: "x"[0] <- Access by index.  
+# Ord to chr: 120.chr <- Access chr method  
+# In for, start..end is start through end, while start...end is start up to but not including end.  
+# $data is, apparently, a reserved word.  
+  
+$blksz = 4096;  
+$blkct = 19537686;  
+$blkct_b = $blkct * $blksz;  
+  
+src_file = "/dev/sde";  
+out_file = "/media/recover/testfile";  
+  
+src = File.new(src_file);  
+out = File.new(out_file, File::CREAT|File::TRUNC|File::RDWR, 0600);  
+  
+#$old_sum_b = 0;  
+  
+def kSep(kSep_num)  
+ kSep_num = kSep_num.to_s;  
+ max = kSep_num.length;  
+ recip = max;  
+ out = "";  
+ for cur in 0...max  
+ recip-= 1;  
+ if (cur != 0 && (cur % 3) == 0)  
+ out = "," << out;  
+ end  
+ out = kSep_num[recip].chr << out;  
+ end  
+ return out;  
+end  
+  
+def pMag(bytes)  
+ bytes = bytes.to_f;  
+ powers = " kMGTPEZY";  
+ power = 0;  
+ fmt = "";  
+ out = "";  
+ mod = 1;  
+ while ((bytes / mod) > 1024)  
+ mod = mod * 1024;  
+ power+= 1;  
+ end  
+ num = bytes / mod;  
+ case (num.to_s.index("."))  
+ when nil then fmt = "%0.0f%s";  
+ when 0 then fmt = "%0.0f%s";  
+ when 1 then fmt = "%0.3f%s";  
+ when 2 then fmt = "%0.2f%s";  
+ when 3 then fmt = "%0.1f%s";  
+ else fmt = "$0.0f%s";  
+ end  
+ out << sprintf(fmt, num, powers[power].chr);  
+ return out;  
+end  
+  
+trap("ALRM") do  
+ Alarm.alarm(1);  
+ # Format and display stats all pretty-like  
+ isdata_b = $isdata * $blksz;  
+ issparse_b = $issparse * $blksz;  
+ sum = $isdata + $issparse;  
+ sum_b = sum * $blksz;  
+ pct = (sum / $blkct) * 100;  
+ perf = sum_b - (@old_sum_b? @old_sum_b: 0);  
+  
+ $stdout.printf("\rD: %ss (%s) S: %ss (%s) +: %ss (%s) T: %ss (%s) %%: %0.3f%% %s/sec ",  
+ kSep($isdata), pMag(isdata_b), kSep($issparse), pMag(issparse_b),  
+ kSep(sum), pMag(sum_b), kSep($blkct), pMag($blkct_b), pct, pMag(perf));  
+ @old_sum_b = sum_b;  
+ $stdout.flush();  
+end  
+  
+# Control vars  
+$isdata = 0;  
+$issparse = 0;  
+buf = "";  
+notnull = false;  
+  
+# Start display loop  
+Alarm.alarm(1);  
+  
+$stdout.printf("Precaching...");  
+$stdout.flush();  
+  
+# Start processing loop  
+while (!src.eof?)  
+ buf = src.read($blksz);  
+ notnull = false;  
+ for byte in 0...$blksz  
+ if (buf[byte] != 0)  
+ # Break out of the loop on non-null data  
+ notnull = true;  
+ break;  
+ end  
+ end  
+ if (notnull)  
+ $isdata+= 1;  
+ out.write(buf);  
+ else  
+ $issparse+= 1;  
+ out.seek($blksz, File::SEEK_CUR);  
+ end  
+end  
+  
+# Make everything nice and even  
+out.truncate($blkct * $blksz);  
+  
+src.close();  
+out.close();  
+  
+$stdout.printf("\n\n");  
+  
+exit(0);  
 </verbatim> 

version 3

Originally built to copy a block device (a Mac mini in FireWire mass storage mode) to an image file, skipping all-null blocks so that the result is a sparse file and saves space.

Also contains examples of thousands-separator number formatting (e.g. 10,000,000) and human-readable magnitude number formatting (e.g. 10.0GB) functions.

#!/usr/bin/env php
<?php

// Ticks must be enabled for the SIGALRM handler registered further down
// with pcntl_signal() to be dispatched while the copy loop is running.
declare(ticks=1);
// Default number of decimal digits for bc* calls that pass no scale.
bcscale(3);

// Source device, destination image, and the geometry used for the
// progress display: block size in bytes, expected number of blocks on
// the source, and the expected total size in bytes.
$infile = "/dev/sde";
$outfile = "/media/recover/MacMini-whole-sparse.img";
$blksz = 4096;
$blkct = 19537686;
$blkct_b = bcmul($blkct, $blksz, 0);

/**
 * Format a number with a comma between every group of three digits,
 * counted from the right (10000000 becomes "10,000,000").
 *
 * The value is handled purely as a string, so arbitrarily long bcmath
 * results can be passed in.
 *
 * @param int|string $number Value to format.
 * @return string The grouped representation; "" for an empty input.
 */
function ksep($number) {
        $digits = (string) $number;
        $max = strlen($digits);
        // Start from an empty string so an empty input yields "" instead of
        // reading an undefined variable.
        $out = "";
        for ($i = 0; $i < $max; ++$i) {
                // Every third character (but not before the first) gets a comma.
                if ($i && !($i % 3))
                        $out = "," . $out;
                // Walk the digits right to left. Square brackets are used because
                // the curly-brace string offset syntax no longer exists in PHP 8.
                $out = $digits[$max - 1 - $i] . $out;
        }
        return $out;
}

/**
 * Render a byte count as a short human-readable string such as "1.500kB"
 * or "10GB", using powers of 1024.
 *
 * The value is divided by 1024 until it is no larger than 1024 (so exactly
 * 1024 is still shown in the smaller unit). Whole results are printed
 * without decimals; otherwise the number of decimals shrinks as the
 * integer part grows (1 digit -> 3 decimals, 2 -> 2, 3 or more -> 1).
 *
 * @param int|string $bytes Byte count; may be a bcmath numeric string.
 * @return string Formatted value followed by its unit.
 */
function prettyGigs($bytes) {
        $powers = " kMGTPEZY";
        // Pass the scale explicitly so the result does not depend on a
        // bcscale() call having been made elsewhere.
        $scale = 3;
        $bytes = (string) $bytes;
        $power = 0;
        $mod = "1";
        // Stop at the last known prefix rather than indexing past the string.
        while (bcdiv($bytes, $mod, $scale) > 1024 && $power < strlen($powers) - 1) {
                $mod = bcmul($mod, "1024", 0);
                ++$power;
        }
        $num = bcdiv($bytes, $mod, $scale);

        // Position of the decimal point, or 0 when the value is a whole number.
        $dot = ((int) $num != $num) ? strpos($num, ".") : 0;
        if ($dot == 0)
                $fmt = "%0.0f%s";
        elseif ($dot == 1)
                $fmt = "%0.3f%s";
        elseif ($dot == 2)
                $fmt = "%0.2f%s";
        else
                $fmt = "%0.1f%s";

        // Square brackets: the curly-brace string offset syntax no longer
        // exists in PHP 8.
        $unit = $power ? $powers[$power] . "B" : "B";
        return sprintf($fmt, $num, $unit);
}

/**
 * SIGALRM handler: re-arms the one-second alarm and redraws the progress
 * line on the current terminal row.
 *
 * Reads the globals blksz, blkct, blkct_b, data and sparse, and keeps the
 * previous block total in the global old_count so the throughput over the
 * last interval can be shown.
 */
function stats() {
        // Re-arm first so updates keep coming once per second.
        pcntl_alarm(1);
        $blksz = $GLOBALS['blksz'];
        $blkct = $GLOBALS['blkct'];
        $blkct_b = $GLOBALS['blkct_b'];
        // Anything not set yet (old_count on the first call, or the counters
        // if the alarm fires very early) counts as zero rather than being
        // handed to bcmath as null.
        $data = isset($GLOBALS['data']) ? $GLOBALS['data'] : 0;
        $sparse = isset($GLOBALS['sparse']) ? $GLOBALS['sparse'] : 0;
        $old_count = isset($GLOBALS['old_count']) ? $GLOBALS['old_count'] : 0;

        $data_b = bcmul($data, $blksz, 0);
        $sparse_b = bcmul($sparse, $blksz, 0);
        $count = bcadd($data, $sparse, 0);
        $count_b = bcmul($count, $blksz, 0);
        // Scale 4 matches the %0.4f used in the format string below.
        $pct = bcmul(bcdiv($count, $blkct, 10), "100", 4);
        // Bytes handled since the previous call to this handler.
        $speed = bcmul(bcsub($count, $old_count, 0), $blksz, 0);

        printf("\rD: %ss (%s) S: %ss (%s) +: %ss (%s) T: %ss (%s) %%: %0.4f %s/sec    ",
                ksep($data), prettyGigs($data_b), ksep($sparse), prettyGigs($sparse_b), ksep($count), prettyGigs($count_b),
                ksep($blkct), prettyGigs($blkct_b), $pct, prettyGigs($speed));

/*      Older, shorter display, kept for reference:
        printf("\rBlk: %s Spa: %s Sum: %s Tot: %s Pct: %0.4f    ",
                thousands($GLOBALS['data']), thousands($GLOBALS['sparse']),
                thousands($count), thousands($GLOBALS['blkct']), $pct);
*/
        $GLOBALS['old_count'] = $count;
}

// Block counters read by stats(). They are set up before the alarm is
// armed so the handler never sees them undefined.
$data = 0;
$sparse = 0;

// Redraw the progress line once per second via SIGALRM.
pcntl_signal(SIGALRM, "stats", true);
pcntl_alarm(1);
echo "\n\nPrecaching...";

$input = fopen($infile, "rb");
$output = fopen($outfile, "wb");

if (!is_resource($input))
        die(sprintf("%s failed!\n", $infile));
if (!is_resource($output))
        die(sprintf("%s failed!\n", $outfile));

// Copy block by block. A block consisting only of NUL bytes is not
// written; the output position is just moved forward, which leaves a
// hole in the output file.
while (!feof($input)) {
        $bufr = fread($input, $blksz);
        if ($bufr === false)
                die(sprintf("\nread from %s failed!\n", $infile));
        // An empty read carries no block; let feof() end the loop.
        if ($bufr === "")
                continue;
        // The final block may be shorter than $blksz, so use its real length.
        $len = strlen($bufr);

        // count_chars() mode 3 returns each distinct byte once, so an
        // all-NUL block yields exactly "\0". An earlier variant of this
        // script compared crc32() values first; counting chars took
        // roughly half the time of computing the crc32.
        if (count_chars($bufr, 3) === "\0") {
                // Nul block. Skip it.
                fseek($output, $len, SEEK_CUR);
                $sparse++;
                continue;
        }
        if (fwrite($output, $bufr, $len) !== $len)
                die(sprintf("\nwrite to %s failed!\n", $outfile));
        $data++;
}

// Seeking alone does not grow a file. If the source ended in NUL blocks
// the image would come out short, so set its size to the final position.
ftruncate($output, ftell($output));

fclose($input);
fclose($output);

echo "\n\n";

?>

Ruby version:

#!/usr/bin/env ruby

# Kludge: Ruby has no built-in wrapper for alarm(2), so bind the libc
# function directly. Exposes Alarm.alarm(seconds), which returns the
# number of seconds that were left on any previously scheduled alarm.
module Alarm
        begin
                # Fiddle is the standard library's FFI on current Ruby versions.
                require 'fiddle/import'
                extend Fiddle::Importer
        rescue LoadError
                # Ruby 1.8 only ships the older DL importer.
                require 'dl/import'
                extend DL::Importable
        end
        dlload "libc.so.6"
        extern "unsigned int alarm(unsigned int)"
end

# $stdout (standard out) is buffered! $stdout.flush() if you want it to show up immediately!
# Variables:
# * global variables start with $
# * instance variables start with @
# * local variables start with _ or a lowercase
# * constants start with an uppercase.
# Chr to ord: "x"[0] <- Access by index (Ruby 1.8 only; on 1.9 and later "x"[0] is a one-character string, so use "x".ord).
# Ord to chr: 120.chr <- Access chr method
# In for, start..end is start through end, while start...end is start up to but not including end.
# $data is, apparently, a reserved word.

# Geometry used for the copy and the progress display: block size in
# bytes, expected number of blocks, and the expected total in bytes.
$blksz = 4096
$blkct = 19537686
$blkct_b = $blksz * $blkct

# Source to read from and image file to write to.
src_file = "/dev/sde"
out_file = "/media/recover/testfile"

# The source is opened with the default (read) mode; the image is created
# or emptied, opened read-write, and given owner-only permissions.
src = File.open(src_file)
out = File.open(out_file, File::RDWR | File::CREAT | File::TRUNC, 0600)

#$old_sum_b = 0;

# Returns the string form of kSep_num with a comma between every group of
# three characters, counted from the right (10000000 -> "10,000,000").
def kSep(kSep_num)
        text = kSep_num.to_s
        # Reversing lets the grouping start at the least significant digit.
        groups = text.reverse.scan(/.{1,3}/m)
        return groups.join(",").reverse
end

# Renders a byte count as a short magnitude string using powers of 1024,
# e.g. pMag(1536) -> "1.500k". The value is followed by a single prefix
# character from " kMGTPEZY" (a space when no prefix applies).
#
# The value is divided by 1024 until it is no larger than 1024. The number
# of decimals then depends on how many digits precede the decimal point:
# one digit -> 3 decimals, two -> 2, three -> 1, anything else -> none.
def pMag(bytes)
        bytes = bytes.to_f
        powers = " kMGTPEZY"
        power = 0
        mod = 1
        # Stop at the last known prefix rather than running off the string.
        while ((bytes / mod) > 1024 && power < powers.length - 1)
                mod *= 1024
                power += 1
        end
        num = bytes / mod
        fmt = case num.to_s.index(".")
              when 1 then "%0.3f%s"
              when 2 then "%0.2f%s"
              when 3 then "%0.1f%s"
              # Covers no decimal point and four or more leading digits. This
              # branch used to hold "$0.0f%s", which printed the format text
              # itself for values from 1000 up to 1024.
              else "%0.0f%s"
              end
        # powers[power, 1] is a one-character substring on every Ruby version.
        return sprintf(fmt, num, powers[power, 1])
end

# SIGALRM handler: re-arms the one-second alarm and redraws the progress
# line in place. Reads the $isdata / $issparse block counters and keeps
# the previous byte total in @old_sum_b to show bytes per interval.
trap("ALRM") do
        # Re-arm first so the display keeps updating once per second.
        Alarm.alarm(1)
        # Format and display stats all pretty-like
        isdata_b = $isdata * $blksz
        issparse_b = $issparse * $blksz
        sum = $isdata + $issparse
        sum_b = sum * $blksz
        # Convert to float before dividing: with two integers the quotient
        # is truncated and the percentage stays at 0.
        pct = (sum.to_f / $blkct) * 100
        # @old_sum_b is nil on the first call, so fall back to 0.
        perf = sum_b - (@old_sum_b || 0)

        $stdout.printf("\rD: %ss (%s) S: %ss (%s) +: %ss (%s) T: %ss (%s) %%: %0.3f%% %s/sec    ",
                kSep($isdata), pMag(isdata_b), kSep($issparse), pMag(issparse_b),
                kSep(sum), pMag(sum_b), kSep($blkct), pMag($blkct_b), pct, pMag(perf))
        @old_sum_b = sum_b
        # $stdout is buffered; flush so the line shows up immediately.
        $stdout.flush()
end

# Block counters, read by the ALRM handler for the progress display.
$isdata = 0
$issparse = 0

# Start display loop
Alarm.alarm(1)

$stdout.printf("Precaching...")
$stdout.flush()

# Copy block by block. read() with a length returns nil at end of file,
# which ends the loop. A block made up only of NUL bytes is not written;
# the output position is moved forward instead, leaving a hole.
while (buf = src.read($blksz))
        # Counting NUL bytes gives the same answer on every Ruby version.
        # Comparing buf[i] with 0 only works on 1.8, where indexing a
        # string yields an integer.
        if (buf.count("\0") == buf.length)
                $issparse += 1
                # The last block may be short; skip only what was read.
                out.seek(buf.length, File::SEEK_CUR)
        else
                $isdata += 1
                out.write(buf)
        end
end

# Make everything nice and even: pad the image out to the expected size,
# but never cut off data if the source turned out to be larger than that.
out.truncate([out.pos, $blkct * $blksz].max)

src.close()
out.close()

$stdout.printf("\n\n")

exit(0)