**** WARNING **** PHP has an internal 2GB file size limitation for fseek()! Use the Ruby version below for images larger than 2GB!
Originally built to copy a block device (a Mac mini in FireWire mass-storage mode) to an image file, skipping all-null blocks so that the image is written as a sparse file and saves space.
Also contains example functions for thousands-separator number formatting (e.g. 10,000,000) and human-readable magnitude formatting (e.g. 10.0GB).
#!/usr/bin/env php
<?php

// Ticks let PHP dispatch the SIGALRM handler (registered with pcntl_signal
// further down in this script) between statements of the copy loop.
declare(ticks=1);

// Default scale for bcmath calls that do not pass one explicitly.
bcscale(3);

// Read a file and process it in chunks.
//
// Settings may be overridden on the command line:
//     script [infile [outfile [block-count]]]
// With no arguments the original hard-coded values are used.
$infile  = isset($argv[1]) ? $argv[1] : "/dev/sde";
$outfile = isset($argv[2]) ? $argv[2] : "/media/recover/MacMini-whole-sparse.img";

// Bytes read and written per block.
$blksz = 4096;

// Expected number of blocks in the source; only used for progress reporting.
// A non-numeric third argument is ignored in favour of the default.
$blkct = (isset($argv[3]) && ctype_digit($argv[3])) ? $argv[3] : 19537686;

// Expected total size in bytes, kept as a bcmath string.
$blkct_b = bcmul($blkct, $blksz, 0);
013 |
/**
 * Format a number with thousands separators, e.g. 10000000 -> "10,000,000".
 *
 * Only the leading run of digits is grouped. A leading "-" and anything that
 * follows the integer digits (for example a decimal fraction) are passed
 * through unchanged.
 *
 * @param int|string $number Value to format; it is cast to string first.
 * @return string The formatted number ("" for an empty input).
 */
function ksep($number) {
    $number = (string) $number;

    // This pattern always matches: optional sign, digits, then the remainder.
    preg_match('/\A(-?)(\d*)(.*)\z/s', $number, $parts);
    list(, $sign, $digits, $rest) = $parts;

    // Reverse so the groups of three are counted from the least significant
    // digit, then reverse back.
    $grouped = strrev(implode(",", str_split(strrev($digits), 3)));

    return $sign . $grouped . $rest;
}
026 |
/**
 * Render a byte count as a short human-readable size such as "74.53GB".
 *
 * The value is divided by 1024 until it drops below 1024 (or the largest
 * known unit is reached) and is printed with about four significant digits:
 * whole numbers get no decimals, otherwise 3, 2 or 1 decimals depending on
 * how many digits precede the decimal point.
 *
 * @param int|string $bytes Byte count (an integer or a bcmath numeric string).
 * @return string Formatted size including its unit, e.g. "512B", "1.500kB".
 */
function prettyGigs($bytes) {
    $powers   = " kMGTPEZY";
    $maxPower = strlen($powers) - 1;
    $bytes    = (string) $bytes;
    $power    = 0;
    $mod      = "1";

    // Scales are passed explicitly so the result does not depend on whatever
    // bcscale() happens to be in effect. ">=" makes an exact power of 1024
    // roll over to the next unit ("1MB", not "1024kB").
    while ($power < $maxPower && bccomp(bcdiv($bytes, $mod, 3), "1024", 3) >= 0) {
        $mod = bcmul($mod, "1024", 0);
        ++$power;
    }
    $num = bcdiv($bytes, $mod, 3);

    // Position of the decimal point == number of characters before it.
    $dot = strpos($num, ".");
    $hasFraction = ($dot !== false) && (rtrim(substr($num, $dot + 1), "0") !== "");

    if (!$hasFraction) {
        $fmt = "%0.0f%s";
    } elseif ($dot === 1) {
        $fmt = "%0.3f%s";
    } elseif ($dot === 2) {
        $fmt = "%0.2f%s";
    } else {
        $fmt = "%0.1f%s";
    }

    $unit = $power ? $powers[$power] . "B" : "B";
    return sprintf($fmt, $num, $unit);
}
056 |
/**
 * SIGALRM handler: print a one-line progress report and re-arm the alarm.
 *
 * Reads the globals blksz, blkct, blkct_b, data, sparse and old_count, and
 * stores the current block total back into old_count so the next call can
 * report throughput since this one. Counters that have not been set yet are
 * treated as 0, so the handler is safe to fire before the copy loop starts.
 *
 * Output fields: D = data blocks, S = skipped (sparse) blocks, + = blocks
 * processed, T = expected total, % = progress, followed by bytes per interval.
 *
 * @return void
 */
function stats() {
    // Re-arm first so the next report is scheduled one second from now.
    pcntl_alarm(1);

    $blksz     = $GLOBALS['blksz'];
    $blkct     = $GLOBALS['blkct'];
    $blkct_b   = $GLOBALS['blkct_b'];
    $data      = isset($GLOBALS['data'])      ? $GLOBALS['data']      : 0;
    $sparse    = isset($GLOBALS['sparse'])    ? $GLOBALS['sparse']    : 0;
    $old_count = isset($GLOBALS['old_count']) ? $GLOBALS['old_count'] : 0;

    $data_b   = bcmul($data, $blksz, 0);
    $sparse_b = bcmul($sparse, $blksz, 0);
    $count    = bcadd($data, $sparse, 0);
    $count_b  = bcmul($count, $blksz, 0);

    // Four decimals to match the %0.4f below; guard against a zero total.
    $pct = (bccomp((string) $blkct, "0") > 0)
        ? bcmul(bcdiv($count, $blkct, 10), "100", 4)
        : "0";

    // Bytes processed since the previous report.
    $speed = bcmul(bcsub($count, $old_count, 0), $blksz, 0);

    printf("\rD: %ss (%s) S: %ss (%s) +: %ss (%s) T: %ss (%s) %%: %0.4f %s/sec ",
        ksep($data), prettyGigs($data_b), ksep($sparse), prettyGigs($sparse_b), ksep($count), prettyGigs($count_b),
        ksep($blkct), prettyGigs($blkct_b), $pct, prettyGigs($speed));

    $GLOBALS['old_count'] = $count;
}
080 |
// Counters read by stats(); set before the alarm is armed so the handler
// never sees them undefined.
$data   = 0;   // blocks written as real data
$sparse = 0;   // all-NUL blocks skipped

// Refresh the statistics line once per second via SIGALRM.
pcntl_signal(SIGALRM, "stats", true);
pcntl_alarm(1);
echo "\n\nPrecaching...";

$input  = fopen($infile, "rb");
$output = fopen($outfile, "wb");

if (!is_resource($input))
    die(sprintf("%s failed!\n", $infile));
if (!is_resource($output))
    die(sprintf("%s failed!\n", $outfile));

while (!feof($input)) {
    $bufr = fread($input, $blksz);
    if ($bufr === false || $bufr === "") {
        // Read error, or end of input only discovered by this read:
        // there is nothing to copy or count.
        break;
    }
    $len = strlen($bufr);

    // count_chars() mode 3 returns a string of the distinct bytes used, so
    // an all-NUL block yields exactly "\0". (The original author measured
    // this at roughly half the cost of a crc32 pre-check.)
    if (count_chars($bufr, 3) === "\0") {
        // Nul block. Skip it, leaving a hole in the output.
        fseek($output, $len, SEEK_CUR);
        $sparse++;
        continue;
    }

    if (fwrite($output, $bufr, $len) !== $len)
        die(sprintf("\nwrite to %s failed!\n", $outfile));
    $data++;
}

// Seeking past the end does not grow the file by itself; if the input ended
// in skipped blocks, extend the output to the final offset.
ftruncate($output, ftell($output));

fclose($input);
fclose($output);

echo "\n\n";

?>
Ruby version:
#!/usr/bin/env ruby

# Ruby exposes no direct wrapper for alarm(2), so bind libc's alarm() by hand.
# Exposes Alarm.alarm(seconds).
module Alarm
  # Fiddle is the replacement for DL, which was removed in Ruby 2.2.
  # Fall back to DL so that older interpreters keep working.
  begin
    require 'fiddle/import'
    extend Fiddle::Importer
  rescue LoadError
    require 'dl/import'
    extend(defined?(DL::Importer) ? DL::Importer : DL::Importable)
  end

  dlload "libc.so.6"
  extern "unsigned int alarm(unsigned int)"
end
010 |
011 | # $stdout (standard out) is buffered! $stdout.flush() if you want it to show up immediately! |
012 | # Variables: |
013 | # * global variables start with $ |
014 | # * instance variables start with @ |
015 | # * local variables start with _ or a lowercase |
016 | # * constants start with an uppercase. |
017 | # Chr to ord: "x"[0] <- Access by index. |
018 | # Ord to chr: 120.chr <- Access chr method |
019 | # In for, start..end is start through end, while start...end is start up to but not including end. |
020 | # $data is, apparently, a reserved word. |
021 |
# Copy settings. They may be overridden on the command line:
#     script [source [destination [block-count]]]
# With no arguments the original hard-coded values are used.
$blksz   = 4096                          # bytes read and written per block
$blkct   = (ARGV[2] || 19537686).to_i    # expected number of blocks in the source
$blkct_b = $blkct * $blksz               # expected total size in bytes

src_file = ARGV[0] || "/dev/sde"
out_file = ARGV[1] || "/media/recover/MacMini-whole-sparse.img"

# Raw bytes are copied, so keep both handles in binary mode.
src = File.new(src_file, "rb")
out = File.new(out_file, File::CREAT | File::TRUNC | File::RDWR, 0600)
out.binmode
031 |
032 | #$old_sum_b = 0; |
033 |
# Format a number with thousands separators, e.g. 10000000 -> "10,000,000".
#
# kSep_num:: value to format; converted with to_s first.
#
# Only the leading run of digits is grouped. A leading "-" and anything after
# the integer digits (for example a decimal fraction) are passed through
# unchanged. Returns a new String ("" for an empty input).
def kSep(kSep_num)
  text = kSep_num.to_s

  # This pattern always matches: optional sign, digits, then the remainder.
  sign, digits, rest = text.match(/\A(-?)(\d*)(.*)\z/m).captures

  # Reverse so the groups of three are counted from the least significant
  # digit, then reverse back.
  grouped = digits.reverse.scan(/\d{1,3}/).join(",").reverse

  return sign + grouped + rest
end
048 |
# Render a byte count as a short human-readable magnitude such as "74.53G".
#
# bytes:: numeric byte count (converted with to_f).
#
# The value is divided by 1024 until it drops below 1024 (or the largest known
# unit is reached) and printed with about four significant digits: whole
# numbers get no decimals, otherwise 3, 2 or 1 decimals depending on how many
# digits precede the decimal point.
#
# Returns the number followed by a single unit character from " kMGTPEZY"
# (a space for plain bytes). The trailing "B" is not included.
def pMag(bytes)
  bytes  = bytes.to_f
  powers = " kMGTPEZY"
  power  = 0
  mod    = 1

  # ">=" makes an exact power of 1024 roll over to the next unit.
  while power < powers.length - 1 && (bytes / mod) >= 1024
    mod *= 1024
    power += 1
  end
  num = bytes / mod

  if num == num.floor
    # Whole number: no decimals. (Float#to_s always contains a ".", so this
    # has to be tested numerically rather than by looking for the point.)
    fmt = "%0.0f%s"
  else
    fmt = case num.abs.to_i.to_s.length
          when 1 then "%0.3f%s"
          when 2 then "%0.2f%s"
          else        "%0.1f%s"
          end
  end

  return sprintf(fmt, num, powers[power].chr)
end
072 |
# SIGALRM handler: re-arm the alarm, then print a one-line progress report
# (data blocks, sparse blocks, blocks processed, expected total, percentage
# and bytes handled since the previous report).
Signal.trap("ALRM") {
  Alarm.alarm(1)

  blocks_done    = $isdata + $issparse
  bytes_done     = blocks_done * $blksz
  previous_bytes = @old_sum_b || 0

  fields = [
    kSep($isdata),     pMag($isdata * $blksz),
    kSep($issparse),   pMag($issparse * $blksz),
    kSep(blocks_done), pMag(bytes_done),
    kSep($blkct),      pMag($blkct_b),
    (blocks_done.to_f / $blkct) * 100,
    pMag(bytes_done - previous_bytes)
  ]

  $stdout.printf("\rD: %ss (%sB) S: %ss (%sB) +: %ss (%sB) T: %ss (%sB) %%: %0.3f%% %sB/sec ", *fields)

  # Remember the running total so the next report can show the difference.
  @old_sum_b = bytes_done
  $stdout.flush
}
089 |
# Control vars (read by the ALRM handler)
$isdata   = 0   # blocks written as real data
$issparse = 0   # all-NUL blocks skipped

# Start display loop
Alarm.alarm(1)

$stdout.printf("Precaching...")
$stdout.flush

# Start processing loop
begin
  until src.eof?
    buf = src.read($blksz)
    break if buf.nil? || buf.empty?

    # A block is sparse only when every byte is NUL. Counting NULs avoids the
    # regex pitfall that \Z (unlike \z) also matches before a trailing
    # newline, which would misclassify a block of NULs ending in "\n".
    if buf.count("\0") == buf.length
      $issparse += 1
      # Skip exactly as many bytes as were read; the final block may be short.
      out.seek(buf.length, File::SEEK_CUR)
    else
      $isdata += 1
      out.write(buf)
    end
  end

  # Make everything nice and even: seeking past the end does not grow the
  # file, so extend it to the final offset in case the input ended in
  # skipped blocks.
  out.truncate(out.pos)
ensure
  src.close
  out.close
end

$stdout.printf("\n\n")

exit(0)