Differences between version 6 and the predecessor to the previous major change of CyberLeo/hashery.sh.
Other diffs: Previous Revision, Previous Author
Newer page: | version 6 | Last edited on Thursday, 24 March 2016 0:31:47 | by CyberLeo | Revert |
Older page: | version 5 | Last edited on Thursday, 18 December 2014 20:41:54 | by CyberLeo | Revert |
@@ -91,9 +91,9 @@
printf "Links:%u Files:%u Dirs:%u Other:%u\n" "${ls}" "${lf}" "${ld}" "${lo}" >&2
if [ "${ls}" -gt 0 ]
then
- pv -lN Syms "${hdir}/lnk.lst" | sed -e 's/^\.\///' | while read link
+ pv -lN Syms "${hdir}/lnk.lst" | sed -e 's/^\.\///; s/\\/\\\\/g
' | while read link
do
hash="$(readlink "${link}" | sha256sum | cut -d' ' -f1)"
type="l"
size="$(stat -c %s "${link}")"
@@ -104,19 +104,24 @@
if [ "${lf}" -gt 0 ]
then
cf=0
- while read file
+ pv -lN Files -s "${lf}" "${hdir}/reg.lst" | sed -e 's/^\.\///; s/\\/\\\\/g' |
while read file
do
cf="$(( ${cf} + 1 ))"
- hash="$(pv -N "${cf}/${lf}" "${file}" | sha256sum | cut -d' ' -f1)"
type="-"
size="$(stat -c %s "${file}")"
+ if [ "${size}" -gt 33554432 ]
+ then
+ hash="$(pv -N "${cf}/${lf}" "${file}" | sha256sum | cut -d' ' -f1)"
+ else
+ hash="$(sha256sum "${file}" | cut -d' ' -f1)"
+ fi
name="${file#./}"
printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
- done < "${hdir}/reg.lst"
>> "${hdir}/all.hash"
+ done >> "${hdir}/all.hash"
-# pv -lN Files "${hdir}/reg.lst" | sed -e 's/^\.\///' | while read file
+# pv -lN Files "${hdir}/reg.lst" | sed -e 's/^\.\///; s/\\/\\\\/g
' | while read file
# do
# hash="$(pv "${file}" | sha256sum | cut -d' ' -f1)"
# type="-"
# size="$(stat -c %s "${file}")"
version 6
#!/bin/sh
# hashery (FreeBSD userland: sha256, stat -f): walk the current directory and
# print one line per filesystem object to stdout.
#
# Format:
#   <hash> <type> <size> <filename>
#
# hash can be any supported hash, but it is up to the implementer to
# differentiate between them
# hash does not make much sense for certain types
#   Set to all zeroes for fifos, chars, directories, blocks, sockets,
#   whiteouts, or unknowns
#   Set to the hash of the content of the symlink, with trailing newline
#   (the link itself, not the file or directory to which it points)
#
# Type is one of
#   f - fifo
#   c - char
#   d - directory
#   b - block
#   - - regular
#   l - symlink
#   s - socket
#   w - whiteout
#   ? - unknown
#
# Size is expressed zero-padded in at least 8 hexadecimal digits
#   more digits can be used to express filesizes larger than 4 gigabytes
#
# Name is unquoted, and extends from the character after the first space
# after the size to the end of the line
# Non-empty directories need not be included, since they are implicitly
# specified by the stuff inside them
#
# Limitation: names are handled one per line, so a name containing a newline
# is not represented correctly.

sudo sh <<"EOF"
# Regular files larger than this many bytes get their own pv progress bar.
minsize=16777216
emptyhash="$(sha256 -qs "")"
# Same width as a real hash, with every hex digit replaced by 0.
zerohash="$(echo "${emptyhash}" | tr '[0-9a-f]' '0')"
lst="$(mktemp -t hashery)"
# Single quotes defer expansion until the trap fires. SIGKILL cannot be
# trapped, so it is not listed.
trap 'rm -f "${lst}"' EXIT HUP INT TERM

# First, compute the number of files for reporting.
# The sed drops the "." entry itself and strips the leading "./".
listlen=$(find . \
	| pv -cN count -l \
	| sed -e '/^\.$/d; s/^\.\///' \
	| sort \
	| tee "${lst}" \
	| wc -l)

# ${listlen} is deliberately unquoted so that any padding emitted by wc is
# dropped by word splitting.
# IFS= and -r make read return each name verbatim (no blank trimming, no
# backslash processing), so no escaping of the list is needed.
pv -cN list -ls ${listlen} "${lst}" | while IFS= read -r file
do
	# %p = mode in octal, %z = size in bytes, %Y = symlink target (if any).
	# -r keeps backslashes in the target intact.
	stat -f '%p %z %Y' "./${file}" | while read -r mode size target
	do
		# Now we have name, target, mode, size; compute the type
		hash="${zerohash}"
		# Mask the file-type bits out of the octal mode.
		type=$(printf %07o $(( 0${mode} & 0170000 )) )
		case ${type} in
			0010000) stype="f" ;;
			0020000) stype="c" ;;
			0040000) stype="d"
				# Ignore non-empty directories
				[ -z "$(ls -1a "./${file}" | sed -e '/^\.\{1,2\}$/d')" ] || continue
				;;
			0060000) stype="b" ;;
			0100000) stype="-"
				if [ "${size}" -eq 0 ]
				then
					hash="${emptyhash}"
				elif [ "${size}" -le "${minsize}" ]
				then
					hash="$(sha256 -q < "./${file}")"
				else
					hash="$(pv -cN file "./${file}" | sha256 -q)"
				fi
				;;
			0120000) stype="l"
				# Hash the link target followed by a newline, as the
				# format description above specifies.
				hash="$(printf '%s\n' "${target}" | sha256 -q)"
				;;
			0140000) stype="s" ;;
			0160000) stype="w" ;;
			*) stype="?" ;;
		esac
		printf "%s %s %08X %s\n" "${hash}" "${stype}" "${size:-0}" "${file}"
	done
done
EOF
Here are some possibilities for getting this to work on a Linux machine:
cat <<"EOF" | sudo sh -es -- /zfs/olddata/hashes/2010-08-07_05:00:00.hashery
# hashery (GNU/Linux userland: sha256sum, stat -c): walk the current directory
# and write "<hash> <type> <size> <filename>" lines, sorted by name, to the
# file given as $1 (default: /dev/stdout).
#
# The script itself arrives on stdin (sh -s), so nothing below may read from
# an unredirected stdin.
# Limitation: names are handled one per line, so a name containing a newline
# is not represented correctly.

out="${1:-/dev/stdout}"
# Scratch directory for the per-type lists and the unsorted result. It is
# left in place when the script ends; its path is reported below.
hdir="$(mktemp -dt hashery.XXXXXXXX)"

echo "Counting..." >&2
ls=$(find . -type l | pv -lN Syms | tee "${hdir}/lnk.lst" | wc -l)
lf=$(find . -type f | pv -lN Files | tee "${hdir}/reg.lst" | wc -l)
ld=$(find . -type d | pv -lN Dirs | tee "${hdir}/dir.lst" | wc -l)
# Everything that is not a symlink, regular file or directory.
# NOTE(review): these are counted and listed in oth.lst, but nothing below
# writes them to all.hash.
lo=$(find . -type l -o -type f -o -type d -o -print | pv -lN Other | tee "${hdir}/oth.lst" | wc -l)
echo "" >&2

echo "Tempdir is ${hdir}" >&2
echo "Output file is ${out}" >&2
printf "Links:%u Files:%u Dirs:%u Other:%u\n" "${ls}" "${lf}" "${ld}" "${lo}" >&2

# In every loop below:
#  - IFS= and -r make read return the path verbatim (no blank trimming, no
#    backslash processing);
#  - the "./" prefix from find is kept for all filesystem access, so a name
#    starting with "-" is never parsed as an option, and stripped only for
#    the printed name.

if [ "${ls}" -gt 0 ]
then
	pv -lN Syms "${hdir}/lnk.lst" | while IFS= read -r link
	do
		# Hash of the link target as printed by readlink (with newline).
		hash="$(readlink "${link}" | sha256sum | cut -d' ' -f1)"
		type="l"
		size="$(stat -c %s "${link}")"
		name="${link#./}"
		printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
	done >> "${hdir}/all.hash"
fi

if [ "${lf}" -gt 0 ]
then
	cf=0
	pv -lN Files -s "${lf}" "${hdir}/reg.lst" | while IFS= read -r file
	do
		cf="$(( ${cf} + 1 ))"
		type="-"
		size="$(stat -c %s "${file}")"
		# sha256sum always reads stdin here: given a file name containing
		# a backslash, GNU sha256sum prefixes its output line with "\",
		# which would end up in the hash field.
		if [ "${size}" -gt 33554432 ]
		then
			# Large file: show a per-file progress bar labelled n/total.
			hash="$(pv -N "${cf}/${lf}" "${file}" | sha256sum | cut -d' ' -f1)"
		else
			hash="$(sha256sum < "${file}" | cut -d' ' -f1)"
		fi
		name="${file#./}"
		printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
	done >> "${hdir}/all.hash"

	# Earlier variant, kept for reference (pv bar for every file):
#	pv -lN Files "${hdir}/reg.lst" | sed -e 's/^\.\///; s/\\/\\\\/g' | while read file
#	do
#		hash="$(pv "${file}" | sha256sum | cut -d' ' -f1)"
#		type="-"
#		size="$(stat -c %s "${file}")"
#		name="${file}"
#		printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
#	done >> "${hdir}/all.hash"
fi

if [ "${ld}" -gt 0 ]
then
	# All-zero placeholder with the same width as a real sha256 hash.
	hash="$(sha256sum /dev/null | cut -d' ' -f1 | tr 'A-Za-z0-9' '0')"
	type="d"
	size="0"
	pv -lN Dirs "${hdir}/dir.lst" | while IFS= read -r dir
	do
		# The starting directory itself is not listed.
		if [ "${dir}" = "." ]
		then
			continue
		fi
		# Non-empty directories are implied by their contents; skip them.
		if ls -1a "${dir}" | sed -e '/^\.\{1,2\}$/d' | grep -q '.'
		then
			continue
		fi
		name="${dir#./}"
		printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
	done >> "${hdir}/all.hash"
fi

# Byte-wise sort on the name (field 4 to end of line).
LC_ALL=C sort -k 4,99999999 "${hdir}/all.hash" > "${out}"
EOF