Note: You are viewing an old version of this page. View the current version.

Differences between version 3 and previous revision of CyberLeo/hashery.sh.

Other diffs: Previous Major Revision, Previous Author

Newer page: version 3 Last edited on Thursday, 20 June 2013 21:36:44 by CyberLeo Revert
Older page: version 2 Last edited on Tuesday, 10 January 2012 7:31:04 by CyberLeo Revert
@@ -74,34 +74,59 @@
 </code> 
  
 Here are some possibilities for getting this to work on a Linux machine: 
 <code brush="bash"> 
-cat ../.hash/lnk.lst | sed -e 's /^\.\ ///' | while read link  
-do  
- hash ="$(readlink "${link }" | sha256sum | cut -d' ' -f1) "  
- type="l"  
- size ="$(stat -c %s "${link}" )"  
- name="${link}"  
- printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"  
-done | tee -a ../.hash/all.hash  
+cat <<"EOF" | sudo sh -es - - /zfs /olddata /hashes /2010-08-07_05:00:00.hashery  
+out ="${1:-/dev/stdout }"  
+hdir ="$(mktemp -dt hashery.XXXXXXXX )" 
  
-cat ../ .hash/reg .lst | sed -e 's /^\ .\///' | while read file  
-do  
- hash =" $(sha256sum "${file }" | cut -d' ' -f1 )"  
- type ="-"  
- size=" $(stat -c %s "${file }")"  
- name =" ${file}"  
- printf "% s %s %08X %s\n" "${hash}" "${ type}" "${size}" "${name }"  
-done | tee -a ../.hash/all.hash  
+echo "Counting ..." >&2  
+ls=$(find . -type l | pv -lN Syms | tee "${hdir} /lnk .lst" | wc -l)  
+lf =$(find . -type f | pv -lN Files | tee "${hdir }/reg.lst " | wc -l )  
+ld =$(find . -type d | pv -lN Dirs | tee "${hdir }/dir.lst " | wc -l )  
+lo =$(find . -type s -o - type f -o -type d -o -print | pv -lN Other | tee "${hdir }/oth.lst " | wc -l)  
  
-hash= "$(sha256sum /dev/null | cut -d' ' -f1 | tr 'A-Za-z0-9' '0') "  
-type="d"  
-size="0"  
-cat ../.hash/dir.lst | sed -e '/^\.$/d; s/^\.\///' | while read dir  
-do  
- name= "${dir }"  
- ls -1a "${dir }" | sed -e '/^\.\{1, 2\}$/d' | grep -q '.' && continue  
- printf "%s %s %08X %s \n" "${hash }" "${type }" "${size }" "${name }"  
-done | tee -a ../.hash/all.hash  
+echo "" >&2  
+echo "Tempdir is ${hdir }" >&2  
+echo "Output file is ${out }" >& 2  
+printf "Links: %u Files: %u Dirs: %u Other: %u \n" "${ls }" "${lf }" "${ld }" "${lo }" >&2  
  
-LC_ALL=C sort -k 4,99999999 all.hash 
+if [ "${ls}" -gt 0 ]  
+then  
+ pv -lN Syms "${hdir}/lnk.lst" | sed -e 's/^\.\///' | while read link  
+ do  
+ hash="$(readlink "${link}" | sha256sum | cut -d' ' -f1)"  
+ type="l"  
+ size="$(stat -c %s "${link}")"  
+ name="${link}"  
+ printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"  
+ done > "${hdir}/all.hash"  
+fi  
+  
+if [ "${lf}" -gt 0 ]  
+then  
+ pv -lN Files "${hdir}/reg.lst" | sed -e 's/^\.\///' | while read file  
+ do  
+ hash="$(sha256sum "${file}" | cut -d' ' -f1)"  
+ type="-"  
+ size="$(stat -c %s "${file}")"  
+ name="${file}"  
+ printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"  
+ done > "${hdir}/all.hash"  
+fi  
+  
+if [ "${ld}" -gt 0 ]  
+then  
+ hash="$(sha256sum /dev/null | cut -d' ' -f1 | tr 'A-Za-z0-9' '0')"  
+ type="d"  
+ size="0"  
+ pv -lN Dirs "${hdir}/dir.lst" | sed -e '/^\.$/d; s/^\.\///' | while read dir  
+ do  
+ name="${dir}"  
+ ls -1a "${dir}" | sed -e '/^\.\{1,2\}$/d' | grep -q '.' && continue  
+ printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"  
+ done > "${hdir}/all.hash"  
+fi  
+  
+ LC_ALL=C sort -k 4,99999999 "${hdir}/ all.hash" > "${out}"  
+EOF  
 </code> 

version 3

#!/bin/sh

# Format:
# <hash> <type> <size> <filename>
#
# hash can be any supported hash, but it is up to the implementer to differentiate between them
# hash does not make much sense for certain types
# Set to all zeroes for fifos, chars, directories, blocks, sockets, whiteouts, or unknowns
# Set to the hash of the content of the symlink, with trailing newline
# (the link itself, not the file or directory to which it points)
#
# Type is one of
#  f - fifo
#  c - char
#  d - directory
#  b - block
#  - - regular
#  l - symlink
#  s - socket
#  w - whiteout
#  ? - unknown
#
# Size is expressed zero-padded in at least 8 hexadecimal digits
# more digits can be used to express filesizes larger than 4 gigabytes
#
# Name is unquoted, and extends from the character after the first space after the size to the end of the line
# Non-empty directories need not be included, since they are implicitly specified by the stuff inside them

sudo sh <<"EOF"
minsize=16777216
emptyhash="$(sha256 -qs "")"
zerohash="$(echo "${emptyhash}" | tr '[0-9a-f]' '0')"

lst="$(mktemp -t hashery)"
trap "rm -f '${lst}'" exit hup int term kill

# First, compute the number of files for reporting
listlen=$(find . | pv -cN count -l | sed -e '/^\.$/d; s/^\.\///; s/\\/\\\\/g' | sort | tee "${lst}" | wc -l)

oldifs="${IFS}"
IFS='
'
cat "${lst}" | pv -cN list -ls ${listlen} | while read file
do
  IFS="${oldifs}"
  stat -f '%p %z %Y' "./${file}" | while read mode size target
  do
    # Now we have name, target, mode, size; compute the type
    hash="${zerohash}"
    type=$(printf %07o $(( 0${mode} & 0170000 )) )
    case ${type} in
    0010000) stype="f" ;;
    0020000) stype="c" ;;
    0040000) stype="d"
      # Ignore non-empty directories
      [ -z "$(ls -1a "./${file}" | sed -e '/^\.\{1,2\}$/d')" ] || continue
      ;;
    0060000) stype="b" ;;
    0100000) stype="-"
      [ "${size}" -eq 0 ] && hash="${emptyhash}"
      [ "${size}" -le "${minsize}" ] && hash="$(sha256 -q < "${file}")"
      [ "${size}" -gt "${minsize}" ] && hash="$(pv -cN file "./${file}" | sha256 -q)"
      ;;
    0120000) stype="l"; hash="$(sha256 -qs "${target}")" ;;
    0140000) stype="s" ;;
    0160000) stype="w" ;;
    *) stype="?" ;;
    esac
    printf "%s %s %08X %s\n" "${hash}" "${stype}" "${size:-0}" "${file}"
  done
done
EOF

Here are some possibilities for getting this to work on a Linux machine:

cat <<"EOF" | sudo sh -es -- /zfs/olddata/hashes/2010-08-07_05:00:00.hashery
out="${1:-/dev/stdout}"
hdir="$(mktemp -dt hashery.XXXXXXXX)"

echo "Counting..." >&2
ls=$(find . -type l | pv -lN Syms | tee "${hdir}/lnk.lst" | wc -l)
lf=$(find . -type f | pv -lN Files | tee "${hdir}/reg.lst" | wc -l)
ld=$(find . -type d | pv -lN Dirs | tee "${hdir}/dir.lst" | wc -l)
lo=$(find . -type s -o -type f -o -type d -o -print | pv -lN Other | tee "${hdir}/oth.lst" | wc -l)

echo "" >&2
echo "Tempdir is ${hdir}" >&2
echo "Output file is ${out}" >&2
printf "Links:%u Files:%u Dirs:%u Other:%u\n" "${ls}" "${lf}" "${ld}" "${lo}" >&2

if [ "${ls}" -gt 0 ]
then
  pv -lN Syms "${hdir}/lnk.lst" | sed -e 's/^\.\///' | while read link
  do
    hash="$(readlink "${link}" | sha256sum | cut -d' ' -f1)"
    type="l"
    size="$(stat -c %s "${link}")"
    name="${link}"
    printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
  done > "${hdir}/all.hash"
fi

if [ "${lf}" -gt 0 ]
then
  pv -lN Files "${hdir}/reg.lst" | sed -e 's/^\.\///' | while read file
  do
    hash="$(sha256sum "${file}" | cut -d' ' -f1)"
    type="-"
    size="$(stat -c %s "${file}")"
    name="${file}"
    printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
  done > "${hdir}/all.hash"
fi

if [ "${ld}" -gt 0 ]
then
  hash="$(sha256sum /dev/null | cut -d' ' -f1 | tr 'A-Za-z0-9' '0')"
  type="d"
  size="0"
  pv -lN Dirs "${hdir}/dir.lst" | sed -e '/^\.$/d; s/^\.\///' | while read dir
  do
    name="${dir}"
    ls -1a "${dir}" | sed -e '/^\.\{1,2\}$/d' | grep -q '.' && continue
    printf "%s %s %08X %s\n" "${hash}" "${type}" "${size}" "${name}"
  done > "${hdir}/all.hash"
fi

LC_ALL=C sort -k 4,99999999 "${hdir}/all.hash" > "${out}"
EOF