#!/bin/sh -e
# Still needs a lot of cleanup, but the theory is sound.
# Originally meant to remove all vids that are 138 bytes in size (empty); the
# main body below deduplicates files by hardlinking identical content.
case "$(uname -s)" in
Linux)
xargs_0_stat_size_name() {
xargs -0 stat -c '%s %n'
}
xargs_0_sha256() {
# sha256sum emits "hash  name" (two spaces); squash them to one so the output
# matches FreeBSD's "sha256 -r" format below
xargs -0 sha256sum | sed -e 's/  / /'
}
;;
FreeBSD)
xargs_0_stat_size_name() {
xargs -0 stat -f '%z %N'
}
xargs_0_sha256() {
xargs -0 sha256 -r
}
;;
*)
echo "Unsupported OS: $(uname -s)" >&2
exit 1
;;
esac
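# Sketch of the 138-byte cleanup mentioned in the header, using the otherwise
# unused xargs_0_stat_size_name helper. Not invoked anywhere below; the exact
# threshold and the choice to print rm commands (rather than delete outright)
# are assumptions -- review the output before piping it to a shell.
remove_empty_vids() {
find . -type f -print0 | xargs_0_stat_size_name |
awk '$1 == 138 { print "rm -v \"" substr($0, index($0, " ") + 1) "\"" }'
}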
scrub() {
# Standalone helper (not invoked below): print ln commands that relink every
# duplicate file (identical SHA-256) to the first copy seen; no size filter.
find . -type f -print0 | xargs_0_sha256 | sort |
awk '{ file = substr($0, index($0, " ") + 1)
if (hash == $1) { print "ln -vf \"" name "\" \"" file "\"" } else { hash = $1; name = file } }'
exit $?
}
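# Abbreviate a 64-character SHA-256 hex digest to its first 7 and last 8 characters.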
shorthash() {
printf "%s..%s\n" "$(echo "${1}" | cut -c1-7)" "$(echo "${1}" | cut -c57-64)"
}
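# State carried across handle() calls: the previous hash and how many names shared it.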
oldhash=""
count=1
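# handle HASH NAME: if HASH repeats the previous hash, replace NAME with a hard
# link to the stored canonical copy; otherwise store NAME as the canonical copy.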
handle() {
hash="${1}"
name="${2}"
if [ "${oldhash}" = "${hash}" ]
then
# duplicate file
ln -f "${shastor}/${hash}" "${name}"
count=$((count + 1))
else
# new file
echo "Detected hash change after ${count} files: $(shorthash "${oldhash}") -> $(shorthash "${hash}")"
ln -f "${name}" "${shastor}/${hash}"
oldhash="${hash}"
count=1
fi
}
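# cdjs.sha holds "hash name" lines; a list like this can be regenerated with
# something like "find . -type f -print0 | xargs_0_sha256 > cdjs.sha" (assumption).
# shas/ is the content-addressed store of canonical copies, one file per hash.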
shalist=cdjs.sha
shastor=shas
mkdir -p "${shastor}"
sort "${shalist}" | while read hash name
do
handle "${hash}" "${name}"
done
