Still needs a lot of cleanup, but the theory is sound.

#!/bin/sh -e

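# NOTE(review): intended step - remove all vids that are 138 bytes in size
# (empty). Nothing visible in this script performs that removal yet; confirm
# whether it is still planned.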

# Select platform-specific helpers. Both helpers read NUL-delimited file names
# on stdin (e.g. from `find -print0`) and print one line per file:
#   xargs_0_stat_size_name  ->  "<size in bytes> <name>"
#   xargs_0_sha256          ->  "<sha256 hex digest> <name>"
case "$(uname -s)" in
Linux)
  xargs_0_stat_size_name() {
    # -r: GNU xargs would otherwise run stat once with no operands when the
    # input list is empty.
    xargs -0 -r stat -c '%s %n'
  }
  xargs_0_sha256() {
    # -r: without it an empty input list yields a bogus digest line for "-".
    # sed collapses the first two-space run (the separator sha256sum prints
    # between digest and name) into a single space.
    xargs -0 -r sha256sum | sed -e 's/  / /'
  }
  ;;
FreeBSD)
  xargs_0_stat_size_name() {
    xargs -0 stat -f '%z %N'
  }
  xargs_0_sha256() {
    # -r asks sha256 for the "digest name" layout.
    xargs -0 sha256 -r
  }
  ;;
*)
  # Diagnostics belong on stderr; exit statuses must be in 0..255
  # (`exit -1` is rejected outright by some /bin/sh implementations).
  echo "Unsupported API: $(uname -s)" >&2
  exit 1
  ;;
esac

# Print (but do not execute) the `ln -vf` commands that would hard-link every
# duplicate regular file under the current directory to the first file seen
# with the same SHA-256 digest.
# Globals:   none (uses the xargs_0_sha256 helper)
# Outputs:   one "ln -vf '<first>' '<duplicate>'" line per duplicate on stdout;
#            names are single-quoted so the output is safe to feed to sh
# Returns:   does not return - terminates the shell with the pipeline's status
# NOTE(review): no size filter is applied here, although an earlier comment
# spoke of files "less than 1MB"; confirm whether `find -size` was intended.
# File names containing newlines are not supported (line-oriented pipeline).
scrub() {
  find . -type f -print0 | xargs_0_sha256 | sort | awk -v q="'" '
    # Wrap s in single quotes, turning each embedded quote into the
    # close-quote, escaped-quote, reopen-quote sequence.
    function shquote(s,    parts, n, i, out) {
      n = split(s, parts, q)
      out = q parts[1]
      for (i = 2; i <= n; i++) {
        out = out q "\\" q q parts[i]
      }
      return out q
    }
    BEGIN { hash = ""; name = "" }
    {
      # Force string comparison of digests.
      digest = $1 ""
      # Everything after "<digest> " is the name, including any blanks;
      # taking only the second field would truncate such names.
      path = substr($0, length($1) + 2)
      if (hash == digest) {
        print "ln -vf " shquote(name) " " shquote(path)
      } else {
        hash = digest
        name = path
      }
    }'
  exit $?
}

# Abbreviate a digest for log messages.
# Arguments: $1 - digest string (a 64-character hex digest is the usual input)
# Outputs:   characters 1-7 and 57-64 of $1 joined by "..", then a newline
shorthash() {
  # Both command substitutions are expanded from the incoming $1 before `set`
  # replaces the positional parameters, so afterwards $1 is the leading slice
  # and $2 the trailing slice.
  set -- "$(echo "${1}" | cut -c 1-7)" "$(echo "${1}" | cut -c 57-64)"
  printf "%s..%s\n" "${1}" "${2}"
}

# State carried across handle() calls. Records must arrive grouped by hash
# (the caller sorts them) so that equal hashes are adjacent.
oldhash=""  # hash of the group being processed; empty before the first record
count=1     # number of files seen so far in the current group

# Process one "<hash> <name>" record.
#   - First file of a hash group: hard-link it into the store as
#     ${shastor}/<hash>.
#   - Later files of the same group: replace them with a hard link to the
#     store entry.
# Globals:   shastor (read); hash, name, oldhash, count (written)
# Arguments: $1 - content hash, $2 - path of the file carrying that hash
# Outputs:   a "Detected hash change" line on stdout whenever a new group
#            starts after a previous one; a warning on stderr for records
#            with an empty field
# Returns:   0 for skipped records; otherwise the status of the last command
handle() {
  hash="${1}"
  name="${2}"

  # A blank or truncated input line would otherwise make ln target the store
  # directory itself.
  if [ -z "${hash}" ] || [ -z "${name}" ]
  then
    echo "Skipping malformed record: '${hash}' '${name}'" >&2
    return 0
  fi

  if [ "${oldhash}" = "${hash}" ]
  then
    # duplicate file
    # `--` keeps a name starting with '-' from being parsed as an option.
    ln -f -- "${shastor}/${hash}" "${name}"
    count="$(( count + 1 ))"
  else
    # new file
    # Nothing has been processed before the very first record, so there is
    # no change to report yet.
    if [ -n "${oldhash}" ]
    then
      echo "Detected hash change after ${count} files: $(shorthash "${oldhash}") -> $(shorthash "${hash}")"
    fi
    ln -f -- "${name}" "${shastor}/${hash}"
    oldhash="${hash}"
    count=1
  fi
}

# Driver: feed every record of the hash list to handle(), grouped by hash.
shalist=cdjs.sha   # input list; each line is read as "<hash> <name>"
shastor=shas       # directory holding one hard link per distinct hash
mkdir -p "${shastor}"

# Sorting makes equal hashes adjacent, which handle() relies on.
# read -r keeps backslashes in file names literal; without it they are
# consumed as escapes and ln would be pointed at the wrong path.
# The loop runs in the pipeline's subshell, so oldhash/count persist between
# iterations but are not visible after `done`.
sort "${shalist}" | while read -r hash name
do
  handle "${hash}" "${name}"
done