2023-01-14 14:56:59 +08:00
|
|
|
import hashlib
|
|
|
|
import os.path
|
|
|
|
|
2025-02-10 15:14:32 +08:00
|
|
|
from modules import shared, errors
|
2023-07-15 14:20:43 +08:00
|
|
|
import modules.cache
|
2023-01-14 14:56:59 +08:00
|
|
|
|
2023-07-15 14:20:43 +08:00
|
|
|
# Module-level alias of modules.cache.dump_cache; sha256() in this file calls
# it right after storing a freshly computed digest in the hash cache.
dump_cache = modules.cache.dump_cache
|
|
|
|
# Module-level alias of modules.cache.cache. The functions in this file call it
# with a cache name ("hashes", "hashes-addnet", "partial-hash") and treat the
# result as a dict-like mapping of per-file entries.
cache = modules.cache.cache
|
2023-01-14 14:56:59 +08:00
|
|
|
|
|
|
|
|
|
|
|
def calculate_sha256(filename):
    """Return the hexadecimal SHA-256 digest of the file at *filename*.

    The file is opened in binary mode and consumed in 1 MiB pieces, so the
    whole file never has to be held in memory at once.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()

    with open(filename, "rb") as stream:
        while True:
            piece = stream.read(chunk_size)
            if not piece:
                # An empty read means end of file.
                break
            digest.update(piece)

    return digest.hexdigest()
|
|
|
|
|
|
|
|
|
2023-05-20 03:59:29 +08:00
|
|
|
def sha256_from_cache(filename, title, use_addnet_hash=False):
    """Return the cached SHA-256 stored under *title*, or None if unusable.

    The "hashes-addnet" cache is consulted when *use_addnet_hash* is true,
    the "hashes" cache otherwise. Nothing is hashed here. None is returned
    when the file does not exist, when *title* has no cache entry, when the
    entry has no "sha256" value, or when the entry's "mtime" differs from
    the file's current modification time.
    """
    cache_name = "hashes-addnet" if use_addnet_hash else "hashes"
    hashes = cache(cache_name)

    try:
        ondisk_mtime = os.path.getmtime(filename)
    except FileNotFoundError:
        return None

    if title not in hashes:
        return None

    entry = hashes[title]
    cached_sha256 = entry.get("sha256", None)
    cached_mtime = entry.get("mtime", 0)

    # Only trust the stored digest if the file has not been touched since.
    if cached_sha256 is not None and ondisk_mtime == cached_mtime:
        return cached_sha256

    return None
|
|
|
|
|
|
|
|
|
2023-05-20 03:59:29 +08:00
|
|
|
def sha256(filename, title, use_addnet_hash=False):
    """Return the SHA-256 for *filename*, using and updating the hash cache.

    A still-valid cached digest (see sha256_from_cache) is returned as is.
    Otherwise, unless ``shared.cmd_opts.no_hashing`` is set (in which case
    None is returned), the digest is computed, stored under *title* together
    with the file's modification time, and the cache is dumped.

    With *use_addnet_hash* the digest comes from addnet_hash_safetensors and
    the "hashes-addnet" cache is used; otherwise the whole file is hashed
    with calculate_sha256 and the "hashes" cache is used.
    """
    hashes = cache("hashes-addnet") if use_addnet_hash else cache("hashes")

    sha256_value = sha256_from_cache(filename, title, use_addnet_hash)
    if sha256_value is not None:
        return sha256_value

    if shared.cmd_opts.no_hashing:
        return None

    # Progress message: the digest is appended to the same line once known.
    print(f"Calculating sha256 for {filename}: ", end='')
    if not use_addnet_hash:
        sha256_value = calculate_sha256(filename)
    else:
        with open(filename, "rb") as file:
            sha256_value = addnet_hash_safetensors(file)
    print(f"{sha256_value}")

    new_entry = {
        "mtime": os.path.getmtime(filename),
        "sha256": sha256_value,
    }
    hashes[title] = new_entry
    dump_cache()

    return sha256_value
|
|
|
|
|
|
|
|
|
2023-05-20 03:59:29 +08:00
|
|
|
def addnet_hash_safetensors(b):
    """kohya-ss hash for safetensors from https://github.com/kohya-ss/sd-scripts/blob/main/library/train_util.py

    *b* is a seekable binary file object. The first 8 bytes are read as a
    little-endian integer n; those 8 bytes and the n bytes after them are
    skipped, and the SHA-256 hex digest of everything that follows is
    returned. The current position of *b* on entry does not matter.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()

    b.seek(0)
    length_field = b.read(8)
    skipped = int.from_bytes(length_field, "little")

    # Jump past the length field and the block it describes.
    b.seek(skipped + 8)
    while True:
        piece = b.read(chunk_size)
        if not piece:
            break
        digest.update(piece)

    return digest.hexdigest()
|
2023-01-14 14:56:59 +08:00
|
|
|
|
2025-02-10 15:14:32 +08:00
|
|
|
|
2025-02-15 23:49:48 +08:00
|
|
|
def partial_hash_from_cache(filename, *, ignore_cache: bool = False, digits: int = 8):
    """Legacy short hash computed from a small slice of the file; collision-prone.

    Retained only for compatibility - do not use it for anything new.

    The SHA-256 of up to 0x10000 bytes starting at offset 0x100000 is stored
    in the 'partial-hash' cache, keyed by the file name, alongside the file's
    modification time. The first *digits* hex characters are returned. A
    cached value is reused when its recorded mtime matches the file's and
    *ignore_cache* is false.

    Returns 'NOFILE' when the file does not exist, or when any other error
    occurs (that case is additionally reported through errors.report).
    """
    try:
        filename = str(filename)
        mtime = os.path.getmtime(filename)
        hashes = cache('partial-hash')

        entry = hashes.get(filename, {})
        stored_mtime = entry.get("mtime", 0)
        stored_hash = entry.get("hash", None)
        if not ignore_cache and stored_hash and stored_mtime == mtime:
            return stored_hash[:digits]

        with open(filename, 'rb') as file:
            file.seek(0x100000)
            partial_hash = hashlib.sha256(file.read(0x10000)).hexdigest()

        hashes[filename] = {'mtime': mtime, 'hash': partial_hash}
        return partial_hash[:digits]

    except FileNotFoundError:
        # A missing file is an expected condition, not worth reporting.
        return 'NOFILE'
    except Exception:
        errors.report(f'Error calculating partial hash for {filename}', exc_info=True)
        return 'NOFILE'
|