Add message retention and hide emails by default

- db: cleanup_old_messages(days) purges messages older than N days in
  batches; recipients/reads/deliveries/reactions follow via ON DELETE
  CASCADE. Returns attachment file_ids no longer referenced by any
  surviving message (forwarded copies keep their files) and removes
  their image_uploads rows
- server: MESSAGE_RETENTION_DAYS env var (default 0 = keep forever);
  hourly cleanup deletes expired messages and securely removes orphaned
  attachment blobs from the upload dir
- schema: email_visible now defaults to 0 — previously any logged-in
  user who knew a UUID could read another user's email via get_profile
- migrations: SQL script to apply the new default and reset the flag on
  existing databases (run manually, see file header)
- docker-compose: document MESSAGE_RETENTION_DAYS

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Filip
2026-06-12 10:30:42 +02:00
parent 750290ddc1
commit f42ecf5c5b
5 changed files with 118 additions and 1 deletions

81
db.py
View File

@@ -1724,3 +1724,84 @@ def cleanup_old_reactions(days: int = 90, batch_size: int = 10000) -> int:
finally:
conn.close()
return total
def cleanup_old_messages(days: int, batch_size: int = 1000) -> tuple[int, list[str]]:
    """Delete messages older than ``days`` days in batches.

    message_recipients / message_reads / message_deliveries /
    message_reactions rows are expected to go with them via ON DELETE CASCADE.

    The cutoff timestamp is computed once (by the database) and reused for the
    attachment scan and for every delete batch. Evaluating ``NOW()`` separately
    in each statement would let a message cross the threshold between the scan
    and the delete, so it would be deleted without its attachment ever being
    reported as orphaned.

    Args:
        days: Retention period in days. Values <= 0 mean "keep forever" and
            make this function a no-op (a zero or negative interval would
            otherwise match every message in the table).
        batch_size: Maximum number of messages deleted per transaction.

    Returns:
        ``(deleted_count, orphaned_file_ids)`` - file_ids whose encrypted blobs
        are no longer referenced by any surviving message. Their image_uploads
        rows are removed here; the caller is responsible for removing the
        files themselves from the upload directory (the db layer does not
        touch the filesystem).

    Raises:
        ValueError: If ``batch_size`` is less than 1. ``LIMIT 0`` deletes
            nothing and would never satisfy the loop's exit condition.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if days <= 0:
        return 0, []

    # Upper bound on the number of placeholders in a single IN (...) list.
    chunk_size = 500

    # Fix the cutoff once, then collect the attachment file_ids referenced by
    # the messages that are about to be deleted.
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT DATE_SUB(NOW(), INTERVAL %s DAY)", (days,))
        cutoff = cursor.fetchone()[0]
        cursor.execute(
            "SELECT DISTINCT image_file_id FROM messages "
            "WHERE created_at < %s "
            "AND image_file_id IS NOT NULL",
            (cutoff,),
        )
        candidate_files = [row[0] for row in cursor.fetchall()]
    finally:
        conn.close()

    # Delete in bounded batches, one short transaction (and connection) each.
    total = 0
    while True:
        conn = get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute(
                "DELETE FROM messages WHERE created_at < %s LIMIT %s",
                (cutoff, batch_size),
            )
            count = cursor.rowcount
            conn.commit()
        finally:
            conn.close()
        total += count
        # A short batch means nothing older than the cutoff is left.
        if count < batch_size:
            break

    if not candidate_files:
        return total, []

    # A file is orphaned only if no surviving (newer) message still references
    # it (e.g. a forwarded copy).
    still_referenced: set[str] = set()
    conn = get_connection()
    try:
        cursor = conn.cursor()
        for start in range(0, len(candidate_files), chunk_size):
            chunk = candidate_files[start:start + chunk_size]
            placeholders = ", ".join(["%s"] * len(chunk))
            cursor.execute(
                f"SELECT DISTINCT image_file_id FROM messages "
                f"WHERE image_file_id IN ({placeholders})",
                chunk,
            )
            still_referenced.update(row[0] for row in cursor.fetchall())
    finally:
        conn.close()

    orphaned = [f for f in candidate_files if f not in still_referenced]
    if not orphaned:
        return total, orphaned

    # Drop the bookkeeping rows of the orphaned uploads; committed once so the
    # removal is all-or-nothing.
    conn = get_connection()
    try:
        cursor = conn.cursor()
        for start in range(0, len(orphaned), chunk_size):
            chunk = orphaned[start:start + chunk_size]
            placeholders = ", ".join(["%s"] * len(chunk))
            cursor.execute(
                f"DELETE FROM image_uploads WHERE file_id IN ({placeholders})",
                chunk,
            )
        conn.commit()
    finally:
        conn.close()
    return total, orphaned