Add message retention and hide emails by default

- db: cleanup_old_messages(days) purges messages older than N days in
  batches; recipients/reads/deliveries/reactions follow via ON DELETE
  CASCADE. Returns attachment file_ids no longer referenced by any
  surviving message (forwarded copies keep their files) and removes
  their image_uploads rows
- server: MESSAGE_RETENTION_DAYS env var (default 0 = keep forever);
  hourly cleanup deletes expired messages and securely removes orphaned
  attachment blobs from the upload dir
- schema: email_visible now defaults to 0 — previously any logged-in
  user who knew a UUID could read another user's email via get_profile
- migrations: SQL script to apply the new default and reset the flag on
  existing databases (run manually, see file header)
- docker-compose: document MESSAGE_RETENTION_DAYS

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Filip
2026-06-12 10:30:42 +02:00
parent 750290ddc1
commit f42ecf5c5b
5 changed files with 118 additions and 1 deletions

81
db.py
View File

@@ -1724,3 +1724,84 @@ def cleanup_old_reactions(days: int = 90, batch_size: int = 10000) -> int:
finally: finally:
conn.close() conn.close()
return total return total
def cleanup_old_messages(days: int, batch_size: int = 1000) -> tuple[int, list[str]]:
    """Delete messages older than ``days`` days in batches.

    message_recipients / message_reads / message_deliveries / message_reactions
    rows go with them via ON DELETE CASCADE.

    The cutoff timestamp is computed once by the database and reused for the
    attachment scan and for every DELETE batch. Evaluating NOW() separately in
    each statement would let a message cross the cutoff between the scan and
    the delete, so it would be deleted without its attachment ever being
    considered for cleanup.

    Args:
        days: Retention window in days. Values <= 0 mean "keep forever":
            nothing is deleted and ``(0, [])`` is returned.
        batch_size: Maximum number of rows removed per DELETE statement.
            Must be at least 1.

    Returns:
        ``(deleted_count, orphaned_file_ids)`` — file_ids whose encrypted
        blobs are no longer referenced by any surviving message. Their
        image_uploads rows are removed here; the caller is responsible for
        removing the files from the upload directory (the db layer does not
        touch the filesystem).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (``LIMIT 0`` would
            never make progress and the batch loop would not terminate).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    if days <= 0:
        # Retention disabled: never interpret 0/negative as "delete everything".
        return 0, []

    in_list_chunk = 500  # max number of placeholders per IN (...) list

    # Fix the cutoff once and collect attachment file_ids referenced by the
    # messages that are about to be deleted.
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT DATE_SUB(NOW(), INTERVAL %s DAY)", (days,))
        # fetchall() drains the result set so the cursor can be reused safely.
        cutoff = cursor.fetchall()[0][0]
        cursor.execute(
            "SELECT DISTINCT image_file_id FROM messages "
            "WHERE created_at < %s "
            "AND image_file_id IS NOT NULL",
            (cutoff,),
        )
        candidate_files = [row[0] for row in cursor.fetchall()]
    finally:
        conn.close()

    # Delete in batches, one connection and one commit per batch.
    total = 0
    while True:
        conn = get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute(
                "DELETE FROM messages WHERE created_at < %s LIMIT %s",
                (cutoff, batch_size),
            )
            count = cursor.rowcount
            conn.commit()
        finally:
            conn.close()
        total += count
        if count < batch_size:
            # A short batch means nothing older than the cutoff is left.
            break

    if not candidate_files:
        return total, []

    # A file is orphaned only if no surviving (newer) message still references
    # it (e.g. a forwarded copy).
    still_referenced: set[str] = set()
    conn = get_connection()
    try:
        cursor = conn.cursor()
        for start in range(0, len(candidate_files), in_list_chunk):
            chunk = candidate_files[start:start + in_list_chunk]
            placeholders = ", ".join(["%s"] * len(chunk))
            cursor.execute(
                f"SELECT DISTINCT image_file_id FROM messages "
                f"WHERE image_file_id IN ({placeholders})",
                chunk,
            )
            still_referenced.update(row[0] for row in cursor.fetchall())
    finally:
        conn.close()

    orphaned = [f for f in candidate_files if f not in still_referenced]
    if orphaned:
        # Drop the upload bookkeeping rows for files nobody references anymore.
        conn = get_connection()
        try:
            cursor = conn.cursor()
            for start in range(0, len(orphaned), in_list_chunk):
                chunk = orphaned[start:start + in_list_chunk]
                placeholders = ", ".join(["%s"] * len(chunk))
                cursor.execute(
                    f"DELETE FROM image_uploads WHERE file_id IN ({placeholders})",
                    chunk,
                )
            conn.commit()
        finally:
            conn.close()
    return total, orphaned

View File

@@ -65,6 +65,9 @@ services:
# Metadata retention (days) # Metadata retention (days)
METADATA_RETENTION_DAYS: 90 METADATA_RETENTION_DAYS: 90
# Message retention (days); 0 = keep messages forever
MESSAGE_RETENTION_DAYS: 0
volumes: volumes:
db_data: db_data:
uploads: uploads:

View File

@@ -0,0 +1,17 @@
-- Privacy hardening: hide email addresses by default.
--
-- Previously email_visible defaulted to 1, so any logged-in user who knew
-- (or guessed) a UUID could read another user's email via get_profile.
-- New installs get DEFAULT 0 from schema.sql; this migration fixes
-- EXISTING databases.
--
-- Run manually against the encrypted_chat database:
-- mysql -u chat -p encrypted_chat < migrations/2026-06-12_email_visible_default_off.sql
--
-- NOTE: the UPDATE resets the flag for ALL users, including any who
-- explicitly opted in to a visible email. Users who want their email
-- visible must re-enable it in their profile settings.
-- Change the column default so rows inserted without an explicit
-- email_visible value are stored as hidden (0).
ALTER TABLE user_profiles ALTER COLUMN email_visible SET DEFAULT 0;
-- Hide the email on every existing profile that currently has it visible.
UPDATE user_profiles SET email_visible = 0 WHERE email_visible = 1;

View File

@@ -155,7 +155,7 @@ CREATE TABLE IF NOT EXISTS user_profiles (
user_id CHAR(36) NOT NULL PRIMARY KEY, user_id CHAR(36) NOT NULL PRIMARY KEY,
phone VARCHAR(50) DEFAULT NULL, phone VARCHAR(50) DEFAULT NULL,
phone_visible TINYINT(1) NOT NULL DEFAULT 0, phone_visible TINYINT(1) NOT NULL DEFAULT 0,
email_visible TINYINT(1) NOT NULL DEFAULT 1, email_visible TINYINT(1) NOT NULL DEFAULT 0,
location VARCHAR(255) DEFAULT NULL, location VARCHAR(255) DEFAULT NULL,
location_visible TINYINT(1) NOT NULL DEFAULT 0, location_visible TINYINT(1) NOT NULL DEFAULT 0,
avatar_file VARCHAR(255) DEFAULT NULL, avatar_file VARCHAR(255) DEFAULT NULL,

View File

@@ -203,6 +203,10 @@ CONNECTION_RL_MAX = 20 # max requests per window per connection
MAX_CONNECTIONS_PER_IP = 10 MAX_CONNECTIONS_PER_IP = 10
MAX_CONNECTIONS_GLOBAL = 200 MAX_CONNECTIONS_GLOBAL = 200
METADATA_RETENTION_DAYS = int(os.getenv("METADATA_RETENTION_DAYS", "90")) METADATA_RETENTION_DAYS = int(os.getenv("METADATA_RETENTION_DAYS", "90"))
# Message retention: 0 (default) keeps messages forever; N > 0 purges
# messages (and their per-recipient ciphertexts, deliveries, reads,
# reactions and orphaned attachment blobs) older than N days.
MESSAGE_RETENTION_DAYS = int(os.getenv("MESSAGE_RETENTION_DAYS", "0"))
# TCP keepalive settings (seconds) # TCP keepalive settings (seconds)
TCP_KEEPALIVE_IDLE = 25 # Start keepalive probes after 25s of idle TCP_KEEPALIVE_IDLE = 25 # Start keepalive probes after 25s of idle
TCP_KEEPALIVE_INTERVAL = 10 # Send probes every 10s TCP_KEEPALIVE_INTERVAL = 10 # Send probes every 10s
@@ -3177,6 +3181,18 @@ async def main():
reads_del, reactions_del) reads_del, reactions_del)
except Exception as e: except Exception as e:
logger.warning("Metadata cleanup error: %s", e) logger.warning("Metadata cleanup error: %s", e)
if MESSAGE_RETENTION_DAYS > 0:
try:
msgs_del, orphan_files = await adb.cleanup_old_messages(MESSAGE_RETENTION_DAYS)
for fid in orphan_files:
p = _safe_upload_path(fid, ".enc")
if p:
await asyncio.to_thread(_secure_delete, p)
if msgs_del or orphan_files:
logger.info("Message retention: %d messages, %d attachment files purged",
msgs_del, len(orphan_files))
except Exception as e:
logger.warning("Message retention cleanup error: %s", e)
asyncio.create_task(_periodic_cleanup()) asyncio.create_task(_periodic_cleanup())