From f42ecf5c5b85d94f8dcc3ac1461bfc17f0858a60 Mon Sep 17 00:00:00 2001
From: Filip
Date: Fri, 12 Jun 2026 10:30:42 +0200
Subject: [PATCH] Add message retention and hide emails by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- db: cleanup_old_messages(days) purges messages older than N days in
  batches; recipients/reads/deliveries/reactions follow via ON DELETE
  CASCADE. The age cutoff is computed once on the database clock and
  reused by every statement; days <= 0 is a no-op. Returns attachment
  file_ids no longer referenced by any surviving message (forwarded
  copies keep their files) and removes their image_uploads rows
- server: MESSAGE_RETENTION_DAYS env var (default 0 = keep forever);
  hourly cleanup deletes expired messages and securely removes orphaned
  attachment blobs from the upload dir
- schema: email_visible now defaults to 0 — previously any logged-in
  user who knew a UUID could read another user's email via get_profile
- migrations: SQL script to apply the new default and reset the flag on
  existing databases (run manually, see file header)
- docker-compose: document MESSAGE_RETENTION_DAYS

Co-Authored-By: Claude Fable 5
---
 db.py                                         | 105 +++++++++++++++++++
 docker-compose.yml                            |   3 +
 .../2026-06-12_email_visible_default_off.sql  |  17 +++
 schema.sql                                    |   2 +-
 server.py                                     |  16 +++
 5 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 migrations/2026-06-12_email_visible_default_off.sql

diff --git a/db.py b/db.py
index 5ab5196..e6857e9 100644
--- a/db.py
+++ b/db.py
@@ -1724,3 +1724,108 @@ def cleanup_old_reactions(days: int = 90, batch_size: int = 10000) -> int:
         finally:
             conn.close()
     return total
+
+
+def cleanup_old_messages(days: int, batch_size: int = 1000) -> tuple[int, list[str]]:
+    """Delete messages older than N days in batches.
+
+    message_recipients / message_reads / message_deliveries / message_reactions
+    rows go with them via ON DELETE CASCADE.
+
+    The age cutoff is computed once, on the database clock, and reused by
+    every statement. Evaluating NOW() per statement would let messages that
+    cross the threshold mid-run be deleted without their attachment ids
+    having been collected, leaving their blobs on disk with no record.
+
+    Args:
+        days: Retention window in days. Values <= 0 mean "keep forever" and
+            delete nothing (a zero or negative interval would otherwise
+            match every message).
+        batch_size: Maximum number of messages deleted per transaction;
+            must be >= 1.
+
+    Returns (deleted_count, orphaned_file_ids) — file_ids whose encrypted
+    blobs are no longer referenced by any surviving message. Their
+    image_uploads rows are removed here; the caller is responsible for
+    removing the files from the upload directory (db layer does not touch
+    the filesystem).
+
+    Raises:
+        ValueError: If batch_size is smaller than 1.
+    """
+    if batch_size < 1:
+        # LIMIT 0 deletes nothing and the loop below would never terminate
+        raise ValueError("batch_size must be >= 1")
+    if days <= 0:
+        return 0, []
+
+    # Fix the cutoff once and collect attachment file_ids referenced by the
+    # messages about to be deleted
+    conn = get_connection()
+    try:
+        cursor = conn.cursor()
+        cursor.execute("SELECT DATE_SUB(NOW(), INTERVAL %s DAY)", (days,))
+        cutoff = cursor.fetchone()[0]
+        cursor.execute(
+            "SELECT DISTINCT image_file_id FROM messages "
+            "WHERE created_at < %s AND image_file_id IS NOT NULL",
+            (cutoff,),
+        )
+        candidate_files = [row[0] for row in cursor.fetchall()]
+    finally:
+        conn.close()
+
+    total = 0
+    while True:
+        conn = get_connection()
+        try:
+            cursor = conn.cursor()
+            cursor.execute(
+                "DELETE FROM messages WHERE created_at < %s LIMIT %s",
+                (cutoff, batch_size),
+            )
+            count = cursor.rowcount
+            conn.commit()
+            total += count
+            if count < batch_size:
+                break
+        finally:
+            conn.close()
+
+    # A file is orphaned only if no surviving (newer) message still references
+    # it (e.g. a forwarded copy)
+    orphaned: list[str] = []
+    if candidate_files:
+        still_referenced: set[str] = set()
+        conn = get_connection()
+        try:
+            cursor = conn.cursor()
+            for i in range(0, len(candidate_files), 500):
+                chunk = candidate_files[i:i + 500]
+                placeholders = ", ".join(["%s"] * len(chunk))
+                cursor.execute(
+                    f"SELECT DISTINCT image_file_id FROM messages "
+                    f"WHERE image_file_id IN ({placeholders})",
+                    chunk,
+                )
+                still_referenced.update(row[0] for row in cursor.fetchall())
+        finally:
+            conn.close()
+        orphaned = [f for f in candidate_files if f not in still_referenced]
+
+    if orphaned:
+        conn = get_connection()
+        try:
+            cursor = conn.cursor()
+            for i in range(0, len(orphaned), 500):
+                chunk = orphaned[i:i + 500]
+                placeholders = ", ".join(["%s"] * len(chunk))
+                cursor.execute(
+                    f"DELETE FROM image_uploads WHERE file_id IN ({placeholders})",
+                    chunk,
+                )
+            conn.commit()
+        finally:
+            conn.close()
+
+    return total, orphaned
diff --git a/docker-compose.yml b/docker-compose.yml
index 776fe42..2e4c096 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -65,6 +65,9 @@ services:
       # Metadata retention (days)
       METADATA_RETENTION_DAYS: 90
 
+      # Message retention (days); 0 = keep messages forever
+      MESSAGE_RETENTION_DAYS: 0
+
 volumes:
   db_data:
   uploads:
diff --git a/migrations/2026-06-12_email_visible_default_off.sql b/migrations/2026-06-12_email_visible_default_off.sql
new file mode 100644
index 0000000..832b60a
--- /dev/null
+++ b/migrations/2026-06-12_email_visible_default_off.sql
@@ -0,0 +1,17 @@
+-- Privacy hardening: hide email addresses by default.
+--
+-- Previously email_visible defaulted to 1, so any logged-in user who knew
+-- (or guessed) a UUID could read another user's email via get_profile.
+-- New installs get DEFAULT 0 from schema.sql; this migration fixes
+-- EXISTING databases.
+--
+-- Run manually against the encrypted_chat database:
+--   mysql -u chat -p encrypted_chat < migrations/2026-06-12_email_visible_default_off.sql
+--
+-- NOTE: the UPDATE resets the flag for ALL users, including any who
+-- explicitly opted in to a visible email. Users who want their email
+-- visible must re-enable it in their profile settings.
+
+ALTER TABLE user_profiles ALTER COLUMN email_visible SET DEFAULT 0;
+
+UPDATE user_profiles SET email_visible = 0 WHERE email_visible = 1;
diff --git a/schema.sql b/schema.sql
index b7d8426..4bd49aa 100644
--- a/schema.sql
+++ b/schema.sql
@@ -155,7 +155,7 @@ CREATE TABLE IF NOT EXISTS user_profiles (
     user_id CHAR(36) NOT NULL PRIMARY KEY,
     phone VARCHAR(50) DEFAULT NULL,
     phone_visible TINYINT(1) NOT NULL DEFAULT 0,
-    email_visible TINYINT(1) NOT NULL DEFAULT 1,
+    email_visible TINYINT(1) NOT NULL DEFAULT 0,
     location VARCHAR(255) DEFAULT NULL,
     location_visible TINYINT(1) NOT NULL DEFAULT 0,
     avatar_file VARCHAR(255) DEFAULT NULL,
diff --git a/server.py b/server.py
index c1a58f8..c3a0f64 100644
--- a/server.py
+++ b/server.py
@@ -203,6 +203,10 @@ CONNECTION_RL_MAX = 20  # max requests per window per connection
 MAX_CONNECTIONS_PER_IP = 10
 MAX_CONNECTIONS_GLOBAL = 200
 METADATA_RETENTION_DAYS = int(os.getenv("METADATA_RETENTION_DAYS", "90"))
+# Message retention: 0 (default) keeps messages forever; N > 0 purges
+# messages (and their per-recipient ciphertexts, deliveries, reads,
+# reactions and orphaned attachment blobs) older than N days.
+MESSAGE_RETENTION_DAYS = int(os.getenv("MESSAGE_RETENTION_DAYS", "0"))
 # TCP keepalive settings (seconds)
 TCP_KEEPALIVE_IDLE = 25  # Start keepalive probes after 25s of idle
 TCP_KEEPALIVE_INTERVAL = 10  # Send probes every 10s
@@ -3177,6 +3181,18 @@ async def main():
                             reads_del, reactions_del)
             except Exception as e:
                 logger.warning("Metadata cleanup error: %s", e)
+            if MESSAGE_RETENTION_DAYS > 0:
+                try:
+                    msgs_del, orphan_files = await adb.cleanup_old_messages(MESSAGE_RETENTION_DAYS)
+                    for fid in orphan_files:
+                        p = _safe_upload_path(fid, ".enc")
+                        if p:
+                            await asyncio.to_thread(_secure_delete, p)
+                    if msgs_del or orphan_files:
+                        logger.info("Message retention: %d messages, %d attachment files purged",
+                                    msgs_del, len(orphan_files))
+                except Exception as e:
+                    logger.warning("Message retention cleanup error: %s", e)
 
     asyncio.create_task(_periodic_cleanup())
 