Add message retention and hide emails by default
- db: cleanup_old_messages(days) purges messages older than N days in batches; recipients/reads/deliveries/reactions follow via ON DELETE CASCADE. Returns attachment file_ids no longer referenced by any surviving message (forwarded copies keep their files) and removes their image_uploads rows
- server: MESSAGE_RETENTION_DAYS env var (default 0 = keep forever); hourly cleanup deletes expired messages and securely removes orphaned attachment blobs from the upload dir
- schema: email_visible now defaults to 0 — previously any logged-in user who knew a UUID could read another user's email via get_profile
- migrations: SQL script to apply the new default and reset the flag on existing databases (run manually, see file header)
- docker-compose: document MESSAGE_RETENTION_DAYS

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
81
db.py
81
db.py
@@ -1724,3 +1724,84 @@ def cleanup_old_reactions(days: int = 90, batch_size: int = 10000) -> int:
|
||||
finally:
|
||||
conn.close()
|
||||
return total
|
||||
|
||||
|
||||
def cleanup_old_messages(days: int, batch_size: int = 1000) -> tuple[int, list[str]]:
    """Delete messages older than ``days`` days in batches.

    message_recipients / message_reads / message_deliveries / message_reactions
    rows are expected to go with the messages via ON DELETE CASCADE (defined
    in the schema, not in this function).

    The cutoff timestamp is resolved once on the database server and reused
    by every query, so the set of messages scanned for attachments is exactly
    the set of messages the batched DELETE removes. Re-evaluating NOW() per
    query would let a message cross the threshold mid-run and be purged
    without its attachment ever being considered for removal.

    Args:
        days: Retention window in days. Values <= 0 mean "keep forever" and
            make this function a no-op, so a misconfigured caller can never
            wipe the whole table.
        batch_size: Maximum number of messages deleted per committed batch.

    Returns:
        ``(deleted_count, orphaned_file_ids)`` where ``orphaned_file_ids`` are
        the attachment file_ids that were referenced by deleted messages and
        are no longer referenced by any surviving message. Their
        ``image_uploads`` rows are removed here; the caller is responsible
        for removing the blobs from the upload directory (the db layer does
        not touch the filesystem).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (a LIMIT of 0 would
            never make progress and the batch loop would not terminate).
    """
    if days <= 0:
        # 0 (or a negative value) means retention is disabled.
        return 0, []
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Upper bound on the number of placeholders in one IN (...) list.
    in_clause_chunk = 500

    total = 0
    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Pin the cutoff once, using the database clock.
        cursor.execute("SELECT DATE_SUB(NOW(), INTERVAL %s DAY)", (days,))
        cutoff = cursor.fetchone()[0]

        # Attachment file_ids referenced by the messages about to be deleted.
        cursor.execute(
            "SELECT DISTINCT image_file_id FROM messages "
            "WHERE created_at < %s AND image_file_id IS NOT NULL",
            (cutoff,),
        )
        candidate_files = [row[0] for row in cursor.fetchall()]

        # Delete in committed batches so each transaction stays small.
        while True:
            cursor.execute(
                "DELETE FROM messages WHERE created_at < %s LIMIT %s",
                (cutoff, batch_size),
            )
            deleted = cursor.rowcount
            conn.commit()
            total += deleted
            if deleted < batch_size:
                break
    finally:
        conn.close()

    if not candidate_files:
        return total, []

    # A file is orphaned only if no surviving (newer) message still references
    # it (e.g. a forwarded copy).
    # NOTE(review): a message referencing one of these files that is inserted
    # after this check is not seen here — confirm whether forwarding of very
    # old attachments can race with the cleanup job.
    orphaned: list[str] = []
    conn = get_connection()
    try:
        cursor = conn.cursor()

        still_referenced: set[str] = set()
        for start in range(0, len(candidate_files), in_clause_chunk):
            chunk = candidate_files[start:start + in_clause_chunk]
            placeholders = ", ".join(["%s"] * len(chunk))
            cursor.execute(
                "SELECT DISTINCT image_file_id FROM messages "
                f"WHERE image_file_id IN ({placeholders})",
                chunk,
            )
            still_referenced.update(row[0] for row in cursor.fetchall())

        orphaned = [f for f in candidate_files if f not in still_referenced]

        if orphaned:
            # Drop the upload bookkeeping rows for files nobody references.
            for start in range(0, len(orphaned), in_clause_chunk):
                chunk = orphaned[start:start + in_clause_chunk]
                placeholders = ", ".join(["%s"] * len(chunk))
                cursor.execute(
                    f"DELETE FROM image_uploads WHERE file_id IN ({placeholders})",
                    chunk,
                )
            conn.commit()
    finally:
        conn.close()

    return total, orphaned
|
||||
|
||||
@@ -65,6 +65,9 @@ services:
|
||||
# Metadata retention (days)
|
||||
METADATA_RETENTION_DAYS: 90
|
||||
|
||||
# Message retention (days); 0 = keep messages forever
|
||||
MESSAGE_RETENTION_DAYS: 0
|
||||
|
||||
volumes:
|
||||
db_data:
|
||||
uploads:
|
||||
|
||||
17
migrations/2026-06-12_email_visible_default_off.sql
Normal file
17
migrations/2026-06-12_email_visible_default_off.sql
Normal file
@@ -0,0 +1,17 @@
|
||||
-- Privacy hardening: hide email addresses by default.
|
||||
--
|
||||
-- Previously email_visible defaulted to 1, so any logged-in user who knew
|
||||
-- (or guessed) a UUID could read another user's email via get_profile.
|
||||
-- New installs get DEFAULT 0 from schema.sql; this migration fixes
|
||||
-- EXISTING databases.
|
||||
--
|
||||
-- Run manually against the encrypted_chat database:
|
||||
-- mysql -u chat -p encrypted_chat < migrations/2026-06-12_email_visible_default_off.sql
|
||||
--
|
||||
-- NOTE: the UPDATE resets the flag for ALL users, including any who
|
||||
-- explicitly opted in to a visible email. Users who want their email
|
||||
-- visible must re-enable it in their profile settings.
|
||||
|
||||
-- Step 1: change only the column default, so profiles inserted from now on
-- without an explicit email_visible value start with the email hidden.
ALTER TABLE user_profiles ALTER COLUMN email_visible SET DEFAULT 0;
|
||||
|
||||
-- Step 2: hide the email on every existing profile that currently exposes it.
-- The WHERE clause restricts the write to rows that actually change.
-- Running this again later would also reset users who opted in since.
UPDATE user_profiles SET email_visible = 0 WHERE email_visible = 1;
|
||||
@@ -155,7 +155,7 @@ CREATE TABLE IF NOT EXISTS user_profiles (
|
||||
user_id CHAR(36) NOT NULL PRIMARY KEY,
|
||||
phone VARCHAR(50) DEFAULT NULL,
|
||||
phone_visible TINYINT(1) NOT NULL DEFAULT 0,
|
||||
email_visible TINYINT(1) NOT NULL DEFAULT 1,
|
||||
email_visible TINYINT(1) NOT NULL DEFAULT 0,
|
||||
location VARCHAR(255) DEFAULT NULL,
|
||||
location_visible TINYINT(1) NOT NULL DEFAULT 0,
|
||||
avatar_file VARCHAR(255) DEFAULT NULL,
|
||||
|
||||
16
server.py
16
server.py
@@ -203,6 +203,10 @@ CONNECTION_RL_MAX = 20 # max requests per window per connection
|
||||
MAX_CONNECTIONS_PER_IP = 10
|
||||
MAX_CONNECTIONS_GLOBAL = 200
|
||||
METADATA_RETENTION_DAYS = int(os.getenv("METADATA_RETENTION_DAYS", "90"))
|
||||
# Message retention: 0 (default) keeps messages forever; N > 0 purges
|
||||
# messages (and their per-recipient ciphertexts, deliveries, reads,
|
||||
# reactions and orphaned attachment blobs) older than N days.
|
||||
MESSAGE_RETENTION_DAYS = int(os.getenv("MESSAGE_RETENTION_DAYS", "0"))
|
||||
# TCP keepalive settings (seconds)
|
||||
TCP_KEEPALIVE_IDLE = 25 # Start keepalive probes after 25s of idle
|
||||
TCP_KEEPALIVE_INTERVAL = 10 # Send probes every 10s
|
||||
@@ -3177,6 +3181,18 @@ async def main():
|
||||
reads_del, reactions_del)
|
||||
except Exception as e:
|
||||
logger.warning("Metadata cleanup error: %s", e)
|
||||
if MESSAGE_RETENTION_DAYS > 0:
|
||||
try:
|
||||
msgs_del, orphan_files = await adb.cleanup_old_messages(MESSAGE_RETENTION_DAYS)
|
||||
for fid in orphan_files:
|
||||
p = _safe_upload_path(fid, ".enc")
|
||||
if p:
|
||||
await asyncio.to_thread(_secure_delete, p)
|
||||
if msgs_del or orphan_files:
|
||||
logger.info("Message retention: %d messages, %d attachment files purged",
|
||||
msgs_del, len(orphan_files))
|
||||
except Exception as e:
|
||||
logger.warning("Message retention cleanup error: %s", e)
|
||||
|
||||
asyncio.create_task(_periodic_cleanup())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user