Files
livegraphs-django/dashboard_project/scripts/cleanup_duplicates.py
Kaj Kowalski c106792e78 chore(deps): update pre-commit config and apply bulk formatting
- build(pre-commit): upgrade hooks (django-upgrade 1.29.1, uv 0.9.7, ruff 0.14.3, bandit 1.8.6)
- build(pre-commit): add uv-lock hook, tombi TOML formatter, prettier-plugin-packagejson
- build(pre-commit): disable Django check hooks (commented out)
- build(pre-commit): switch npx → bunx for prettier execution
- build(node): add bun.lock, update prettier config with schema + packagejson plugin
- style: apply ruff format to all Python files (comments, spacing, imports)
- style: apply prettier format to all JS/CSS files (comment styles, spacing)
- style: apply tombi format to pyproject.toml (reordered sections, consistent formatting)
- chore: remove emoji from bash script comments for consistency

BREAKING CHANGE: Django check and migration check hooks disabled in pre-commit config
2025-11-05 14:34:08 +01:00

44 lines
1.4 KiB
Python

# dashboard_project/scripts/cleanup_duplicates.py
import os
import sys
# Add the parent of this script's directory to sys.path (presumably the project
# root, so the project's packages can be imported when this file is run directly).
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
# setdefault: an already-exported DJANGO_SETTINGS_MODULE takes precedence over this value.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dashboard_project.settings")
import django  # noqa: I001
# Django must be initialised before the model import below; keep this ordering.
django.setup()
from dashboard.models import ChatSession  # noqa: E402, I001
from django.db.models import Count  # noqa: E402
def cleanup_duplicates():
    """Delete duplicate ChatSession rows, keeping one row per duplicate group.

    Rows are treated as duplicates when they share the same ``session_id`` and
    ``data_source``. Within each such group the row with the lowest ``id`` is
    kept and all other rows are deleted. Progress and a final total are
    printed to stdout.

    The reported numbers count ``ChatSession`` rows only. ``QuerySet.delete()``
    returns the total number of deleted objects, which also includes related
    objects removed through cascading deletes, so the per-model breakdown it
    returns is used instead of that total.
    """
    print("Looking for duplicate ChatSessions...")
    duplicates = (
        ChatSession.objects.values("session_id", "data_source")
        .annotate(count=Count("id"))
        .filter(count__gt=1)
    )
    # Key under which QuerySet.delete() lists ChatSession rows in its per-model counts.
    model_label = ChatSession._meta.label
    total_deleted = 0
    for dup in duplicates:
        session_id = dup["session_id"]
        data_source = dup["data_source"]
        # Collect every id in this duplicate group in ascending order so the
        # first entry is the row that survives.
        ids = list(
            ChatSession.objects.filter(session_id=session_id, data_source=data_source)
            .order_by("id")
            .values_list("id", flat=True)
        )
        # Keep the first, delete the rest
        to_delete = ids[1:]
        _, per_model = ChatSession.objects.filter(id__in=to_delete).delete()
        # Ignore cascade-deleted related rows: only ChatSession rows are duplicates.
        deleted = per_model.get(model_label, 0)
        total_deleted += deleted
        print(f"Removed {deleted} duplicates for session_id={session_id}, data_source={data_source}")
    print(f"Done. Total duplicates removed: {total_deleted}")
# Run the cleanup only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    cleanup_duplicates()