Coverage for src/django_audit_log/migrations/0007_merge_duplicate_paths.py: 44%
32 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-02 11:43 +0700
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-02 11:43 +0700
1from django.db import migrations
2from urllib.parse import urlparse
3from collections import defaultdict
def normalize_path(url):
    """Return only the path component of *url*.

    The value is parsed with ``urlparse`` and just its ``path`` attribute is
    kept, so a scheme and network location (host and port) are dropped, as
    are any query string and fragment.

    Args:
        url: The stored URL or path. Any falsy value (``None``, ``""``) is
            treated as "no path".

    Returns:
        The path component of *url*; ``""`` when *url* is falsy; *url*
        itself when ``urlparse`` rejects it as malformed.
    """
    if not url:
        return ""

    try:
        # A value with no scheme/netloc and a full URL are handled the same
        # way: in both cases only the parsed path is of interest.
        return urlparse(url).path
    except ValueError:
        # urlparse raises ValueError for malformed authorities (for example
        # an unbalanced "[" in the host). Hand the value back untouched so a
        # single bad record cannot abort the caller.
        return url
def merge_duplicate_paths(apps, schema_editor):
    """Merge LogPath records that point to the same normalized path.

    Every LogPath row is grouped by ``normalize_path(row.path)``. For each
    group holding more than one row, the row with the lowest primary key is
    kept: AccessLog's ``path``, ``referrer`` and ``response_url`` references
    to the other rows are repointed to it, the other rows are deleted, and
    finally the kept row's ``path`` is rewritten to the normalized value.
    Groups containing a single row are left untouched.

    Args:
        apps: App registry passed in by ``RunPython``; used to look up the
            ``LogPath`` and ``AccessLog`` models.
        schema_editor: Schema editor passed in by ``RunPython``; its
            connection alias selects the database for every query here.
    """
    LogPath = apps.get_model('django_audit_log', 'LogPath')
    AccessLog = apps.get_model('django_audit_log', 'AccessLog')
    db_alias = schema_editor.connection.alias
    access_logs = AccessLog.objects.using(db_alias)

    # Group paths by their normalized version. Ordering by primary key makes
    # "the first instance" of each group deterministic across runs/backends.
    path_groups = defaultdict(list)
    for path in LogPath.objects.using(db_alias).order_by('pk'):
        path_groups[normalize_path(path.path)].append(path)

    # AccessLog fields that reference LogPath and must be repointed.
    reference_fields = ('path', 'referrer', 'response_url')

    for normalized_path, paths in path_groups.items():
        if len(paths) < 2:
            # Nothing to merge for a path that occurs only once.
            continue

        # Keep the first path instance and merge the others into it.
        primary_path, *duplicate_paths = paths

        for duplicate in duplicate_paths:
            for field_name in reference_fields:
                access_logs.filter(
                    **{field_name: duplicate}
                ).update(**{field_name: primary_path})

            # All references have been moved; the duplicate can go.
            duplicate.delete(using=db_alias)

        # Rename the survivor only after its duplicates are gone: one of
        # them may already hold exactly ``normalized_path``, which would
        # collide if the column carries a uniqueness constraint
        # (NOTE(review): the model definition is not visible here).
        primary_path.path = normalized_path
        primary_path.save(using=db_alias)
class Migration(migrations.Migration):
    """Data migration that runs ``merge_duplicate_paths`` once."""

    # Must run after the last schema migration of the same app.
    dependencies = [
        (
            "django_audit_log",
            "0006_loguseragent_operating_system_version",
        ),
    ]

    operations = [
        migrations.RunPython(
            code=merge_duplicate_paths,
            # This is a data cleanup with no reverse step, so migrating
            # backwards is a no-op.
            reverse_code=migrations.RunPython.noop,
        ),
    ]