diff --git a/backend/apps/archive/admin.py b/backend/apps/archive/admin.py index b0ee50a..54feacb 100644 --- a/backend/apps/archive/admin.py +++ b/backend/apps/archive/admin.py @@ -1,8 +1,6 @@ from django.contrib import admin -from django.utils.dateformat import format -from django.utils.timezone import localtime -from .models import SourceSiteModel, CreatorModel, DescriptionModel, TagModel, PostModel +from .models import SourceSiteModel class SourceSiteAdmin(admin.ModelAdmin): @@ -10,74 +8,3 @@ class SourceSiteAdmin(admin.ModelAdmin): admin.site.register(SourceSiteModel, SourceSiteAdmin) - - -class DescriptionAdmin(admin.ModelAdmin): - list_display = ["hash", "description_at", "date_modified", "date_created"] - - def description_at(self, obj): - if len(str(obj.content)) >= 80: - return obj.content[:77] + "..." - else: - return obj.content - - description_at.short_description = "Description" - - -admin.site.register(DescriptionModel, DescriptionAdmin) - - -class TagAdmin(admin.ModelAdmin): - list_display = ["slug", "name", "date_modified", "date_created"] - - -admin.site.register(TagModel, TagAdmin) - - -class CreatorAdmin(admin.ModelAdmin): - list_display = [ - "slug", - "name", - "source_site", - "date_created", - "date_last_import", - ] - - -admin.site.register(CreatorModel, CreatorAdmin) - - -class PostAdmin(admin.ModelAdmin): - list_display = [ - "post_id", - "source_site", - "creator", - "title", - "description_at", - "mature", - "date_created_fromated", - "date_imported_formated", - "date_last_import_formated", - ] - - @admin.display(description="Description") - def description_at(self, obj): - if len(str(obj.description.first().content)) >= 80: - return obj.description.first().content[:77] + "..." - else: - return obj.description.first().content - - @admin.display(description="Date Created") - def date_created_fromated(self, obj): - return format(localtime(obj.date_created), "Y-m-d H:i:s") - - @admin.display(description="Date Imported") - def date_imported_formated(self, obj): - return format(localtime(obj.date_imported), "Y-m-d H:i:s") - - @admin.display(description="Last Import Date") - def date_last_import_formated(self, obj): - return format(localtime(obj.date_last_import), "Y-m-d H:i:s") - - -admin.site.register(PostModel, PostAdmin) diff --git a/backend/apps/archive/management/__init__.py b/backend/apps/archive/management/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/apps/archive/management/commands/__init__.py b/backend/apps/archive/management/commands/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/apps/archive/management/commands/import_data.py b/backend/apps/archive/management/commands/import_data.py deleted file mode 100644 index b037daa..0000000 --- a/backend/apps/archive/management/commands/import_data.py +++ /dev/null @@ -1,326 +0,0 @@ -from datetime import datetime -import os -import json - -from django.utils import timezone -from tqdm.auto import tqdm - -from PIL import Image as PillowImage - -from django.core.management.base import BaseCommand - -from apps.files.models import ( - FileNameModel, - PostFileModel, -) - -from apps.archive.models import ( - SourceSiteModel, - CategoryModel, - CreatorModel, - PostModel, - DescriptionModel, - CreatorDescription, - PostDescription, - TagModel, -) - -from utils.hash import compute_string_hash_blake3, compute_file_hash_blake3 -from utils.files import get_mime_type, categorize_mime_type - -from apps.files.tasks import ( - generate_blur_hash_PostFile, - generate_md5_hash_PostFile, - generate_video_thumbnail, -) - - -class Command(BaseCommand): - help = "Import data from JSON files in a folder or a single JSON file to the Twitter archive" - - def add_arguments(self, parser): - parser.add_argument( - "path", - type=str, - help="Path to the folder containing JSON files or a single JSON file", - ) - parser.add_argument( - "--delete", action="store_true", help="Delete imported files" - ) - - def handle(self, *args, **kwargs): - path = kwargs["path"] - delete = kwargs["delete"] - - if os.path.isfile(path): - self.process_json_file(path, delete) - elif os.path.isdir(path): - self.process_json_folder(path, delete) - else: - self.stdout.write( - self.style.ERROR(f"The path '{path}' is not a valid file or folder.") - ) - return - - def process_json_file(self, file_path, delete): - # self.stdout.write(self.style.NOTICE(f"Importing data from: {file_path}")) - tqdm.write(f"Importing data from: {file_path}") - - with open(file_path, "r") as f: - data = json.load(f) - - self.import_data(data, file_path, delete) - - # self.stdout.write(self.style.SUCCESS('Data imported successfully.')) - tqdm.write(self.style.SUCCESS("Data imported successfully.")) - - def process_json_folder(self, folder_path, delete): - if not os.path.exists(folder_path): - # self.stdout.write(self.style.ERROR(f"The folder '{folder_path}' does not exist.")) - tqdm.write(self.style.ERROR(f"The folder '{folder_path}' does not exist.")) - return - - # Use os.walk dynamically, and count total files to show progress incrementally - tqdm.write("Counting total files...") - total_files = sum(len(files) for _, _, files in os.walk(folder_path)) - - with tqdm( - total=total_files, desc="Processing JSON files", dynamic_ncols=True - ) as progress_bar: - for root, dirs, files in os.walk(folder_path): - for file_name in files: - progress_bar.update(1) # Increment progress for each file - if file_name.endswith(".json"): - file_path = os.path.join(root, file_name) - self.process_json_file(file_path, delete) - - def import_file(self, file_path, model, delete=False): - """ - Imports a file if it doesn't already exist in the database and returns the instance. - - :param file_path: The path to the file to import. - :param model: The model class to which the file instance should be linked. - :param delete: Whether to delete the imported file after processing. - :return: The file instance. - """ - - file_instance = None # Initialize file_instance to None - - if os.path.exists(file_path): - file_hash = compute_file_hash_blake3(file_path, logger=self) - - file_name = os.path.basename(file_path) - _, file_ext = os.path.splitext(file_name) - hash_file_name = file_hash + file_ext - - file_name_instance, _ = FileNameModel.objects.get_or_create( - filename=file_name - ) - - file_instance, created = PostFileModel.objects.get_or_create( - hash_blake3=file_hash - ) - - if created: - with open(file_path, "rb") as file: - file_instance.file.save(hash_file_name, file) - file_instance.save() - - file_instance.name.add(file_name_instance) - file_instance.extension = file_ext - file_instance.size = os.path.getsize(file_path) - - file_mime = get_mime_type(file_path) - file_type = categorize_mime_type(file_mime) - - file_instance.file_type = file_type - file_instance.mimetype = file_mime - - file_instance.save() - - if file_instance.mimetype.startswith("image/"): - # Add Image blur hash if not existing. - if not file_instance.blur_hash: - generate_blur_hash_PostFile.delay(file_instance.id) - - # Get image resolution - im = PillowImage.open(file_instance.file) - file_instance.height, file_instance.width = im.size - file_instance.save() - - if file_instance.file_type in ["video", "gif"]: - if not file_instance.thumbnail: - generate_video_thumbnail.delay(file_instance.id) - - if not file_instance.hash_md5: - generate_md5_hash_PostFile.delay(file_instance.id) - - if created: - tqdm.write( - self.style.SUCCESS( - f"Imported: {file_path} file, new instance created" - ) - ) - else: - tqdm.write( - self.style.SUCCESS(f"Imported: {file_path} file, instance updated") - ) - - # Delete the imported file if the --delete flag is used - self.delete_imported_file(file_path, delete) - - return file_instance - - def delete_imported_file(self, file_path, delete=False): - """ - Delete the file if the --delete flag is used - - :param delete: Whether to delete the imported file after processing. - """ - if delete: - if os.path.exists(file_path): - os.remove(file_path) - tqdm.write(self.style.SUCCESS(f"Deleted: {file_path}")) - else: - tqdm.write(self.style.WARNING(f"File not found: {file_path}")) - - def import_data(self, data, file_path_json, delete): - """ """ - - # Get source site and create it if it doesn't exist - category = data.get("category") - - source_site_instance, Null = SourceSiteModel.objects.get_or_create( - slug=category - ) - - source_site_instance.save() - - if category == "twitter": - if "author" in data.keys(): - creator_instance, Null = CreatorModel.objects.get_or_create( - slug=data["author"]["name"], source_site=source_site_instance - ) - - creator_instance.creator_id = data["author"]["id"] - creator_instance.name = data["author"]["nick"] - - if "description" in data["author"].keys(): - description_text = data["author"]["description"] - description_hash = compute_string_hash_blake3( - description_text, logger=self - ) - - description_instance, created = ( - DescriptionModel.objects.get_or_create(hash=description_hash) - ) - - if created: - description_instance.content = description_text - description_instance.save() - - # Add to CreatorDescription through model with a custom date_imported - creator_description_instance, created = ( - CreatorDescription.objects.get_or_create( - creator=creator_instance, description=description_instance - ) - ) - - creator_description_instance.date_imported = timezone.make_aware( - datetime.fromtimestamp(os.path.getmtime(file_path_json)) - ) - creator_description_instance.save() - - creator_instance.date_created = timezone.make_aware( - datetime.strptime(data["author"]["date"], "%Y-%m-%d %H:%M:%S") - ) - - creator_instance.save() - - post_instance, Null = PostModel.objects.get_or_create( - post_id=data["tweet_id"], source_site=source_site_instance - ) - - if "subcategory" in data.keys(): - category_instance, _ = CategoryModel.objects.get_or_create( - slug=data["subcategory"] - ) - if _: - category_instance.name = data["subcategory"].capitalize() - category_instance.save() - - creator_instance.refresh_from_db() - creator_instance.categories.add(category_instance) - creator_instance.save() - - post_instance.category.add(category_instance) - - post_instance.creator = creator_instance - - post_instance.date_created = timezone.make_aware( - datetime.strptime(data["date"], "%Y-%m-%d %H:%M:%S"), - ) - - if "sensitive" in data.keys(): - if data["sensitive"]: - post_instance.mature = data["sensitive"] - - if "content" in data.keys(): - description_text = data["content"] - description_hash = compute_string_hash_blake3(description_text, logger=self) - - description_instance, created = DescriptionModel.objects.get_or_create( - hash=description_hash - ) - - description_instance.save() - - if created: - description_instance.date_created = timezone.make_aware( - datetime.strptime(data["date"], "%Y-%m-%d %H:%M:%S") - ) - description_instance.content = description_text - description_instance.save() - - post_description_instance, created = PostDescription.objects.get_or_create( - post=post_instance, description=description_instance - ) - if created: - post_description_instance.date_imported = timezone.make_aware( - datetime.fromtimestamp(os.path.getmtime(file_path_json)) - ) - - post_description_instance.save() - - post_instance.description.add(description_instance) - - if "hashtags" in data.keys(): - for tag in data["hashtags"]: - tag_instance, Null = TagModel.objects.get_or_create(slug=tag) - - if tag_instance.name == "": - tag_instance.name = tag - - tag_instance.save() - - post_instance.tags.add(tag_instance) - - file_path = file_path_json.removesuffix(".json") - - # Handle file import - file_instance = self.import_file(file_path, PostFileModel, delete) - - if file_instance: - post_instance.files.add(file_instance) - - if category_instance.slug == "avatar": - creator_instance.refresh_from_db() - creator_instance.avatar = file_instance - creator_instance.save() - - if category_instance.slug == "background": - creator_instance.refresh_from_db() - creator_instance.banner = file_instance - creator_instance.save() - - post_instance.save()