# your_app/management/commands/import_data.py
"""Management command that imports FurAffinity submission metadata from JSON.

Each JSON file describes one submission. The media file that belongs to it is
looked up next to the JSON file, under the same path with the trailing
``.json`` removed.
"""
import json
import os
from datetime import datetime

from blake3 import blake3
from django.core.exceptions import ObjectDoesNotExist
from django.core.management.base import BaseCommand
from django.utils import timezone
from django.utils.text import slugify
from tqdm.auto import tqdm
#from PIL import Image
#from django.core.files import File

from sites.furaffinity.models import (
    FA_Submission,
    FA_Submission_File,
    FA_Tags,
    FA_User,
    FA_Species,
    FA_Gender,
    FA_Mature,
)


class Command(BaseCommand):
    """Import one JSON file, or every ``*.json`` file in a folder."""

    help = 'Import data from JSON files in a folder or a single JSON file to the furaffinity archive'

    def add_arguments(self, parser):
        """Register the single positional ``path`` argument."""
        parser.add_argument('path', type=str, help='Path to the folder containing JSON files or a single JSON file')

    def handle(self, *args, **kwargs):
        """Dispatch on whether ``path`` is a file or a directory."""
        path = kwargs['path']

        if os.path.isfile(path):
            self.process_json_file(path)
        elif os.path.isdir(path):
            self.process_json_folder(path)
        else:
            self.stdout.write(self.style.ERROR(f"The path '{path}' is not a valid file or folder."))

    def process_json_file(self, file_path):
        """Load one JSON file and import the submission it describes.

        A file that cannot be read or parsed is reported and skipped so that
        one bad file does not abort a whole folder import.
        """
        # tqdm.write is used instead of self.stdout.write so messages do not
        # break the progress bar drawn by process_json_folder.
        tqdm.write(self.style.NOTICE(f"Importing data from: {file_path}"))

        try:
            # Explicit UTF-8: the platform default encoding is locale dependent.
            with open(file_path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            tqdm.write(self.style.ERROR(f"Could not read JSON file '{file_path}': {e}"))
            return

        self.import_data(data, file_path)
        tqdm.write(self.style.SUCCESS('Data imported successfully.'))

    def process_json_folder(self, folder_path):
        """Import every ``*.json`` file directly inside ``folder_path``."""
        if not os.path.exists(folder_path):
            tqdm.write(self.style.ERROR(f"The folder '{folder_path}' does not exist."))
            return

        # Filter before building the progress bar so its total counts only the
        # JSON files; sorting makes the processing order deterministic.
        json_names = sorted(
            name for name in os.listdir(folder_path) if name.endswith('.json')
        )
        for file_name in tqdm(json_names, dynamic_ncols=True):
            self.process_json_file(os.path.join(folder_path, file_name))

    def import_data(self, data, json_file_path):
        """Create or update the submission described by ``data``.

        ``data`` is the parsed JSON mapping. The keys read are: id, url,
        title, description, date, artist_url, artist, species, tags, rating,
        comments, views, gender, theme, fa_category, subcategory, extension,
        height and width. A missing key raises ``KeyError``.

        ``json_file_path`` is only used to locate the media file stored next
        to the JSON file.
        """
        submission, _ = FA_Submission.objects.get_or_create(submission_id=data["id"])

        submission.media_url = data["url"]
        submission.title = data["title"]
        submission.description = data["description"]
        # The JSON date carries no timezone; make_aware applies Django's
        # current timezone to it.
        submission.date = timezone.make_aware(datetime.strptime(data["date"], "%Y-%m-%d %H:%M:%S"))

        artist, _ = FA_User.objects.get_or_create(artist_url=data["artist_url"], artist=data["artist"])
        submission.artist = artist

        species, _ = FA_Species.objects.get_or_create(species=data["species"])
        submission.species = species

        for tag_name in data["tags"]:
            # Tags are matched on their slug; the display name is only used
            # when the tag has to be created.
            tag, _ = FA_Tags.objects.get_or_create(
                tag_slug=slugify(tag_name),
                defaults={"tag": tag_name},
            )
            submission.tags.add(tag)

        mature, _ = FA_Mature.objects.get_or_create(mature=data["rating"])
        submission.mature_rating = mature

        submission.number_of_comments = data["comments"]
        submission.views = data["views"]

        gender, _ = FA_Gender.objects.get_or_create(gender=data["gender"])
        submission.gender = gender

        submission.fa_theme = data["theme"]
        submission.fa_category = data["fa_category"]
        submission.gallery_type = data["subcategory"]
        submission.file_extension = data["extension"]
        submission.image_height = data["height"]
        submission.image_width = data["width"]

        # The media file shares the JSON file's path minus the ".json" suffix.
        file_path = json_file_path.removesuffix(".json")
        if os.path.exists(file_path):
            file_instance = self._get_or_store_file(file_path)
            # None means the file could not be hashed; the submission is still
            # saved, just without a linked file.
            if file_instance is not None:
                submission.file = file_instance
        else:
            tqdm.write(self.style.WARNING(f"File not found: {file_path}"))

        submission.save()

    def _get_or_store_file(self, file_path):
        """Return the FA_Submission_File for ``file_path``, storing it if new.

        Files are de-duplicated by content hash. Returns ``None`` when the
        hash could not be computed (compute_file_hash has then already
        printed a warning).
        """
        file_hash = self.compute_file_hash(file_path)
        if file_hash is None:
            return None

        try:
            existing = FA_Submission_File.objects.get(file_hash=file_hash)
        except FA_Submission_File.DoesNotExist:
            pass
        else:
            tqdm.write(self.style.NOTICE(f"Skipping: {file_path} file, already imported"))
            return existing

        file_name = os.path.basename(file_path)
        _, file_ext = os.path.splitext(file_name)

        file_instance = FA_Submission_File()
        file_instance.file_hash = file_hash
        file_instance.file_name = file_name
        with open(file_path, 'rb') as media:
            # Stored under "<hash><ext>". save=False defers the database write
            # to the single save() below, once every field is populated.
            file_instance.file.save(file_hash + file_ext, media, save=False)
        file_instance.save()
        return file_instance

    def compute_file_hash(self, file_path):
        """Return the BLAKE3 hex digest of the file, or ``None`` on I/O error.

        The file is read in 64 KiB chunks so large files are not loaded into
        memory at once. An I/O failure is reported as a warning.
        """
        hasher = blake3()
        try:
            with open(file_path, 'rb') as f:
                while chunk := f.read(65536):
                    hasher.update(chunk)
        except OSError as e:
            tqdm.write(self.style.WARNING(f"Error computing file hash: {e}"))
            return None
        return hasher.hexdigest()