Add: import data management command
parent d2da96ad1b
commit 90a6b3d32b
3 changed files with 166 additions and 0 deletions
0    archivist/sites/furaffinity/management/__init__.py    Normal file
166    archivist/sites/furaffinity/management/commands/import_data.py    Normal file
@@ -0,0 +1,166 @@
# archivist/sites/furaffinity/management/commands/import_data.py

import os
import json
from datetime import datetime

from blake3 import blake3
from tqdm.auto import tqdm

from django.core.management.base import BaseCommand
from django.core.exceptions import ObjectDoesNotExist
from django.utils import timezone
from django.utils.text import slugify

from sites.furaffinity.models import FA_Submission, FA_Submission_File, FA_Tags, FA_User, FA_Species, FA_Gender, FA_Mature


class Command(BaseCommand):
    help = 'Import data from JSON files in a folder or a single JSON file to the furaffinity archive'

    def add_arguments(self, parser):
        parser.add_argument('path', type=str, help='Path to the folder containing JSON files or a single JSON file')
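
    # Usage sketch (hypothetical paths; assumes this app's management commands are
    # discovered by manage.py in the usual Django way):
    #
    #   python manage.py import_data /path/to/fa_exports/                   # whole folder of .json files
    #   python manage.py import_data /path/to/fa_exports/example.png.json   # single file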

    def handle(self, *args, **kwargs):
        path = kwargs['path']

        if os.path.isfile(path):
            self.process_json_file(path)
        elif os.path.isdir(path):
            self.process_json_folder(path)
        else:
            self.stdout.write(self.style.ERROR(f"The path '{path}' is not a valid file or folder."))
            return

    def process_json_file(self, file_path):
        # tqdm.write instead of self.stdout.write so the progress bar is not broken up
        tqdm.write(self.style.NOTICE(f"Importing data from: {file_path}"))

        with open(file_path) as f:
            data = json.load(f)

        self.import_data(data, file_path)

        tqdm.write(self.style.SUCCESS('Data imported successfully.'))

    def process_json_folder(self, folder_path):
        if not os.path.exists(folder_path):
            tqdm.write(self.style.ERROR(f"The folder '{folder_path}' does not exist."))
            return

        for file_name in tqdm(os.listdir(folder_path), dynamic_ncols=True):
            if file_name.endswith('.json'):
                file_path = os.path.join(folder_path, file_name)
                self.process_json_file(file_path)
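
    # Shape of the JSON metadata this importer reads (keys taken from the lookups
    # below; the example values are illustrative only):
    #
    #   {
    #     "id": 12345678, "url": "...", "title": "...", "description": "...",
    #     "date": "2023-01-01 12:00:00", "artist": "...", "artist_url": "...",
    #     "species": "...", "tags": ["..."], "rating": "...", "comments": 0,
    #     "views": 0, "gender": "...", "theme": "...", "fa_category": "...",
    #     "subcategory": "...", "extension": "...", "height": 0, "width": 0
    #   }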
    def import_data(self, data, json_file_path):
        submission, created = FA_Submission.objects.get_or_create(submission_id=data["id"])

        submission.media_url = data["url"]
        submission.title = data["title"]
        submission.description = data["description"]

        submission.date = timezone.make_aware(datetime.strptime(data["date"], "%Y-%m-%d %H:%M:%S"))

        artist, created = FA_User.objects.get_or_create(artist_url=data["artist_url"], artist=data["artist"])
        submission.artist = artist

        species, created = FA_Species.objects.get_or_create(species=data["species"])
        submission.species = species

        for tag_name in data["tags"]:
            tag_slug = slugify(tag_name)
            try:
                # Check if the tag already exists in the database by slug
                tag = FA_Tags.objects.get(tag_slug=tag_slug)
            except ObjectDoesNotExist:
                # If the tag does not exist, create a new tag and generate the slug
                tag = FA_Tags(tag=tag_name)
                tag.tag_slug = tag_slug

            tag.save()  # Save the tag (either new or existing)

            submission.tags.add(tag)  # Add the tag to the submission

        mature, created = FA_Mature.objects.get_or_create(mature=data["rating"])
        submission.mature_rating = mature

        submission.number_of_comments = data["comments"]
        submission.views = data["views"]

        gender, created = FA_Gender.objects.get_or_create(gender=data["gender"])
        submission.gender = gender

        submission.fa_theme = data["theme"]
        submission.fa_category = data["fa_category"]
        submission.gallery_type = data["subcategory"]
        submission.file_extension = data["extension"]
        submission.image_height = data["height"]
        submission.image_width = data["width"]

        # The media file is expected to sit next to its JSON metadata, named the
        # same minus the ".json" suffix.
        file_path = json_file_path.removesuffix(".json")

        # Handle file import
        if os.path.exists(file_path):
            file_hash = self.compute_file_hash(file_path)
            try:
                # Deduplicate on the BLAKE3 hash: reuse a file that was already imported
                file_instance = FA_Submission_File.objects.get(file_hash=file_hash)
                tqdm.write(self.style.NOTICE(f"Skipping: {file_path} file, already imported"))
            except FA_Submission_File.DoesNotExist:
                # If the file hasn't been imported yet, create a new file instance,
                # store it under its hash, and link it to the submission
                with open(file_path, 'rb') as file:
                    file_instance = FA_Submission_File()
                    file_instance.file_hash = file_hash

                    file_name = os.path.basename(file_path)
                    _, file_ext = os.path.splitext(file_name)
                    hash_file_name = file_hash + file_ext
                    file_instance.file.save(hash_file_name, file)

                    file_instance.file_name = file_name
                    file_instance.save()

            # Link the file instance to the submission
            submission.file = file_instance
        else:
            tqdm.write(self.style.WARNING(f"File not found: {file_path}"))

        submission.save()

    def compute_file_hash(self, file_path):
        try:
            # Compute the BLAKE3 hash of the file, reading it in 64 KiB chunks
            hasher = blake3()
            with open(file_path, 'rb') as f:
                while chunk := f.read(65536):
                    hasher.update(chunk)
            return hasher.hexdigest()
        except Exception as e:
            tqdm.write(self.style.WARNING(f"Error computing file hash: {e}"))
            return None
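
# Hedged note: besides `python manage.py import_data <path>`, the command can be invoked
# from code or tests through Django's call_command (the path below is hypothetical):
#
#   from django.core.management import call_command
#   call_command("import_data", "/path/to/fa_exports/")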