"""Scan a music library and plot summary charts about its tracks.

Usage: python3 MusicLibVisualizing/main.py <music_library_path>

The script walks the given folder, reads the tags of every supported audio
file with mutagen, and then shows four charts: track length distribution,
the ten most frequent artists, genre distribution, and average track length
per genre.
"""
import os
import sys

# Lower-case extensions of the files that are handed to mutagen.
AUDIO_EXTENSIONS = ('.mp3', '.flac', '.opus')

# Placeholder stored when a tag is missing or has no values.
UNKNOWN_TAG = 'Unknown'


def is_audio_file(filename: str) -> bool:
    """Return True if *filename* ends with one of AUDIO_EXTENSIONS.

    The comparison ignores case so that files such as ``SONG.MP3`` are not
    silently left out of the scan.
    """
    return filename.lower().endswith(AUDIO_EXTENSIONS)


def first_tag(tags, key: str) -> str:
    """Return the first value stored under *key*, or ``'Unknown'``.

    *tags* is any object with a dict-style ``get`` method whose values are
    sequences. A missing key and an empty sequence are both reported as
    ``'Unknown'`` instead of raising.
    """
    values = tags.get(key)
    return values[0] if values else UNKNOWN_TAG


def scan_library(music_library: str) -> list:
    """Collect title/artist/album/genre/length for every audio file found.

    Files that cannot be read are reported on stdout and skipped, so one
    broken file does not abort the whole scan.

    Returns a list with one dict per successfully read track.
    """
    # Third-party imports are deferred (as the original script did for most
    # of them) so that a usage error is reported without loading them.
    import mutagen
    from tqdm import tqdm

    music_data = []
    # Materialize the walk so the outer progress bar has a known total.
    folders = list(os.walk(music_library))
    for root, _dirs, files in tqdm(folders, desc="Walking folders", unit="folders"):
        for file in tqdm(files, desc=f"Processing folder {root}", unit="files"):
            if not is_audio_file(file):
                continue
            try:
                # easy=True makes mutagen return its "easy" wrappers, so MP3
                # tags are exposed under 'title'/'artist'/... rather than
                # under raw ID3 frame IDs.
                audio = mutagen.File(os.path.join(root, file), easy=True)
                if audio is None:
                    # mutagen could not determine the file type.
                    print(f"Error processing {file}: unrecognized file format")
                    continue
                music_data.append({
                    "title": first_tag(audio, 'title'),
                    "artist": first_tag(audio, 'artist'),
                    "album": first_tag(audio, 'album'),
                    "genre": first_tag(audio, 'genre'),
                    "length": audio.info.length,
                })
            except Exception as e:
                # Deliberate best-effort boundary: report and keep scanning.
                print(f"Error processing {file}: {e}")
    return music_data


def show_charts(music_data: list) -> None:
    """Build a DataFrame from *music_data* and display the four charts.

    Each chart is shown with ``plt.show()`` in turn. *music_data* must not
    be empty, because the charts index the 'length', 'artist' and 'genre'
    columns.
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns

    df = pd.DataFrame(music_data)

    # Histogram of track lengths with a density curve on top.
    plt.figure(figsize=(10, 6))
    sns.histplot(df['length'], bins=30, kde=True)
    plt.title('Track Length Distribution')
    plt.xlabel('Length (seconds)')
    plt.ylabel('Count')
    plt.show()

    # Horizontal bars for the ten artists with the most tracks.
    top_artists = df['artist'].value_counts().head(10)
    plt.figure(figsize=(10, 6))
    sns.barplot(x=top_artists.values, y=top_artists.index)
    plt.title('Top 10 Most Frequent Artists')
    plt.xlabel('Number of Tracks')
    plt.show()

    # Share of tracks per genre.
    genre_counts = df['genre'].value_counts()
    plt.figure(figsize=(10, 6))
    genre_counts.plot.pie(autopct='%1.1f%%')
    plt.title('Genre Distribution')
    plt.ylabel('')
    plt.show()

    # Mean track length per genre, sorted ascending.
    avg_length_by_genre = df.groupby('genre')['length'].mean().sort_values()
    plt.figure(figsize=(10, 6))
    sns.barplot(x=avg_length_by_genre.values, y=avg_length_by_genre.index)
    plt.title('Average Song Length by Genre')
    plt.xlabel('Average Length (seconds)')
    plt.show()


def main(argv=None) -> int:
    """Run the scan and show the charts; return the process exit code.

    *argv* defaults to ``sys.argv`` and must hold exactly the program name
    and the library path. Returns 1 after printing the usage line when it
    does not, otherwise 0.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print("Usage: python3 MusicLibVisualizing/main.py <music_library_path>")
        return 1

    music_data = scan_library(argv[1])
    print(f"Found a total of {len(music_data)} files.")

    if not music_data:
        # An empty DataFrame has no columns, so the charts cannot be drawn.
        print("No supported audio files found; nothing to plot.")
        return 0

    show_charts(music_data)
    return 0


if __name__ == "__main__":
    sys.exit(main())