"""
Tag Series Grouper

This script analyzes metadata.json to find character tags that belong to a specific series
and groups them by series for creating specialized tag mosaics.

Examples:
- firefly_(honkai:_star_rail) -> belongs to the "honkai: star rail" series
- raiden_mei_(honkai_impact) -> belongs to the "honkai impact" series

Usage:
    python tag_series_grouper.py --metadata_path path/to/metadata.json --output_path output/series_groups.json
"""

import os
import json
import re
import argparse
from collections import defaultdict


def extract_series_from_tag(tag):
    """
    Extract the series name from a tag, if present.

    Example: "firefly_(honkai:_star_rail)" -> ("firefly", "honkai: star rail")

    Returns:
        Tuple of (character_name, series_name), or (tag, None) if no series is found.
    """
    # Series-qualified tags have the form "<character>_(<series>)"; capture both parts.
    pattern = r'(.+)_\(([^)]+)\)$'
    match = re.match(pattern, tag)

    if match:
        character = match.group(1)
        series = match.group(2)

        # Series names use underscores in place of spaces; restore the spaces.
        series = series.replace('_', ' ')

        return character, series

    return tag, None

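# Quick self-check of the parse rule above (illustrative only; "1girl" stands in
# for any hypothetical tag without a "(...)" series suffix):
assert extract_series_from_tag("firefly_(honkai:_star_rail)") == ("firefly", "honkai: star rail")
assert extract_series_from_tag("1girl") == ("1girl", None)
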
def find_series_tags(metadata_path):
    """
    Process metadata.json to find and group tags by series.

    Args:
        metadata_path: Path to the metadata.json file.

    Returns:
        Dictionary with series information and tag groups.
    """
    if not os.path.exists(metadata_path):
        raise FileNotFoundError(f"Metadata file not found: {metadata_path}")

    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    # Collect the tag vocabulary from whichever mapping the file provides;
    # if neither key is present, the tag list stays empty.
    tags = []
    if 'idx_to_tag' in metadata:
        tags = list(metadata['idx_to_tag'].values())
    elif 'tag_to_idx' in metadata:
        tags = list(metadata['tag_to_idx'].keys())
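
    # Assumed metadata layouts, matching the two keys probed above
    # (key/value details are illustrative, not confirmed by the file format):
    #   {"idx_to_tag": {"0": "firefly_(honkai:_star_rail)", ...}}
    #   {"tag_to_idx": {"firefly_(honkai:_star_rail)": 0, ...}}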

    # Group tags and characters by the series extracted from each tag.
    series_tags = defaultdict(list)
    characters_by_series = defaultdict(set)
    character_to_series = {}
    tags_with_series = []
    tags_without_series = []

    for tag in tags:
        character, series = extract_series_from_tag(tag)

        if series:
            series_tags[series].append(tag)
            characters_by_series[series].add(character)
            character_to_series[character] = series
            tags_with_series.append(tag)
        else:
            tags_without_series.append(tag)

    # Order series by how many tags they contain, largest first.
    sorted_series = sorted(series_tags.keys(), key=lambda x: len(series_tags[x]), reverse=True)

    result = {
        "stats": {
            "total_tags": len(tags),
            "series_tags": len(tags_with_series),
            "regular_tags": len(tags_without_series),
            "unique_series": len(series_tags),
            "total_characters": sum(len(chars) for chars in characters_by_series.values())
        },
        "series": {},
        "mosaic_configs": []
    }

    for series in sorted_series:
        result["series"][series] = {
            "tags": series_tags[series],
            "tag_count": len(series_tags[series]),
            "characters": list(characters_by_series[series]),
            "character_count": len(characters_by_series[series])
        }

        # Series large enough (more than 20 tags) get their own mosaic config.
        if len(series_tags[series]) > 20:
            series_key = series.replace(" ", "_").replace(":", "").lower()
            result["mosaic_configs"].append({
                "name": series_key,
                "title": f"{series.title()} Collection",
                "total_tags": len(series_tags[series]),
                "width": 512,
                "height": 512,
                "pixel_size": None
            })

    # Pool every remaining series (20 tags or fewer) into one combined config,
    # so no series falls through the size cutoff above.
    small_series_tags = 0
    for series in series_tags:
        if len(series_tags[series]) <= 20:
            small_series_tags += len(series_tags[series])

    if small_series_tags > 0:
        result["mosaic_configs"].append({
            "name": "other_series",
            "title": "Other Series Collection",
            "total_tags": small_series_tags,
            "width": 512,
            "height": 512,
            "pixel_size": None
        })

result["mosaic_configs"].append({
|
|
"name": "main",
|
|
"title": "Complete Collection",
|
|
"total_tags": len(tags),
|
|
"width": 1024,
|
|
"height": 1024,
|
|
"pixel_size": 4
|
|
})
|
|
|
|
return result
|
|
|
|
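# Shape of the mapping returned by find_series_tags, abbreviated for reference
# (keys follow the code above; the series name reuses the docstring example):
#
# {
#   "stats": {"total_tags": ..., "series_tags": ..., "regular_tags": ...,
#             "unique_series": ..., "total_characters": ...},
#   "series": {
#     "honkai: star rail": {"tags": [...], "tag_count": ...,
#                           "characters": [...], "character_count": ...}
#   },
#   "mosaic_configs": [{"name": "honkai_star_rail", "title": "Honkai: Star Rail Collection",
#                       "total_tags": ..., "width": 512, "height": 512, "pixel_size": None}, ...]
# }
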
def save_series_data(series_data, output_path):
    """Save the series data to a JSON file."""
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(series_data, f, indent=2)

    print(f"Series data saved to {output_path}")
    print(f"Found {series_data['stats']['unique_series']} unique series")
    print(f"Created {len(series_data['mosaic_configs'])} mosaic configurations")


def find_and_print_top_series(metadata_path, output_path=None, top_n=10):
    """Find series groupings and print top series information."""
    series_data = find_series_tags(metadata_path)

    print("\nTag Series Analysis")
    print("===================")
    print(f"Total tags analyzed: {series_data['stats']['total_tags']}")
    print(f"Tags with series identifier: {series_data['stats']['series_tags']}")
    print(f"Regular tags: {series_data['stats']['regular_tags']}")
    print(f"Unique series found: {series_data['stats']['unique_series']}")
    print(f"Total characters across series: {series_data['stats']['total_characters']}")

    print(f"\nTop {top_n} Series by Tag Count:")
    print("===========================")
    sorted_series = sorted(series_data['series'].items(), key=lambda x: x[1]['tag_count'], reverse=True)

    for i, (series, data) in enumerate(sorted_series[:top_n]):
        print(f"{i + 1}. {series.title()}: {data['tag_count']} tags, {data['character_count']} characters")

    if output_path:
        save_series_data(series_data, output_path)

    return series_data


def main():
    parser = argparse.ArgumentParser(description='Group tags by series from metadata.json')
    parser.add_argument('--metadata_path', type=str, required=True, help='Path to metadata.json')
    parser.add_argument('--output_path', type=str, default='series_groups.json', help='Output path for series groups')
    parser.add_argument('--top_n', type=int, default=10, help='Number of top series to display')

    args = parser.parse_args()

    try:
        find_and_print_top_series(args.metadata_path, args.output_path, args.top_n)
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()
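
# Example invocation (paths are placeholders; --top_n is optional and defaults to 10):
#
#   python tag_series_grouper.py --metadata_path path/to/metadata.json \
#       --output_path output/series_groups.json --top_n 15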