Spaces:

ASesYusuf1
/

Jhfhnrqgx-Gxeelqj-Vwxglr

Running on Zero

App Files Files Community

Jhfhnrqgx-Gxeelqj-Vwxglr / inference.py

ASesYusuf1

Update inference.py

8a49b0c verified 23 days ago

raw

history blame contribute delete

12.7 kB

	# coding: utf-8
	__author__ = 'Roman Solovyev (ZFTurbo): https://github.com/ZFTurbo/'

	import argparse
	import time
	import logging
	import librosa
	import sys
	import os
	import glob
	import torch
	import torch.nn as nn
	import numpy as np
	import soundfile as sf
	import spaces
	import warnings
	warnings.filterwarnings("ignore")

	# Logging configuration: records at DEBUG level and above are written to
	# 'utils.log' as "<timestamp> - <level> - <message>".
	logging.basicConfig(
	    filename='utils.log',
	    level=logging.DEBUG,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	# Colab detection: IS_COLAB is True only when the google.colab package
	# can be imported in the current interpreter.
	try:
	    from google.colab import drive
	except ImportError:
	    IS_COLAB = False
	else:
	    IS_COLAB = True

	# i18n placeholder
	class I18nAuto:
	    """Pass-through stand-in for a translation lookup.

	    Calling an instance returns the given message unchanged; no translation
	    table is consulted.
	    """

	    def __call__(self, message):
	        """Return ``message`` exactly as it was passed in."""
	        return message

	    def format(self, message, *args):
	        """Return ``message`` with ``args`` substituted positionally via ``message.format``."""
	        formatted = message.format(*args)
	        return formatted

	# Module-wide translator instance; used throughout this file as i18n("key").
	i18n = I18nAuto()

	# Add the directory containing this file to sys.path before the imports
	# below run -- presumably so the sibling `utils` module resolves regardless
	# of the working directory (confirm against the project layout).
	current_dir = os.path.dirname(os.path.abspath(__file__))
	sys.path.append(current_dir)

	from utils import demix, get_model_from_config, normalize_audio, denormalize_audio
	from utils import prefer_target_instrument, apply_tta, load_start_checkpoint

	def shorten_filename(filename, max_length=30):
	    """Shorten a long file name by replacing the middle of its stem with "...".

	    Args:
	        filename: File name (not a full path); the extension is preserved.
	        max_length: Longest stem (name without extension) returned untouched.
	            It also sizes the kept head (``max_length // 2`` characters) and
	            tail (``max_length // 3`` characters), so the default of 30 keeps
	            the first 15 and last 10 characters.

	    Returns:
	        ``filename`` unchanged when its stem has at most ``max_length``
	        characters, otherwise ``head + "..." + tail + extension``.
	    """
	    base, ext = os.path.splitext(filename)
	    if len(base) <= max_length:
	        return filename

	    ellipsis = "..."
	    # Characters left for head + tail once the ellipsis is accounted for, so
	    # the shortened stem does not exceed max_length (for max_length >= 3).
	    budget = max(max_length - len(ellipsis), 0)
	    head_len = min(max_length // 2, budget)
	    tail_len = min(max_length // 3, budget - head_len)

	    head = base[:head_len]
	    # Slice from an explicit start index: base[-0:] would return the whole stem.
	    tail = base[len(base) - tail_len:]
	    return head + ellipsis + tail + ext

	def get_soundfile_subtype(pcm_type, is_float=False):
	    """Pick the subtype string used when writing audio.

	    Args:
	        pcm_type: Requested subtype name ('PCM_16', 'PCM_24' or 'FLOAT').
	        is_float: When true, 'FLOAT' is returned regardless of ``pcm_type``.

	    Returns:
	        'PCM_16' or 'PCM_24' when requested and ``is_float`` is false;
	        'FLOAT' in every other case, including unrecognised names.
	    """
	    if pcm_type == 'FLOAT' or is_float:
	        return 'FLOAT'
	    if pcm_type in {'PCM_16', 'PCM_24'}:
	        return pcm_type
	    # Anything unrecognised falls back to float output.
	    return 'FLOAT'

	def update_progress_html(progress_label, progress_percent):
	    """Render an HTML snippet showing a label above a progress bar.

	    Args:
	        progress_label: Text placed in the ``progress-label`` element.
	        progress_percent: Bar fill in percent; rounded to an integer and
	            clamped to the 0..100 range before being used as the bar width.

	    Returns:
	        The HTML markup as a string.
	    """
	    rounded = round(progress_percent)
	    bar_width = min(max(rounded, 0), 100)
	    return f"""
	<div id="custom-progress" style="margin-top: 10px;">
	<div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{progress_label}</div>
	<div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
	<div id="progress-bar" style="width: {bar_width}%; height: 20px; background-color: #6e8efb; transition: width 0.3s; max-width: 100%;"></div>
	</div>
	</div>
	"""

	def _report_progress(progress, fraction, desc):
	    """Forward ``fraction`` (clamped to 0..1) and ``desc`` to ``progress`` when it is callable."""
	    if callable(progress):
	        return progress(min(max(fraction, 0.0), 1.0), desc=desc)
	    return None


	def _stage_progress(progress, file_start, file_share, stage_start, stage_span):
	    """Build the ``progress(p, desc)`` callback passed to demix/apply_tta for one stage.

	    The stage spans ``stage_start``..``stage_start + stage_span`` percent of the
	    current file; the file spans ``file_share`` percent of the whole run,
	    beginning at ``file_start`` percent. With a single input file this maps
	    ``p`` to ``(stage_start + p * stage_span) / 100``.
	    """
	    def callback(p, desc):
	        within_file = stage_start + p * stage_span
	        overall = (file_start + within_file * file_share / 100) / 100
	        return _report_progress(progress, overall, desc)

	    return callback


	def run_folder(model, args, config, device, verbose: bool = False, progress=None):
	    """Separate every audio file in ``args.input_folder`` and write the stems.

	    Each file is loaded with librosa at the configured sample rate, optionally
	    normalized, run through ``demix`` (plus ``apply_tta`` when ``args.use_tta``
	    is set), and one output file per instrument is written to
	    ``args.store_dir``. A file that raises at any step is logged and skipped.

	    Args:
	        model: Model object; switched to eval mode and passed to ``demix``.
	        args: Namespace providing ``input_folder``, ``store_dir``,
	            ``model_type``, ``use_tta``, ``demud_phaseremix_inst``,
	            ``extract_instrumental`` and ``pcm_type``; ``export_format`` and
	            ``flac_file`` are optional.
	        config: Model configuration; ``config.audio.sample_rate`` (default
	            44100) and ``config.inference`` are read here.
	        device: Device handed through to ``demix`` / ``apply_tta``.
	        verbose: Unused; kept for interface compatibility.
	        progress: Optional callable invoked as ``progress(fraction, desc=...)``
	            with ``fraction`` in 0..1.

	    Returns:
	        None.
	    """
	    start_time = time.time()
	    model.eval()

	    # '*.*' selects entries that have an extension; the bare '.' pattern only
	    # ever matched the folder itself, so no audio file was found.
	    mixture_paths = sorted(
	        p for p in glob.glob(os.path.join(args.input_folder, '*.*'))
	        if os.path.isfile(p)
	    )
	    sample_rate = getattr(config.audio, 'sample_rate', 44100)

	    logging.info(f"Total files found: {len(mixture_paths)} with sample rate: {sample_rate}")
	    print(i18n("total_files_found").format(len(mixture_paths), sample_rate))

	    instruments = prefer_target_instrument(config)[:]
	    store_dir = args.store_dir
	    os.makedirs(store_dir, exist_ok=True)

	    total_files = len(mixture_paths)
	    processed_files = 0
	    # Share of the overall progress bar (in percent) owned by one file.
	    file_share = 100 / total_files if total_files > 0 else 100

	    for path in mixture_paths:
	        try:
	            mix, sr = librosa.load(path, sr=sample_rate, mono=False)
	            logging.info(f"Loaded audio: {path}, shape: {mix.shape}")
	            print(i18n("loaded_audio").format(path, mix.shape))

	            processed_files += 1
	            file_start = (processed_files - 1) * file_share
	            _report_progress(
	                progress, file_start / 100,
	                i18n("processing_file").format(processed_files, total_files),
	            )

	            # Work on a per-file copy: stems appended below must not leak into
	            # the next file (that duplicated outputs and flipped the
	            # phase-remix branch from the second file onwards).
	            file_instruments = list(instruments)

	            mix_orig = mix.copy()
	            normalize = 'normalize' in config.inference and config.inference.get('normalize', False)
	            if normalize:
	                mix, norm_params = normalize_audio(mix)

	            waveforms_orig = demix(
	                config, model, mix, device, model_type=args.model_type, pbar=False,
	                progress=_stage_progress(progress, file_start, file_share, 0, 50)
	            )

	            if args.use_tta:
	                waveforms_orig = apply_tta(
	                    config, model, mix, waveforms_orig, device, args.model_type,
	                    progress=_stage_progress(progress, file_start, file_share, 50, 20)
	                )

	            if args.demud_phaseremix_inst:
	                logging.info(f"Demudding track: {path}")
	                print(i18n("demudding_track").format(path))
	                instr = 'vocals' if 'vocals' in file_instruments else file_instruments[0]
	                has_instrumental = (
	                    'instrumental' in file_instruments or 'Instrumental' in file_instruments
	                )
	                if has_instrumental:
	                    mix_modified = 2 * waveforms_orig[instr] - mix_orig
	                else:
	                    mix_modified = mix_orig - 2 * waveforms_orig[instr]
	                # Copy taken before the second separation pass and used in the
	                # remix formula below.
	                mix_modified_ = mix_modified.copy()

	                waveforms_modified = demix(
	                    config, model, mix_modified, device, model_type=args.model_type, pbar=False,
	                    progress=_stage_progress(progress, file_start, file_share, 70, 15)
	                )
	                if args.use_tta:
	                    # TTA refines the second-pass result in both branches (one
	                    # branch previously passed waveforms_orig here).
	                    waveforms_modified = apply_tta(
	                        config, model, mix_modified, waveforms_modified, device, args.model_type,
	                        progress=_stage_progress(progress, file_start, file_share, 85, 10)
	                    )

	                if has_instrumental:
	                    waveforms_orig['instrumental_phaseremix'] = (
	                        mix_orig + mix_modified_ - waveforms_modified[instr]
	                    )
	                else:
	                    waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
	                file_instruments.append('instrumental_phaseremix')

	            if args.extract_instrumental:
	                instr = 'vocals' if 'vocals' in file_instruments else file_instruments[0]
	                waveforms_orig['instrumental'] = mix_orig - waveforms_orig[instr]
	                if 'instrumental' not in file_instruments:
	                    file_instruments.append('instrumental')

	            # Output settings depend only on args, so resolve them once per file.
	            codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
	            is_float = codec == 'wav' and getattr(args, 'export_format', '').startswith('wav FLOAT')
	            subtype = get_soundfile_subtype(args.pcm_type, is_float=is_float)
	            if codec == 'flac' and subtype == 'FLOAT':
	                # FLAC has no floating-point subtype; fall back to 24-bit PCM.
	                subtype = 'PCM_24'

	            # NOTE: the shortened name still carries the source extension, so
	            # outputs look like "<name>.<src_ext>_<stem>.<codec>".
	            shortened_filename = shorten_filename(os.path.basename(path))

	            for i, instr in enumerate(file_instruments):
	                estimates = waveforms_orig[instr]
	                if normalize:
	                    estimates = denormalize_audio(estimates, norm_params)

	                output_filename = f"{shortened_filename}_{instr}.{codec}"
	                output_path = os.path.join(store_dir, output_filename)
	                sf.write(output_path, estimates.T, sr, subtype=subtype)

	                # Saving occupies the last 5% of the file's share.
	                within_file = 95 + (i / len(file_instruments)) * 5
	                _report_progress(
	                    progress, (file_start + within_file * file_share / 100) / 100,
	                    i18n("saving_output").format(instr, processed_files, total_files),
	                )

	            _report_progress(
	                progress, processed_files * file_share / 100,
	                i18n("completed_file").format(processed_files, total_files),
	            )

	        except Exception as e:
	            # Best effort: one failing track must not abort the whole folder.
	            logging.exception(f"Cannot read track: {path}. Error: {str(e)}")
	            print(i18n("cannot_read_track").format(path))
	            print(i18n("error_message").format(str(e)))
	            continue

	    elapsed_time = time.time() - start_time
	    logging.info(f"Processing time: {elapsed_time:.2f} seconds")
	    print(i18n("elapsed_time").format(elapsed_time))

	    _report_progress(progress, 1.0, i18n("processing_complete"))

	@spaces.GPU
	def proc_folder(args=None, progress=None):
	    """Parse separation options, load the model and process the input folder.

	    Args:
	        args: Sequence of command-line style strings (e.g.
	            ``["--config_path", "cfg.yaml", ...]``). ``None`` makes argparse
	            read ``sys.argv[1:]``.
	        progress: Optional progress callback forwarded to ``run_folder``.

	    Returns:
	        The string ``"Processing completed"``.

	    Raises:
	        ValueError: If the arguments cannot be parsed.
	        Exception: Whatever ``get_model_from_config`` or
	            ``load_start_checkpoint`` raise is logged and re-raised.
	    """
	    parser = argparse.ArgumentParser(description=i18n("proc_folder_description"))
	    # NOTE(review): confirm 'melod_band_roformer' is a model type that
	    # get_model_from_config accepts; it is kept unchanged here.
	    parser.add_argument("--model_type", type=str, default='melod_band_roformer', help=i18n("model_type_help"))
	    parser.add_argument("--config_path", type=str, required=True, help=i18n("config_path_help"))
	    parser.add_argument("--start_check_point", type=str, required=True, help=i18n("start_checkpoint_help"))
	    parser.add_argument("--input_folder", type=str, required=True, help=i18n("input_folder_help"))
	    parser.add_argument("--store_dir", type=str, required=True, help=i18n("store_dir_help"))
	    parser.add_argument("--chunk_size", type=int, default=352800, help=i18n("chunk_size_help"))
	    parser.add_argument("--overlap", type=int, default=2, help=i18n("overlap_help"))
	    parser.add_argument("--export_format", type=str, default='wav FLOAT', choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'], help=i18n("export_format_help"))
	    parser.add_argument("--demud_phaseremix_inst", action='store_true', help=i18n("demud_phaseremix_help"))
	    parser.add_argument("--extract_instrumental", action='store_true', help=i18n("extract_instrumental_help"))
	    parser.add_argument("--use_tta", action='store_true', help=i18n("use_tta_help"))
	    parser.add_argument("--flac_file", action='store_true', help=i18n("flac_file_help"))
	    parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help=i18n("pcm_type_help"))
	    parser.add_argument("--device_ids", nargs='+', type=int, default=[0], help=i18n("device_ids_help"))
	    parser.add_argument("--force_cpu", action='store_true', help=i18n("force_cpu_help"))
	    parser.add_argument("--lora_checkpoint", type=str, default='', help=i18n("lora_checkpoint_help"))

	    try:
	        # Passing None through lets argparse fall back to sys.argv[1:], which
	        # is what the command-line entry point needs.
	        args = parser.parse_args(args)
	    except SystemExit as e:
	        # argparse reports bad input via SystemExit, which `except Exception`
	        # does not catch. Exit status 0 (e.g. --help) is a normal exit.
	        if e.code in (0, None):
	            raise
	        logging.error(f"Argument parsing failed: {str(e)}")
	        raise ValueError(f"Invalid command-line arguments: {str(e)}") from e
	    except Exception as e:
	        logging.error(f"Argument parsing failed: {str(e)}")
	        raise ValueError(f"Invalid command-line arguments: {str(e)}") from e

	    # Device preference: explicit CPU override, then CUDA, then Apple MPS.
	    device = "cpu"
	    if args.force_cpu:
	        logging.info("Forced to use CPU")
	    elif torch.cuda.is_available():
	        logging.info("CUDA available")
	        print(i18n("cuda_available"))
	        device = f'cuda:{args.device_ids[0]}'
	    elif torch.backends.mps.is_available():
	        device = "mps"

	    logging.info(f"Using device: {device}")
	    print(i18n("using_device").format(device))

	    model_load_start_time = time.time()
	    torch.backends.cudnn.benchmark = True

	    try:
	        model, config = get_model_from_config(args.model_type, args.config_path)
	    except Exception as e:
	        logging.error(f"Failed to load model: {str(e)}")
	        raise

	    if args.start_check_point:
	        try:
	            load_start_checkpoint(args, model, type_='inference')
	        except Exception as e:
	            logging.error(f"Failed to load checkpoint: {str(e)}")
	            raise

	    logging.info(f"Instruments: {config.training.instruments}")
	    print(i18n("instruments_print").format(config.training.instruments))

	    # Wrap for multi-GPU use only when several device ids were requested.
	    if len(args.device_ids) > 1 and not args.force_cpu:
	        model = nn.DataParallel(model, device_ids=args.device_ids)
	        logging.info(f"Using DataParallel with devices: {args.device_ids}")

	    model = model.to(device)

	    elapsed_time = time.time() - model_load_start_time
	    logging.info(f"Model load time: {elapsed_time:.2f} seconds")
	    print(i18n("model_load_time").format(elapsed_time))

	    run_folder(model, args, config, device, verbose=False, progress=progress)
	    return "Processing completed"

	if __name__ == "__main__":
	    # Command-line entry point. The arguments are handed over explicitly so
	    # the options typed by the user are the ones that get parsed, rather
	    # than relying on how proc_folder treats a None argument list.
	    try:
	        proc_folder(sys.argv[1:])
	    except Exception as e:
	        # Record the failure in the log file, then let it propagate.
	        logging.error(f"Main execution failed: {str(e)}")
	        raise