tts-service-standalone / test_ref_text_correto.py

marcos

feat: Implementação completa F5-TTS PT-BR com AgentF5TTSChunk

fab3b22 21 days ago

4.42 kB

	#!/usr/bin/env python3
	"""
	Teste fornecendo ref_text correto em vez de transcrição automática
	"""

	import os
	import sys
	from f5_tts.api import F5TTS

	def _run_inference(model, *, ref_file, ref_text, gen_text, file_wave, success_msg):
	    """Run a single ``model.infer`` call and report the outcome on stdout.

	    Args:
	        model: Object exposing ``infer(ref_file=..., ref_text=..., gen_text=...,
	            file_wave=..., remove_silence=...)``.
	        ref_file: Path of the reference audio.
	        ref_text: Transcript of the reference audio.
	        gen_text: Text to synthesize.
	        file_wave: Path the generated audio is written to.
	        success_msg: Line printed when the call finishes without raising.

	    Returns:
	        True when ``model.infer`` completed, False when it raised.
	    """
	    try:
	        model.infer(
	            ref_file=ref_file,
	            ref_text=ref_text,
	            gen_text=gen_text,
	            file_wave=file_wave,
	            remove_silence=True,
	        )
	    except Exception as e:
	        # Deliberate best-effort: one failing scenario must not stop the
	        # remaining comparisons, so the error is reported and we move on.
	        print(f"❌ Erro: {e}")
	        return False
	    print(success_msg)
	    return True


	def main():
	    """Compare syntheses produced with different reference transcripts.

	    Loads the PT-BR checkpoint on CPU, runs four scenarios (empty ref_text,
	    manual PT-BR ref_text, English "Venus" reference, and a freshly generated
	    PT-BR reference), then prints a summary of which output files were
	    produced. A file only counts as generated when the inference of *this*
	    run succeeded, so leftovers from earlier runs are not reported as
	    successes.
	    """
	    print("🔍 Teste com ref_text correto vs automático")
	    print("=" * 60)

	    # Make sure the output directory exists
	    os.makedirs("output", exist_ok=True)

	    # Initialize the PT-BR model
	    model_path = "models/firstpixel-ptbr/pt-br/model_last.safetensors"
	    model = F5TTS(ckpt_file=model_path, device="cpu")

	    # PT-BR reference audio
	    ref_audio = "voices/ref_audio.wav"

	    # Text to synthesize
	    gen_text = "olá, este é um teste com texto de referência correto."

	    print("\n📝 TESTE 1: Com ref_text automático (vazio)")
	    print("-" * 60)

	    ok_auto = _run_inference(
	        model,
	        ref_file=ref_audio,
	        ref_text="",  # empty: the original author relies on automatic transcription
	        gen_text=gen_text,
	        file_wave="output/teste_ref_auto.wav",
	        success_msg="✅ Gerado: output/teste_ref_auto.wav",
	    )

	    print("\n📝 TESTE 2: Com ref_text manual correto")
	    print("-" * 60)

	    # NOTE(review): assumes the reference audio says roughly this sentence;
	    # confirm against the actual recording.
	    ref_text_correto = "olá este é um teste em português brasileiro"

	    ok_manual = _run_inference(
	        model,
	        ref_file=ref_audio,
	        ref_text=ref_text_correto,  # manually supplied transcript
	        gen_text=gen_text,
	        file_wave="output/teste_ref_manual.wav",
	        success_msg="✅ Gerado: output/teste_ref_manual.wav",
	    )

	    print("\n📝 TESTE 3: Com Venus (inglês) e texto correto")
	    print("-" * 60)

	    ref_audio_venus = "voices/venus_en.wav"
	    ref_text_venus = "Some call me nature others call me mother nature"
	    gen_text_en = "hello this is a test with english reference"

	    ok_venus = _run_inference(
	        model,
	        ref_file=ref_audio_venus,
	        ref_text=ref_text_venus,
	        gen_text=gen_text_en,
	        file_wave="output/teste_venus_manual.wav",
	        success_msg="✅ Gerado: output/teste_venus_manual.wav",
	    )

	    print("\n📝 TESTE 4: Criar referência PT-BR nova")
	    print("-" * 60)
	    print("Gerando novo áudio de referência em PT-BR...")

	    # Sentence used first as generated text, then as the exact transcript of
	    # the newly created reference audio.
	    ref_text_novo = "meu nome é assistente e eu falo português brasileiro naturalmente"

	    # Use the Venus voice to produce an initial PT-BR reference
	    ref_created = _run_inference(
	        model,
	        ref_file=ref_audio_venus,
	        ref_text=ref_text_venus,
	        gen_text=ref_text_novo,
	        file_wave="voices/ref_ptbr_novo.wav",
	        success_msg="✅ Nova referência criada: voices/ref_ptbr_novo.wav",
	    )

	    # The follow-up synthesis only makes sense if the new reference exists
	    ok_nova = False
	    if ref_created:
	        print("\nTestando com nova referência...")
	        ok_nova = _run_inference(
	            model,
	            ref_file="voices/ref_ptbr_novo.wav",
	            ref_text=ref_text_novo,  # exact text of the audio
	            gen_text="agora sim deve funcionar perfeitamente em português",
	            file_wave="output/teste_ref_nova.wav",
	            success_msg="✅ Gerado com nova ref: output/teste_ref_nova.wav",
	        )

	    # Results
	    print("\n" + "=" * 60)
	    print("📊 RESULTADOS:")
	    print("-" * 60)

	    arquivos = [
	        ("output/teste_ref_auto.wav", "Ref text automático", ok_auto),
	        ("output/teste_ref_manual.wav", "Ref text manual PT", ok_manual),
	        ("output/teste_venus_manual.wav", "Venus com texto", ok_venus),
	        ("output/teste_ref_nova.wav", "Nova ref PT-BR", ok_nova),
	    ]

	    for arquivo, desc, ok in arquivos:
	        # Require success in this run: a stale file from a previous run must
	        # not be reported as freshly generated.
	        if ok and os.path.exists(arquivo):
	            size = os.path.getsize(arquivo) / 1024
	            print(f"✅ {desc:20} - {size:.1f} KB")
	        else:
	            print(f"❌ {desc:20} - Não gerado")

	    print("\n🎯 CONCLUSÃO:")
	    print("Se 'Ref text manual' funciona melhor que 'automático':")
	    print("→ Problema era transcrição incorreta do áudio de referência")


	# Run the comparison only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()