Witam
Mam problem ze skryptem do elevenlabs.io
Wygenerowany dźwięk nie jest zsynchronizowany z czasami wskazanymi w pliku z napisami SRT.
Czy ktoś już przerabiał ten temat?
import os
import tempfile
import argparse
import requests
from pysubparser import parser
from pydub import AudioSegment
import io
from datetime import datetime, timedelta
import re
import subprocess
CHUNK_SIZE = 1024
ELEVENLAB_API_KEY = 'KLUCZ_API' # Zastąp swoim kluczem API Elevenlab
VOICE_ID = 'flq6f7yk4E4fJM5XTYuZ' # Zastąp odpowiednim identyfikatorem głosu
def time_to_seconds(time):
return (time.hour * 3600 + time.minute * 60 + time.second + time.microsecond / 1000000) if time else 0
def generate_audio(path, rate=200):
print("Generating audio files for {} with Elevenlab".format(path))
subtitles = list(parser.parse(path))
total_subtitles = len(subtitles)
output_folder = "audio_files"
os.makedirs(output_folder, exist_ok=True)
audio_segments = []
for i, subtitle in enumerate(subtitles):
text = subtitle.text
start_time = time_to_seconds(subtitle.start)
end_time = time_to_seconds(subtitle.end)
try:
url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
headers = {
"Accept": "audio/mpeg",
"Content-Type": "application/json",
"xi-api-key": ELEVENLAB_API_KEY
}
data = {
"text": text,
"model_id": "eleven_multilingual_v2",
"voice_settings": {
"stability": 0.6,
"similarity_boost": 0.8
}
}
response = requests.post(url, json=data, headers=headers, stream=True)
if response.status_code == 200:
audio_bytes = b""
for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
if chunk:
audio_bytes += chunk
audio_segment = AudioSegment.from_file(io.BytesIO(audio_bytes), format="mp3")
# Oblicz długość segmentu na podstawie czasu rozpoczęcia i zakończenia
segment_duration = end_time - start_time
# Przygotowanie ciszy przed audio
silence_before = AudioSegment.silent(duration=int(start_time * 1000))
# Dodanie ciszy przed audio
audio_segment = silence_before + audio_segment
output_filename = os.path.join(output_folder, f"segment_{i}.mp3")
audio_segment.export(output_filename, format="mp3")
audio_segments.append(output_filename)
# Obliczanie postępu
progress = (i + 1) / total_subtitles * 100
print(f"Progress: {progress:.2f}% ({i + 1}/{total_subtitles}) - Generating audio for: {text}")
else:
print(f"Failed to generate audio for: {text}")
except Exception as e:
print(f"Error: {e}")
# Przesunięcie początkowe
initial_shift = time_to_seconds(subtitles[0].start)
# Łączenie segmentów przy użyciu FFmpeg z przesunięciem
combined_output_path = os.path.splitext(path)[0] + '_calosc.mp3'
# Tworzenie listy argumentów dla FFmpeg
cmd = [
"ffmpeg",
"-i", f"concat:{'|'.join(audio_segments)}",
"-af", f"aresample=async=1:first_pts=0",
"-c:a", "mp3",
combined_output_path
]
subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(f"Generated combined audio file: {combined_output_path}")
if __name__ == "__main__":
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-p", "--path", help="subtitle file path", required=True)
arg_parser.add_argument("-r", "--rate", help="speech rate (words per minute)", type=int, default=150)
args = arg_parser.parse_args()
generate_audio(path=args.path, rate=args.rate)