Spaces:

akshit15
/

TranscriptAI

Sleeping

App Files Files Community

TranscriptAI / app.py

akshit15

Add TXT and PDF download buttons

f3a376b about 1 month ago

Raw

History Blame Contribute Delete

5.47 kB

	import gradio as gr
	import tempfile
	from faster_whisper import WhisperModel
	from fpdf import FPDF

	# Shared speech-to-text model, created once at import time and reused by
	# every transcription request: the "base" checkpoint on CPU with int8 compute.
	model = WhisperModel(
	    "base",
	    device="cpu",
	    compute_type="int8",
	)


	def format_time(seconds: float) -> str:
	    """Format an offset in seconds as ``HH:MM:SS.ss``.

	    The value is rounded to whole centiseconds *before* it is split into
	    hours, minutes and seconds, so a rounding carry propagates into the
	    larger fields (59.999 s becomes ``00:01:00.00``, never ``00:00:60.00``).

	    Args:
	        seconds: Non-negative offset in seconds.

	    Returns:
	        The zero-padded timestamp string.
	    """
	    # Work in integer centiseconds so the split below is exact.
	    total_centis = int(round(seconds * 100))
	    total_seconds, centis = divmod(total_centis, 100)
	    total_minutes, secs = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{centis:02d}"


	def transcribe(video_path: str) -> tuple[str, str]:
	    """Transcribe the uploaded video with the shared Whisper model.

	    Args:
	        video_path: Filesystem path of the uploaded file, or ``None`` when
	            nothing has been uploaded yet.

	    Returns:
	        A ``(timestamped, plain)`` pair. ``timestamped`` has one
	        ``[start -> end] text`` line per segment; ``plain`` is the segment
	        texts joined by single spaces. When there is nothing to show,
	        ``timestamped`` is empty and ``plain`` carries a user-facing message.
	    """
	    if video_path is None:
	        return "", "Please upload a video file first."

	    pieces, _info = model.transcribe(video_path, beam_size=5)

	    # Walk the segments once, collecting both representations side by side.
	    stamped_lines = []
	    spoken_parts = []
	    for piece in pieces:
	        spoken = piece.text.strip()
	        begin = format_time(piece.start)
	        finish = format_time(piece.end)
	        stamped_lines.append(f"[{begin} -> {finish}] {spoken}")
	        spoken_parts.append(spoken)

	    if not stamped_lines:
	        return "", "No speech detected in the video."

	    return "\n".join(stamped_lines), " ".join(spoken_parts)


	def make_txt(timestamped: str, plain: str):
	    """Write both transcript variants to a temporary UTF-8 ``.txt`` file.

	    Args:
	        timestamped: Transcript with one timestamped line per segment.
	        plain: Transcript text without timestamps.

	    Returns:
	        Path of the generated file, or ``None`` (after showing a warning in
	        the UI) when ``timestamped`` is empty.
	    """
	    if not timestamped:
	        gr.Warning("Generate a transcript first.")
	        return None

	    rule = "=" * 50
	    content = "".join(
	        [
	            "TRANSCRIPTAI — TIMESTAMPED TRANSCRIPT\n",
	            rule + "\n\n",
	            timestamped,
	            "\n\n\nPLAIN TRANSCRIPT\n",
	            rule + "\n\n",
	            plain,
	        ]
	    )

	    # delete=False keeps the file on disk after the handle is closed so the
	    # caller can serve it; the context manager guarantees the handle is
	    # closed even if the write fails.
	    with tempfile.NamedTemporaryFile(
	        mode="w", suffix=".txt", delete=False, encoding="utf-8"
	    ) as tmp:
	        tmp.write(content)
	    return tmp.name


	def _latin1_safe(text: str) -> str:
	    """Replace characters that Latin-1 cannot encode with ``?``."""
	    return text.encode("latin-1", errors="replace").decode("latin-1")


	def make_pdf(timestamped: str, plain: str):
	    """Render both transcript variants into a temporary PDF file.

	    The document uses the built-in Helvetica/Courier fonts, which can only
	    encode Latin-1 text, so any other character (curly quotes, ellipses,
	    non-Latin scripts) is replaced with ``?`` instead of aborting the export.

	    Args:
	        timestamped: Transcript with one timestamped line per segment.
	        plain: Transcript text without timestamps.

	    Returns:
	        Path of the generated PDF, or ``None`` (after showing a warning in
	        the UI) when ``timestamped`` is empty.
	    """
	    if not timestamped:
	        gr.Warning("Generate a transcript first.")
	        return None

	    pdf = FPDF()
	    pdf.set_margins(15, 15, 15)
	    pdf.add_page()
	    pdf.set_auto_page_break(auto=True, margin=15)

	    # Title
	    pdf.set_font("Helvetica", "B", 18)
	    pdf.cell(0, 12, "TranscriptAI", ln=True, align="C")
	    pdf.set_font("Helvetica", "", 10)
	    pdf.set_text_color(120, 120, 120)
	    pdf.cell(0, 6, "Generated transcript", ln=True, align="C")
	    pdf.set_text_color(0, 0, 0)
	    pdf.ln(6)

	    # Divider
	    pdf.set_draw_color(200, 200, 200)
	    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
	    pdf.ln(6)

	    # Timestamped section
	    pdf.set_font("Helvetica", "B", 12)
	    pdf.cell(0, 8, "Timestamped Transcript", ln=True)
	    pdf.ln(2)
	    pdf.set_font("Courier", "", 9)
	    for line in _latin1_safe(timestamped).split("\n"):
	        # Some fpdf2 releases leave the cursor at the right edge after
	        # multi_cell; return to the left margin so the next line gets the
	        # full width (harmless on versions that already do this).
	        pdf.set_x(pdf.l_margin)
	        pdf.multi_cell(0, 5, line)
	    pdf.ln(8)

	    # Plain section
	    pdf.set_font("Helvetica", "B", 12)
	    pdf.cell(0, 8, "Plain Transcript", ln=True)
	    pdf.ln(2)
	    pdf.set_font("Helvetica", "", 10)
	    pdf.multi_cell(0, 6, _latin1_safe(plain))

	    # Reserve the output path atomically (tempfile.mktemp is deprecated and
	    # race-prone); the handle is closed before FPDF writes to the path.
	    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
	        tmp_path = tmp.name
	    pdf.output(tmp_path)
	    return tmp_path


	# Stylesheet handed to demo.launch(): centers the elements tagged with the
	# "title" and "subtitle" classes and renders the subtitle in grey.
	CSS = """
	.title { text-align: center; margin-bottom: 0.25rem; }
	.subtitle { text-align: center; color: #6b7280; margin-bottom: 1.5rem; }
	"""

	def _download_slot_update(timestamped: str):
	    """Reveal a download slot only when there is a transcript to export.

	    Uses the same emptiness check as make_txt/make_pdf, so the file widget
	    stays hidden when those functions refuse with a warning.
	    """
	    return gr.File(visible=bool(timestamped))


	with gr.Blocks(title="TranscriptAI") as demo:
	    gr.Markdown("# TranscriptAI", elem_classes="title")
	    gr.Markdown(
	        "Upload a video file and get a timestamped transcript — powered by Whisper.",
	        elem_classes="subtitle",
	    )

	    with gr.Row():
	        # ── Left column: upload + controls ──────────────────────────────
	        with gr.Column(scale=1):
	            video_input = gr.File(
	                label="Upload Video",
	                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
	                type="filepath",
	            )
	            btn = gr.Button("Generate Transcript", variant="primary", size="lg")
	            gr.Markdown(
	                "_Supported: MP4, MOV, AVI, MKV, WebM_\n\n"
	                "_Model: Whisper base · CPU optimized_"
	            )

	            gr.Markdown("---")
	            gr.Markdown("### ⬇ Download Transcript")

	            with gr.Row():
	                dl_txt_btn = gr.Button("Download TXT", variant="secondary", size="sm")
	                dl_pdf_btn = gr.Button("Download PDF", variant="secondary", size="sm")

	            # Hidden until a file has actually been generated.
	            txt_file = gr.File(label="TXT File", visible=False)
	            pdf_file = gr.File(label="PDF File", visible=False)

	        # ── Right column: transcript output ─────────────────────────────
	        with gr.Column(scale=2):
	            with gr.Tabs():
	                with gr.Tab("Timestamped"):
	                    timestamped_out = gr.Textbox(
	                        label="Transcript with Timestamps",
	                        lines=28,
	                        placeholder="[00:00:00 -> 00:00:05] Transcript will appear here...",
	                    )
	                with gr.Tab("Plain Text"):
	                    plain_out = gr.Textbox(
	                        label="Plain Transcript",
	                        lines=28,
	                        placeholder="Full transcript without timestamps...",
	                    )

	    # ── Event wiring ────────────────────────────────────────────────────
	    btn.click(fn=transcribe, inputs=video_input, outputs=[timestamped_out, plain_out])

	    # Each download button first builds the file, then shows its slot only
	    # if a transcript was available to build it from.
	    dl_txt_btn.click(
	        fn=make_txt,
	        inputs=[timestamped_out, plain_out],
	        outputs=txt_file,
	    ).then(fn=_download_slot_update, inputs=timestamped_out, outputs=txt_file)

	    dl_pdf_btn.click(
	        fn=make_pdf,
	        inputs=[timestamped_out, plain_out],
	        outputs=pdf_file,
	    ).then(fn=_download_slot_update, inputs=timestamped_out, outputs=pdf_file)

	demo.launch(theme=gr.themes.Soft(), css=CSS)