"""TranscriptAI: a small Gradio app that transcribes an uploaded video.

The transcript is produced by a faster-whisper model and can be downloaded
as a plain-text file or as a PDF.
"""

import tempfile
from typing import Optional

import gradio as gr
from faster_whisper import WhisperModel
from fpdf import FPDF

# Loaded once at import time and shared by every transcription request.
model = WhisperModel("base", device="cpu", compute_type="int8")


def format_time(seconds: float) -> str:
    """Format a duration in seconds as ``HH:MM:SS.ss``.

    The value is rounded to centiseconds *before* it is split into fields,
    so a value such as 59.999 carries into the minutes field
    (``00:01:00.00``) instead of rendering as ``00:00:60.00``.
    """
    total_cs = int(round(seconds * 100))
    total_minutes, cs = divmod(total_cs, 60 * 100)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{cs / 100:05.2f}"


def transcribe(video_path: str) -> tuple[str, str]:
    """Transcribe the file at *video_path*.

    Returns:
        A ``(timestamped, plain)`` pair. When no file was supplied or no
        segments were produced, the first element is empty and the second
        carries a message for the user.
    """
    if video_path is None:
        return "", "Please upload a video file first."

    segments, _ = model.transcribe(video_path, beam_size=5)
    # Materialise the segments so they can be iterated twice below.
    segments = list(segments)

    if not segments:
        return "", "No speech detected in the video."

    timestamped = "\n".join(
        f"[{format_time(seg.start)} -> {format_time(seg.end)}] {seg.text.strip()}"
        for seg in segments
    )
    plain = " ".join(seg.text.strip() for seg in segments)
    return timestamped, plain


def make_txt(timestamped: str, plain: str) -> Optional[str]:
    """Write both transcript variants to a temporary UTF-8 ``.txt`` file.

    Returns:
        The path of the written file, or ``None`` (after showing a warning)
        when *timestamped* is empty.
    """
    if not timestamped:
        gr.Warning("Generate a transcript first.")
        return None

    content = (
        "TRANSCRIPTAI — TIMESTAMPED TRANSCRIPT\n"
        + "=" * 50
        + "\n\n"
        + timestamped
        + "\n\n\nPLAIN TRANSCRIPT\n"
        + "=" * 50
        + "\n\n"
        + plain
    )

    # delete=False: the file has to outlive this function so it can be served.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as tmp:
        tmp.write(content)
    return tmp.name


def _latin1_safe(text: str) -> str:
    """Replace characters that cannot be encoded as Latin-1 with ``?``.

    The PDF is rendered with the built-in Helvetica/Courier fonts, which are
    not Unicode fonts; transcripts can contain characters (curly quotes,
    non-Latin scripts) that would otherwise make PDF generation fail.
    """
    return text.encode("latin-1", errors="replace").decode("latin-1")


def make_pdf(timestamped: str, plain: str) -> Optional[str]:
    """Render both transcript variants into a temporary PDF file.

    Returns:
        The path of the written file, or ``None`` (after showing a warning)
        when *timestamped* is empty.
    """
    if not timestamped:
        gr.Warning("Generate a transcript first.")
        return None

    pdf = FPDF()
    pdf.set_margins(15, 15, 15)
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Title
    pdf.set_font("Helvetica", "B", 18)
    pdf.cell(0, 12, "TranscriptAI", ln=True, align="C")
    pdf.set_font("Helvetica", "", 10)
    pdf.set_text_color(120, 120, 120)
    pdf.cell(0, 6, "Generated transcript", ln=True, align="C")
    pdf.set_text_color(0, 0, 0)
    pdf.ln(6)

    # Divider
    pdf.set_draw_color(200, 200, 200)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(6)

    # Timestamped section
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, "Timestamped Transcript", ln=True)
    pdf.ln(2)
    pdf.set_font("Courier", "", 9)
    for line in timestamped.split("\n"):
        # Depending on the FPDF implementation, multi_cell may leave the
        # cursor at the right edge; return to the left margin so the next
        # full-width cell has room to render.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 5, _latin1_safe(line))
    pdf.ln(8)

    # Plain section
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, "Plain Transcript", ln=True)
    pdf.ln(2)
    pdf.set_font("Helvetica", "", 10)
    pdf.set_x(pdf.l_margin)
    pdf.multi_cell(0, 6, _latin1_safe(plain))

    # Reserve the output path atomically (tempfile.mktemp is deprecated and
    # race-prone); close the handle before FPDF writes to the same path.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp_path = tmp.name
    pdf.output(tmp_path)
    return tmp_path


CSS = """
.title { text-align: center; margin-bottom: 0.25rem; }
.subtitle { text-align: center; color: #6b7280; margin-bottom: 1.5rem; }
"""

with gr.Blocks(title="TranscriptAI") as demo:
    gr.Markdown("# TranscriptAI", elem_classes="title")
    gr.Markdown(
        "Upload a video file and get a timestamped transcript — powered by Whisper.",
        elem_classes="subtitle",
    )

    with gr.Row():
        # Left column: upload + controls
        with gr.Column(scale=1):
            video_input = gr.File(
                label="Upload Video",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
                type="filepath",
            )
            btn = gr.Button("Generate Transcript", variant="primary", size="lg")
            gr.Markdown(
                "_Supported: MP4, MOV, AVI, MKV, WebM_\n\n"
                "_Model: Whisper base · CPU optimized_"
            )

            gr.Markdown("---")
            gr.Markdown("### ⬇ Download Transcript")
            with gr.Row():
                dl_txt_btn = gr.Button("Download TXT", variant="secondary", size="sm")
                dl_pdf_btn = gr.Button("Download PDF", variant="secondary", size="sm")
            # Hidden until the corresponding download button has been used.
            txt_file = gr.File(label="TXT File", visible=False)
            pdf_file = gr.File(label="PDF File", visible=False)

        # Right column: transcript output
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Timestamped"):
                    timestamped_out = gr.Textbox(
                        label="Transcript with Timestamps",
                        lines=28,
                        placeholder="[00:00:00 -> 00:00:05] Transcript will appear here...",
                    )
                with gr.Tab("Plain Text"):
                    plain_out = gr.Textbox(
                        label="Plain Transcript",
                        lines=28,
                        placeholder="Full transcript without timestamps...",
                    )

    # Event wiring
    btn.click(fn=transcribe, inputs=video_input, outputs=[timestamped_out, plain_out])

    # Each download button first builds the file, then reveals its component.
    dl_txt_btn.click(
        fn=make_txt,
        inputs=[timestamped_out, plain_out],
        outputs=txt_file,
    ).then(fn=lambda: gr.File(visible=True), outputs=txt_file)

    dl_pdf_btn.click(
        fn=make_pdf,
        inputs=[timestamped_out, plain_out],
        outputs=pdf_file,
    ).then(fn=lambda: gr.File(visible=True), outputs=pdf_file)

demo.launch(theme=gr.themes.Soft(), css=CSS)