Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import tempfile | |
| from faster_whisper import WhisperModel | |
| from fpdf import FPDF | |
# Shared Whisper "base" model, created once when the module is imported and
# reused by transcribe(); configured for CPU with int8 compute.
model = WhisperModel("base", device="cpu", compute_type="int8")
def format_time(seconds: float) -> str:
    """Format an offset in seconds as ``HH:MM:SS.cc``.

    The value is rounded to whole centiseconds *before* it is split into
    fields, so a rounding carry propagates into minutes and hours
    (59.999 -> "00:01:00.00" rather than "00:00:60.00").

    Args:
        seconds: Offset in seconds. Negative values are clamped to zero.

    Returns:
        Zero-padded ``HH:MM:SS.cc`` string; the hour field widens past two
        digits when needed.
    """
    total_centis = int(round(max(seconds, 0.0) * 100))
    whole_seconds, centis = divmod(total_centis, 100)
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{centis:02d}"
def transcribe(video_path: str) -> tuple[str, str]:
    """Transcribe an uploaded video with the module-level Whisper model.

    Args:
        video_path: Path of the uploaded file, or ``None`` if nothing was
            uploaded.

    Returns:
        ``(timestamped, plain)``. ``timestamped`` has one
        ``[start -> end] text`` line per segment and ``plain`` is the segment
        texts joined by spaces. When there is no upload or no segments are
        produced, ``timestamped`` is empty and ``plain`` carries a
        user-facing message instead.
    """
    if video_path is None:
        return "", "Please upload a video file first."

    segment_iter, _info = model.transcribe(video_path, beam_size=5)
    # Collect everything up front so emptiness can be tested and both output
    # views can be built from the same data.
    spans = [(seg.start, seg.end, seg.text.strip()) for seg in segment_iter]
    if not spans:
        return "", "No speech detected in the video."

    stamped_lines = []
    spoken = []
    for start, end, text in spans:
        stamped_lines.append(f"[{format_time(start)} -> {format_time(end)}] {text}")
        spoken.append(text)

    return "\n".join(stamped_lines), " ".join(spoken)
def make_txt(timestamped: str, plain: str):
    """Write both transcript views to a temporary UTF-8 ``.txt`` file.

    Args:
        timestamped: Transcript text including timestamps.
        plain: Transcript text without timestamps.

    Returns:
        Path of the created file, or ``None`` (after issuing a Gradio
        warning) when ``timestamped`` is empty.
    """
    if not timestamped:
        gr.Warning("Generate a transcript first.")
        return None

    rule = "=" * 50
    content = "".join(
        [
            "TRANSCRIPTAI — TIMESTAMPED TRANSCRIPT\n",
            rule + "\n\n",
            timestamped,
            "\n\n\nPLAIN TRANSCRIPT\n",
            rule + "\n\n",
            plain,
        ]
    )

    # delete=False: the file has to outlive this call because only its path
    # is returned. The context manager guarantees the handle is closed even
    # if the write fails.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as tmp:
        tmp.write(content)
    return tmp.name
def _latin1_safe(text: str) -> str:
    """Return ``text`` with every character outside Latin-1 replaced by ``?``."""
    return text.encode("latin-1", errors="replace").decode("latin-1")


def make_pdf(timestamped: str, plain: str):
    """Render both transcript views into a temporary PDF file.

    Args:
        timestamped: Transcript text including timestamps, one entry per line.
        plain: Transcript text without timestamps.

    Returns:
        Path of the created PDF, or ``None`` (after issuing a Gradio
        warning) when ``timestamped`` is empty.
    """
    if not timestamped:
        gr.Warning("Generate a transcript first.")
        return None

    # The built-in Helvetica/Courier fonts used below only cover Latin-1;
    # other characters would make FPDF raise, so they are replaced (lossy,
    # but the export succeeds).
    safe_timestamped = _latin1_safe(timestamped)
    safe_plain = _latin1_safe(plain)

    pdf = FPDF()
    pdf.set_margins(15, 15, 15)
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Title
    pdf.set_font("Helvetica", "B", 18)
    pdf.cell(0, 12, "TranscriptAI", ln=True, align="C")
    pdf.set_font("Helvetica", "", 10)
    pdf.set_text_color(120, 120, 120)
    pdf.cell(0, 6, "Generated transcript", ln=True, align="C")
    pdf.set_text_color(0, 0, 0)
    pdf.ln(6)

    # Divider: horizontal line from x=15 to x=195 at the current y
    pdf.set_draw_color(200, 200, 200)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(6)

    # Timestamped section
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, "Timestamped Transcript", ln=True)
    pdf.ln(2)
    pdf.set_font("Courier", "", 9)
    for line in safe_timestamped.split("\n"):
        # NOTE(review): some fpdf2 releases leave the cursor at the right edge
        # after multi_cell; resetting x is harmless elsewhere and keeps
        # back-to-back full-width cells working. Confirm against the
        # installed fpdf version.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 5, line)
    pdf.ln(8)

    # Plain section
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, "Plain Transcript", ln=True)
    pdf.ln(2)
    pdf.set_font("Helvetica", "", 10)
    pdf.multi_cell(0, 6, safe_plain)

    # Reserve the output path atomically (tempfile.mktemp is deprecated and
    # race-prone); the handle is closed before FPDF writes to the path.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp_path = tmp.name
    pdf.output(tmp_path)
    return tmp_path
# Custom stylesheet passed to demo.launch(); the .title and .subtitle rules
# target the Markdown headers created with elem_classes="title"/"subtitle".
CSS = """
.title { text-align: center; margin-bottom: 0.25rem; }
.subtitle { text-align: center; color: #6b7280; margin-bottom: 1.5rem; }
"""
# UI layout: upload/controls on the left, transcript tabs on the right.
with gr.Blocks(title="TranscriptAI") as demo:
    gr.Markdown("# TranscriptAI", elem_classes="title")
    gr.Markdown(
        "Upload a video file and get a timestamped transcript — powered by Whisper.",
        elem_classes="subtitle",
    )

    with gr.Row():
        # -- Left column: upload + controls --
        with gr.Column(scale=1):
            video_input = gr.File(
                label="Upload Video",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
                type="filepath",
            )
            btn = gr.Button("Generate Transcript", variant="primary", size="lg")
            gr.Markdown(
                "_Supported: MP4, MOV, AVI, MKV, WebM_\n\n"
                "_Model: Whisper base · CPU optimized_"
            )
            gr.Markdown("---")
            gr.Markdown("### ⬇ Download Transcript")
            with gr.Row():
                dl_txt_btn = gr.Button("Download TXT", variant="secondary", size="sm")
                dl_pdf_btn = gr.Button("Download PDF", variant="secondary", size="sm")
            # Hidden until the matching download button has been clicked.
            txt_file = gr.File(label="TXT File", visible=False)
            pdf_file = gr.File(label="PDF File", visible=False)

        # -- Right column: transcript output --
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Timestamped"):
                    timestamped_out = gr.Textbox(
                        label="Transcript with Timestamps",
                        lines=28,
                        # Mirrors the HH:MM:SS.cc layout produced by format_time().
                        placeholder="[00:00:00.00 -> 00:00:05.00] Transcript will appear here...",
                    )
                with gr.Tab("Plain Text"):
                    plain_out = gr.Textbox(
                        label="Plain Transcript",
                        lines=28,
                        placeholder="Full transcript without timestamps...",
                    )

    # -- Event wiring --
    btn.click(fn=transcribe, inputs=video_input, outputs=[timestamped_out, plain_out])

    # Each download button first builds the file from the current textbox
    # contents, then reveals the corresponding file component.
    dl_txt_btn.click(
        fn=make_txt,
        inputs=[timestamped_out, plain_out],
        outputs=txt_file,
    ).then(fn=lambda: gr.File(visible=True), outputs=txt_file)
    dl_pdf_btn.click(
        fn=make_pdf,
        inputs=[timestamped_out, plain_out],
        outputs=pdf_file,
    ).then(fn=lambda: gr.File(visible=True), outputs=pdf_file)

# NOTE(review): passing theme/css to launch() only works on Gradio releases
# whose launch() accepts them; older releases take them on gr.Blocks()
# instead. Confirm against the pinned Gradio version.
demo.launch(theme=gr.themes.Soft(), css=CSS)