TranscriptAI / app.py
akshit15's picture
Add TXT and PDF download buttons
f3a376b
Raw
History Blame Contribute Delete
5.47 kB
import gradio as gr
import tempfile
from faster_whisper import WhisperModel
from fpdf import FPDF
# One Whisper model instance, created at import time and reused by every
# transcribe() call: "base" checkpoint, CPU device, int8 compute type.
model = WhisperModel(
    "base",
    device="cpu",
    compute_type="int8",
)
def format_time(seconds: float) -> str:
    """Format an offset in seconds as ``HH:MM:SS.cc``.

    The value is rounded to whole centiseconds *before* being split into
    fields. Rounding only the seconds field at format time would let values
    such as 59.999 render as ``00:00:60.00``; rounding first lets the carry
    propagate into the minutes and hours fields.

    Args:
        seconds: Offset in seconds.

    Returns:
        The zero-padded ``HH:MM:SS.cc`` string (the hours field grows past
        two digits when needed).
    """
    total_centis = int(round(seconds * 100))
    whole_seconds, centis = divmod(total_centis, 100)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{centis:02d}"
def transcribe(video_path: str) -> tuple[str, str]:
    """Transcribe the uploaded file with the module-level Whisper model.

    Args:
        video_path: Path of the uploaded file, or ``None`` when nothing has
            been uploaded.

    Returns:
        A ``(timestamped, plain)`` pair. ``timestamped`` has one
        ``[start -> end] text`` line per segment; ``plain`` is the segment
        texts joined by single spaces. When there is no upload or no
        segments were produced, ``timestamped`` is empty and the second
        element carries a message for the user instead.
    """
    if video_path is None:
        return "", "Please upload a video file first."

    segment_iter, _info = model.transcribe(video_path, beam_size=5)
    # Materialise the result so it can be tested for emptiness and walked.
    pieces = list(segment_iter)
    if len(pieces) == 0:
        return "", "No speech detected in the video."

    stamped_lines = []
    spoken = []
    for piece in pieces:
        begin = format_time(piece.start)
        finish = format_time(piece.end)
        stamped_lines.append(f"[{begin} -> {finish}] {piece.text.strip()}")
        spoken.append(piece.text.strip())

    return "\n".join(stamped_lines), " ".join(spoken)
def make_txt(timestamped: str, plain: str):
    """Write both transcript variants to a temporary UTF-8 ``.txt`` file.

    Args:
        timestamped: Transcript text with timestamps; an empty value means
            no transcript has been generated yet.
        plain: Transcript text without timestamps.

    Returns:
        The path of the created file, or ``None`` (after showing a
        ``gr.Warning``) when ``timestamped`` is empty.
    """
    if not timestamped:
        gr.Warning("Generate a transcript first.")
        return None

    rule = "=" * 50
    content = (
        # The dash in this heading was mis-encoded as mojibake; it is an em dash.
        "TRANSCRIPTAI — TIMESTAMPED TRANSCRIPT\n"
        + rule + "\n\n"
        + timestamped
        + "\n\n\nPLAIN TRANSCRIPT\n"
        + rule + "\n\n"
        + plain
    )

    # delete=False: the file has to outlive this function because only its
    # path is handed back to the caller. The context manager guarantees the
    # handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as tmp:
        tmp.write(content)
    return tmp.name
def make_pdf(timestamped: str, plain: str):
    """Render both transcript variants into a temporary PDF file.

    The page has a centred title block, a divider, the timestamped transcript
    in Courier, and the plain transcript in Helvetica.

    Args:
        timestamped: Transcript text with timestamps, one entry per line; an
            empty value means no transcript has been generated yet.
        plain: Transcript text without timestamps.

    Returns:
        The path of the created ``.pdf`` file, or ``None`` (after showing a
        ``gr.Warning``) when ``timestamped`` is empty.
    """
    if not timestamped:
        gr.Warning("Generate a transcript first.")
        return None

    def latin1_safe(text: str) -> str:
        # The built-in Helvetica/Courier fonts only cover Latin-1. Characters
        # outside that range (curly quotes, non-Western scripts, ...) would
        # make the PDF writer fail, so they are replaced with "?". Lossless
        # output would need a bundled Unicode TTF registered via add_font().
        return text.encode("latin-1", errors="replace").decode("latin-1")

    pdf = FPDF()
    pdf.set_margins(15, 15, 15)
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Title
    pdf.set_font("Helvetica", "B", 18)
    pdf.cell(0, 12, "TranscriptAI", ln=True, align="C")
    pdf.set_font("Helvetica", "", 10)
    pdf.set_text_color(120, 120, 120)
    pdf.cell(0, 6, "Generated transcript", ln=True, align="C")
    pdf.set_text_color(0, 0, 0)
    pdf.ln(6)

    # Divider
    pdf.set_draw_color(200, 200, 200)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(6)

    # Timestamped section
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, "Timestamped Transcript", ln=True)
    pdf.ln(2)
    pdf.set_font("Courier", "", 9)
    for line in timestamped.split("\n"):
        # NOTE(review): fpdf2's multi_cell defaults to leaving the cursor at
        # the right edge (new_x=RIGHT), which leaves no width for the next
        # full-width cell. Resetting x is harmless on versions that already
        # return to the left margin.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 5, latin1_safe(line))
    pdf.ln(8)

    # Plain section
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, "Plain Transcript", ln=True)
    pdf.ln(2)
    pdf.set_font("Helvetica", "", 10)
    pdf.multi_cell(0, 6, latin1_safe(plain))

    # tempfile.mktemp() only invents a name, so another process can claim the
    # path before it is written. NamedTemporaryFile creates the file
    # atomically; delete=False keeps it on disk for the caller.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp_path = tmp.name
    pdf.output(tmp_path)
    return tmp_path
# Page-level CSS for the two header Markdown blocks (matched via elem_classes).
CSS = """
.title { text-align: center; margin-bottom: 0.25rem; }
.subtitle { text-align: center; color: #6b7280; margin-bottom: 1.5rem; }
"""

with gr.Blocks(title="TranscriptAI") as demo:
    gr.Markdown("# TranscriptAI", elem_classes="title")
    gr.Markdown(
        "Upload a video file and get a timestamped transcript — powered by Whisper.",
        elem_classes="subtitle",
    )

    with gr.Row():
        # ── Left column: upload + controls ──────────────────────────────
        with gr.Column(scale=1):
            video_input = gr.File(
                label="Upload Video",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
                type="filepath",
            )
            btn = gr.Button("Generate Transcript", variant="primary", size="lg")
            gr.Markdown(
                "_Supported: MP4, MOV, AVI, MKV, WebM_\n\n"
                "_Model: Whisper base · CPU optimized_"
            )
            gr.Markdown("---")
            gr.Markdown("### ⬇ Download Transcript")
            with gr.Row():
                dl_txt_btn = gr.Button("Download TXT", variant="secondary", size="sm")
                dl_pdf_btn = gr.Button("Download PDF", variant="secondary", size="sm")
            # Download slots stay hidden until a file has been generated.
            txt_file = gr.File(label="TXT File", visible=False)
            pdf_file = gr.File(label="PDF File", visible=False)

        # ── Right column: transcript output ─────────────────────────────
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Timestamped"):
                    timestamped_out = gr.Textbox(
                        label="Transcript with Timestamps",
                        lines=28,
                        placeholder="[00:00:00 -> 00:00:05] Transcript will appear here...",
                    )
                with gr.Tab("Plain Text"):
                    plain_out = gr.Textbox(
                        label="Plain Transcript",
                        lines=28,
                        placeholder="Full transcript without timestamps...",
                    )

    # ── Event wiring ────────────────────────────────────────────────────
    btn.click(fn=transcribe, inputs=video_input, outputs=[timestamped_out, plain_out])

    # make_txt / make_pdf return None (after a warning) when there is no
    # transcript yet. The follow-up step therefore reveals a download slot
    # only when it actually received a file, instead of showing an empty box.
    dl_txt_btn.click(
        fn=make_txt,
        inputs=[timestamped_out, plain_out],
        outputs=txt_file,
    ).then(
        fn=lambda path: gr.File(visible=bool(path)),
        inputs=txt_file,
        outputs=txt_file,
    )
    dl_pdf_btn.click(
        fn=make_pdf,
        inputs=[timestamped_out, plain_out],
        outputs=pdf_file,
    ).then(
        fn=lambda path: gr.File(visible=bool(path)),
        inputs=pdf_file,
        outputs=pdf_file,
    )

# NOTE(review): theme/css are passed to launch() here; older Gradio releases
# take them on gr.Blocks() instead — confirm against the pinned version.
demo.launch(theme=gr.themes.Soft(), css=CSS)