How to Generate Subtitles from Video Using Node.js & OpenAI
A complete guide to adding automated subtitles to videos. Compare the easy way using Orshot's Video API versus the manual DIY approach using FFmpeg and OpenAI Whisper
By Rishi Mohan

Video content is dominant right now, but here is a statistic that matters: over 80% of social media videos are watched on mute. If you aren't burning subtitles into your videos, you are losing the vast majority of your audience.
If you have researched this topic—perhaps scrolling through Reddit threads on r/ffmpeg or r/node—you likely ran into the same common pain points:
- Quality loss: if you don't know which `CRF` or bitrate to set, your crystal clear 4K video turns into a blocky mess.
- Styling: changing how the subtitles look means dealing with `libass` configs or custom drawing filters.

In this guide, we will look at two ways to solve this: the Automated API method (using Orshot) and the Manual Node.js method (building it yourself).
If you want to skip the engineering headaches of server-side FFmpeg, GPUs, and file storage, Orshot handles the entire pipeline in a single API request.
Orshot's Video Generation API automatically:
- Delivers the finished video as an `.mp4`.

You simply pass your video URL and styling preferences. You can even use Google Fonts instantly.
const response = await fetch("https://api.orshot.com/v1/studio/render", {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: "Bearer YOUR_API_KEY",
},
body: JSON.stringify({
templateId: "your-video-template",
response: { format: "mp4" },
videoOptions: {
// The magic happens here
subtitleSource: "https://your-bucket.com/video.mp4",
subtitleColor: "#FFFFFF",
subtitleBackground: "rgba(0,0,0,0.6)", // Semi-transparent box
subtitleFontFamily: "Inter", // Auto-loads Google Font
subtitleFontSize: "32px",
subtitleBottom: "80px", // Position from bottom
},
}),
});That's it. You get a finished URL back. No servers to manage, no fluent-ffmpeg callbacks to debug.
If you prefer to build this pipeline yourself, we will use Node.js, fluent-ffmpeg, and OpenAI's Whisper API.
npm install fluent-ffmpeg openai

(`fs` is built into Node.js, so it does not need to be installed from npm.)

OpenAI's Whisper API accepts audio files. First, we need to strip the audio track from your video to reduce upload size and processing time.
const ffmpeg = require("fluent-ffmpeg");
function extractAudio(videoPath, audioPath) {
return new Promise((resolve, reject) => {
ffmpeg(videoPath)
.output(audioPath)
.noVideo()
.audioCodec("libmp3lame")
.on("end", resolve)
.on("error", reject)
.run();
});
}We upload the audio to OpenAI. The pricing is $0.006 per minute. That means a 10-minute video costs about $0.06 to transcribe.
const OpenAI = require("openai");
const fs = require("fs");
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
async function transcribe(audioPath) {
const transcription = await openai.audio.transcriptions.create({
file: fs.createReadStream(audioPath),
model: "whisper-1",
response_format: "srt", // Request SRT format directly
});
return transcription; // Returns standard SRT string
}This is the tricky part. We need to take that SRT string, save it to a file, and use FFmpeg's subtitles filter to burn it in.
Critical Note on Quality:
By default, FFmpeg might re-encode your video at a low bitrate. To preserve quality, we use the -crf (Constant Rate Factor) flag. Lower is better quality. A CRF of 23 is standard, 18 is nearly visually lossless.
function burnSubtitles(videoPath, subtitlePath, outputPath) {
return new Promise((resolve, reject) => {
ffmpeg(videoPath)
.outputOptions([
"-vf subtitles=" + subtitlePath, // The complex filter
"-crf 23", // Maintain visual quality
"-preset fast", // Encoding speed
])
.save(outputPath)
.on("end", () => console.log("Subtitles burned successfully!"))
.on("error", (err) => console.error("Error burning subs:", err));
});
}Putting it all together, here is a complete script you can run. This handles audio extraction, transcription, and subtitle burning in one flow.
/* generate-subs.js */
const ffmpeg = require("fluent-ffmpeg");
const OpenAI = require("openai");
const fs = require("fs");
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
/**
 * Pull the audio out of a video and write it to a separate file.
 *
 * @param {string} videoPath - Source video handed to `ffmpeg()`.
 * @param {string} audioPath - Destination handed to `.output()`.
 * @returns {Promise} Resolved by the command's "end" event, rejected by its
 *   "error" event.
 */
async function extractAudio(videoPath, audioPath) {
  console.log("🔊 Extracting audio...");

  const runExtraction = (resolve, reject) => {
    // No video stream; audio encoded with the libmp3lame codec.
    const command = ffmpeg(videoPath)
      .output(audioPath)
      .noVideo()
      .audioCodec("libmp3lame");

    command.on("end", resolve).on("error", reject).run();
  };

  return new Promise(runExtraction);
}
/**
 * Upload an audio file to OpenAI's transcription endpoint using the
 * "whisper-1" model, requesting the result in SRT format.
 *
 * @param {string} audioPath - File streamed to the API via `fs.createReadStream`.
 * @returns {Promise<*>} Whatever the SDK call resolves to for the SRT request.
 */
async function transcribe(audioPath) {
  console.log("🎙️ Transcribing with OpenAI Whisper...");

  const { transcriptions } = openai.audio;

  const request = {
    file: fs.createReadStream(audioPath),
    model: "whisper-1",
    response_format: "srt",
  };

  const srt = await transcriptions.create(request);
  return srt;
}
/**
 * Burn a subtitle file into a video with FFmpeg's `subtitles` filter.
 *
 * @param {string} videoPath - Source video handed to `ffmpeg()`.
 * @param {string} subtitlePath - Subtitle file to render onto the frames.
 * @param {string} outputPath - Where the re-encoded video is saved.
 * @returns {Promise} Resolved by the command's "end" event, rejected by its
 *   "error" event.
 */
async function burnSubtitles(videoPath, subtitlePath, outputPath) {
  console.log("🔥 Burning subtitles (this may take a while)...");

  // The path is parsed twice by FFmpeg: once as a filter option value
  // (escape \ ' :) and once as part of the filtergraph (escape \ ' [ ] , ;).
  const escapedPath = String(subtitlePath)
    .replace(/([\\':])/g, "\\$1")
    .replace(/([\\'\[\],;])/g, "\\$1");

  return new Promise((resolve, reject) => {
    ffmpeg(videoPath)
      // Multiple arguments are passed through verbatim; a single
      // "-vf subtitles=..." string is only split when it contains exactly one
      // space, so a path with spaces would reach FFmpeg as one bogus option.
      .outputOptions(
        "-vf", `subtitles=${escapedPath}`,
        "-crf", "23", // Balanced quality
        "-preset", "fast", // Faster encoding
      )
      // Register handlers before the run is started by save().
      .on("end", resolve)
      .on("error", reject)
      .save(outputPath);
  });
}
(async () => {
try {
const INPUT_VIDEO = "input.mp4";
const TEMP_AUDIO = "temp_audio.mp3";
const TEMP_SRT = "temp_subs.srt";
const OUTPUT_VIDEO = "output.mp4";
await extractAudio(INPUT_VIDEO, TEMP_AUDIO);
const srtData = await transcribe(TEMP_AUDIO);
fs.writeFileSync(TEMP_SRT, srtData);
await burnSubtitles(INPUT_VIDEO, TEMP_SRT, OUTPUT_VIDEO);
console.log("✅ Done! saved to", OUTPUT_VIDEO);
// Cleanup temp files
fs.unlinkSync(TEMP_AUDIO);
fs.unlinkSync(TEMP_SRT);
} catch (error) {
console.error("❌ Error:", error);
}
})();Building your own subtitle generator is a great learning project. You get granular control and only pay for the OpenAI credits ($0.006/min).
However, you inherit the maintenance of an FFmpeg pipeline, which includes managing file storage, server scaling for concurrent renders, and handling weird video formats.
If you need a reliable, scalable solution that just works, give the Orshot Video Generation API a try.
![Customer photo](/customers/ibby.jpeg)
![Customer photo](/customers/alex.jpg)


![Customer photo](/customers/ivan.jpg)