526 lines
19 KiB
TypeScript
526 lines
19 KiB
TypeScript
import { Injectable } from '@nestjs/common';
|
|
import { ExifDateTime, exiftool, WriteTags } from 'exiftool-vendored';
|
|
import ffmpeg, { FfprobeData, FfprobeStream } from 'fluent-ffmpeg';
|
|
import _ from 'lodash';
|
|
import { Duration } from 'luxon';
|
|
import { execFile as execFileCb } from 'node:child_process';
|
|
import fs from 'node:fs/promises';
|
|
import { Writable } from 'node:stream';
|
|
import { promisify } from 'node:util';
|
|
import sharp from 'sharp';
|
|
import { ORIENTATION_TO_SHARP_ROTATION } from 'src/constants';
|
|
import { Exif } from 'src/database';
|
|
import { AssetEditActionItem } from 'src/dtos/editing.dto';
|
|
import {
|
|
AacProfile,
|
|
Av1Profile,
|
|
ColorMatrix,
|
|
ColorPrimaries,
|
|
Colorspace,
|
|
ColorTransfer,
|
|
DvProfile,
|
|
DvSignalCompatibility,
|
|
H264Profile,
|
|
HevcProfile,
|
|
LogLevel,
|
|
RawExtractedFormat,
|
|
} from 'src/enum';
|
|
import { LoggingRepository } from 'src/repositories/logging.repository';
|
|
import {
|
|
DecodeToBufferOptions,
|
|
GenerateThumbhashOptions,
|
|
GenerateThumbnailOptions,
|
|
ImageDimensions,
|
|
ProbeOptions,
|
|
TranscodeCommand,
|
|
VideoInfo,
|
|
VideoPacketInfo,
|
|
} from 'src/types';
|
|
import { handlePromiseError } from 'src/utils/misc';
|
|
import { createAffineMatrix } from 'src/utils/transform';
|
|
|
|
/**
 * Promise wrapper around the callback-based `ffmpeg.ffprobe`.
 *
 * @param input path of the media file to probe
 * @param options extra command-line arguments forwarded to ffprobe
 * @returns the probe data reported by ffprobe; rejects with the error passed to the callback
 */
const probe = (input: string, options: string[]): Promise<FfprobeData> =>
  new Promise((resolve, reject) => {
    ffmpeg.ffprobe(input, options, (error, data) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(data);
    });
  });
|
|
|
|
// Promise-returning variant of node's callback-style `execFile` (imported here as `execFileCb`).
const execFile = promisify(execFileCb);
|
|
|
|
// Process-wide sharp configuration, applied once when this module is loaded.
// NOTE(review): per the sharp docs, a concurrency of 0 resets the thread count to its default — confirm.
sharp.concurrency(0);
// NOTE(review): `files: 0` presumably disables sharp's cache of open input files — confirm against the sharp docs.
sharp.cache({ files: 0 });
|
|
|
|
/** Converts `str` to PascalCase: lower-cases it, camel-cases it with lodash, then upper-cases the first character. */
const pascalCase = (str: string) => {
  const camelCased = _.camelCase(str.toLowerCase());
  return _.upperFirst(camelCased);
};
|
|
|
|
/**
 * Shape of the payload received by the ffmpeg `progress` listener registered in this file.
 * Only `frames` and `currentFps` are read by the code here; the remaining fields are declared but unused.
 */
type ProgressEvent = {
  /** Frames processed so far; compared against the expected frame count to compute a percentage. */
  frames: number;
  /** Current processing speed in frames per second; used to estimate the remaining time. */
  currentFps: number;
  currentKbps: number;
  targetSize: number;
  timemark: string;
  percent?: number;
};
|
|
|
|
/** Result of extracting an embedded preview from an image file. */
export type ExtractResult = {
  /** Raw bytes of the extracted preview. */
  buffer: Buffer;
  /** Encoding of `buffer`. */
  format: RawExtractedFormat;
};
|
|
|
|
/**
 * Repository wrapping the media tooling used by this file: exiftool (embedded previews and
 * metadata writes), sharp (image decoding, edits, thumbnails) and ffmpeg/ffprobe (probing and
 * transcoding).
 */
@Injectable()
export class MediaRepository {
  constructor(private logger: LoggingRepository) {
    this.logger.setContext(MediaRepository.name);
  }

  /**
   * Extracts an embedded preview from an image. Candidate tags are tried in order
   * (JpgFromRaw2, JpgFromRaw, PreviewJXL, PreviewImage) and the first one that can be
   * read is returned; each failure is logged at debug level.
   *
   * @param input file path to the input image
   * @returns ExtractResult if succeeded, or null if failed
   */
  async extract(input: string): Promise<ExtractResult | null> {
    const candidates = [
      {
        tag: 'JpgFromRaw2',
        format: RawExtractedFormat.Jpeg,
        failure: 'Could not extract JpgFromRaw2 buffer from image, trying JPEG from RAW next',
      },
      {
        tag: 'JpgFromRaw',
        format: RawExtractedFormat.Jpeg,
        failure: 'Could not extract JPEG buffer from image, trying PreviewJXL next',
      },
      {
        tag: 'PreviewJXL',
        format: RawExtractedFormat.Jxl,
        failure: 'Could not extract PreviewJXL buffer from image, trying PreviewImage next',
      },
      {
        tag: 'PreviewImage',
        format: RawExtractedFormat.Jpeg,
        failure: 'Could not extract preview buffer from image',
      },
    ] as const;

    for (const { tag, format, failure } of candidates) {
      try {
        const buffer = await exiftool.extractBinaryTagToBuffer(tag, input);
        return { buffer, format };
      } catch (error: any) {
        this.logger.debug(`${failure}: ${error}`);
      }
    }

    return null;
  }

  /**
   * Writes the given EXIF values to `output`, overwriting the file in place
   * (`-overwrite_original`).
   *
   * @param tags values to write; they are mapped onto exiftool tag names below
   * @param output path of the file to modify
   * @returns true on success; false (after logging a warning) if exiftool fails
   */
  async writeExif(tags: Partial<Exif>, output: string): Promise<boolean> {
    try {
      const tagsToWrite: WriteTags = {
        ExifImageWidth: tags.exifImageWidth,
        ExifImageHeight: tags.exifImageHeight,
        DateTimeOriginal: tags.dateTimeOriginal && ExifDateTime.fromMillis(tags.dateTimeOriginal.getTime()),
        ModifyDate: tags.modifyDate && ExifDateTime.fromMillis(tags.modifyDate.getTime()),
        TimeZone: tags.timeZone,
        GPSLatitude: tags.latitude,
        GPSLongitude: tags.longitude,
        ProjectionType: tags.projectionType,
        City: tags.city,
        Country: tags.country,
        Make: tags.make,
        Model: tags.model,
        LensModel: tags.lensModel,
        Fnumber: tags.fNumber?.toFixed(1),
        FocalLength: tags.focalLength?.toFixed(1),
        ISO: tags.iso,
        ExposureTime: tags.exposureTime,
        ProfileDescription: tags.profileDescription,
        ColorSpace: tags.colorspace,
        // a null rating is written as 0; undefined is passed through unchanged
        Rating: tags.rating === null ? 0 : tags.rating,
        // specially convert Orientation to numeric Orientation# for exiftool
        'Orientation#': tags.orientation ? Number(tags.orientation) : undefined,
      };

      await exiftool.write(output, tagsToWrite, {
        ignoreMinorErrors: true,
        writeArgs: ['-overwrite_original'],
      });
      return true;
    } catch (error: any) {
      this.logger.warn(`Could not write exif data to image: ${error.message}`);
      return false;
    }
  }

  /**
   * Copies every tag of one tag group from `source` into `target` using exiftool's
   * `-TagsFromFile`, overwriting `target` in place.
   *
   * @param tagGroup exiftool group name, used on both sides of the `GROUP:all>GROUP:all` copy expression
   * @param source file to read the tags from
   * @param target file to write the tags to
   * @returns true on success; false (after logging a warning) if exiftool fails
   */
  async copyTagGroup(tagGroup: string, source: string, target: string): Promise<boolean> {
    try {
      await exiftool.write(
        target,
        {},
        {
          ignoreMinorErrors: true,
          writeArgs: ['-TagsFromFile', source, `-${tagGroup}:all>${tagGroup}:all`, '-overwrite_original'],
        },
      );
      return true;
    } catch (error: any) {
      this.logger.warn(`Could not copy tag data to image: ${error.message}`);
      return false;
    }
  }

  /**
   * Decodes an image through the shared decoding pipeline and returns its raw pixel
   * data together with sharp's output info (`resolveWithObject: true`).
   */
  async decodeImage(input: string | Buffer, options: DecodeToBufferOptions) {
    const pipeline = await this.getImageDecodingPipeline(input, options);
    return pipeline.raw().toBuffer({ resolveWithObject: true });
  }

  /**
   * Applies edit actions to a sharp pipeline: the first `crop` action (if any) becomes an
   * `extract`, and every non-crop action is folded into one affine matrix by
   * `createAffineMatrix` and applied afterwards.
   *
   * @param pipeline pipeline to extend
   * @param edits edit actions to apply
   * @returns the extended pipeline
   */
  private async applyEdits(pipeline: sharp.Sharp, edits: AssetEditActionItem[]): Promise<sharp.Sharp> {
    const affineEditOperations = edits.filter((edit) => edit.action !== 'crop');
    const { a, b, c, d } = createAffineMatrix(affineEditOperations);

    const crop = edits.find((edit) => edit.action === 'crop');
    if (crop) {
      pipeline = pipeline.extract({
        left: Math.round(crop.parameters.x),
        top: Math.round(crop.parameters.y),
        width: Math.round(crop.parameters.width),
        height: Math.round(crop.parameters.height),
      });
    }

    return pipeline.affine([
      [a, b],
      [c, d],
    ]);
  }

  /**
   * Decodes `input`, re-encodes it in `options.format` and writes the result to `output`.
   *
   * @param input file path or in-memory image
   * @param options decoding options plus output format, quality and progressive flag
   * @param output destination file path
   */
  async generateThumbnail(input: string | Buffer, options: GenerateThumbnailOptions, output: string): Promise<void> {
    const pipeline = await this.getImageDecodingPipeline(input, options);
    const decoded = pipeline.toFormat(options.format, {
      quality: options.quality,
      // this is default in libvips (except the threshold is 90), but we need to set it manually in sharp
      chromaSubsampling: options.quality >= 80 ? '4:4:4' : '4:2:0',
      progressive: options.progressive,
    });

    await decoded.toFile(output);
  }

  /**
   * Builds the sharp pipeline shared by all image operations in this class. In order:
   * colorspace / ICC profile setup, orientation handling (skipped for raw pixel input),
   * edits, and an optional resize.
   *
   * @param input file path or in-memory image
   * @param options decoding options
   * @returns the configured, not yet executed, pipeline
   */
  private async getImageDecodingPipeline(input: string | Buffer, options: DecodeToBufferOptions) {
    let pipeline = sharp(input, {
      // some invalid images can still be processed by sharp, but we want to fail on them by default to avoid crashes
      failOn: options.processInvalidImages ? 'none' : 'error',
      limitInputPixels: false,
      raw: options.raw,
      unlimited: true,
    })
      .pipelineColorspace(options.colorspace === Colorspace.Srgb ? 'srgb' : 'rgb16')
      .withIccProfile(options.colorspace);

    if (!options.raw) {
      // with no orientation given, angle/flip/flop are all undefined and rotate() is called without an angle
      const { angle, flip, flop } = options.orientation ? ORIENTATION_TO_SHARP_ROTATION[options.orientation] : {};
      pipeline = pipeline.rotate(angle);
      if (flip) {
        pipeline = pipeline.flip();
      }

      if (flop) {
        pipeline = pipeline.flop();
      }
    }

    if (options.edits && options.edits.length > 0) {
      pipeline = await this.applyEdits(pipeline, options.edits);
    }

    if (options.size !== undefined) {
      pipeline = pipeline.resize(options.size, options.size, { fit: 'outside', withoutEnlargement: true });
    }
    return pipeline;
  }

  /**
   * Computes a thumbhash for an image: the image is decoded, shrunk to fit within
   * 100x100 (never enlarged), converted to raw pixels with an alpha channel and
   * passed to `rgbaToThumbHash`.
   *
   * @returns the thumbhash bytes
   */
  async generateThumbhash(input: string | Buffer, options: GenerateThumbhashOptions): Promise<Buffer> {
    // the thumbhash module is loaded dynamically while the decoding pipeline is being built
    const [{ rgbaToThumbHash }, decodingPipeline] = await Promise.all([
      import('thumbhash'),
      this.getImageDecodingPipeline(input, {
        colorspace: options.colorspace,
        processInvalidImages: options.processInvalidImages,
        raw: options.raw,
        edits: options.edits,
      }),
    ]);

    const pipeline = decodingPipeline.resize(100, 100, { fit: 'inside', withoutEnlargement: true }).raw().ensureAlpha();

    const { data, info } = await pipeline.toBuffer({ resolveWithObject: true });

    return Buffer.from(rgbaToThumbHash(info.width, info.height, data));
  }

  /**
   * Probes a media file with ffprobe and normalizes the result.
   *
   * Video streams exclude attached pictures. Video and audio streams are each sorted with
   * `compareStreams` (default disposition first, then higher bitrate). When a display
   * aspect ratio is present, the reported width is derived from the height and that ratio.
   *
   * @param input path of the media file
   * @param options when `countFrames` is set, ffprobe counts packets and that count is used as the frame count
   */
  async probe(input: string, options?: ProbeOptions): Promise<VideoInfo> {
    const results = await probe(input, options?.countFrames ? ['-count_packets'] : []); // gets frame count quickly: https://stackoverflow.com/a/28376817
    return {
      format: {
        formatName: results.format.format_name,
        formatLongName: results.format.format_long_name,
        duration: this.parseFloat(results.format.duration),
        bitrate: this.parseInt(results.format.bit_rate),
      },
      videoStreams: results.streams
        .filter((stream) => stream.codec_type === 'video' && !stream.disposition?.attached_pic)
        .sort((a, b) => this.compareStreams(a, b))
        .map((stream) => {
          const height = this.parseInt(stream.height);
          const dar = this.getDar(stream.display_aspect_ratio);
          return {
            index: stream.index,
            height,
            width: dar ? Math.round(height * dar) : this.parseInt(stream.width),
            codecName: stream.codec_name === 'h265' ? 'hevc' : stream.codec_name,
            profile: this.parseVideoProfile(stream.codec_name, stream.profile as string | undefined),
            level: this.parseOptionalInt(stream.level),
            frameCount: this.parseInt(options?.countFrames ? stream.nb_read_packets : stream.nb_frames),
            frameRate: this.parseFrameRate(stream.avg_frame_rate ?? stream.r_frame_rate),
            timeBase: this.parseRational(stream.time_base)?.den,
            rotation: this.parseInt(stream.rotation),
            bitrate: this.parseInt(stream.bit_rate),
            pixelFormat: stream.pix_fmt || 'yuv420p',
            colorPrimaries: this.parseEnum(ColorPrimaries, stream.color_primaries) ?? ColorPrimaries.Unknown,
            colorMatrix: this.parseEnum(ColorMatrix, stream.color_space) ?? ColorMatrix.Unknown,
            colorTransfer: this.parseEnum(ColorTransfer, stream.color_transfer) ?? ColorTransfer.Unknown,
            dvProfile: this.parseOptionalInt(stream.dv_profile) as DvProfile | undefined,
            dvLevel: this.parseOptionalInt(stream.dv_level),
            dvBlSignalCompatibilityId: this.parseOptionalInt(stream.dv_bl_signal_compatibility_id) as
              | DvSignalCompatibility
              | undefined,
          };
        }),
      audioStreams: results.streams
        .filter((stream) => stream.codec_type === 'audio')
        .sort((a, b) => this.compareStreams(a, b))
        .map((stream) => ({
          index: stream.index,
          codecName: stream.codec_name,
          profile:
            stream.codec_name === 'aac' ? this.parseEnum(AacProfile, stream.profile as string | undefined) : undefined,
          bitrate: this.parseInt(stream.bit_rate),
        })),
    };
  }

  /**
   * Needed for accurate segments, especially when remuxing, seeking and/or VFR is involved.
   * Scanning packets for keyframes in JS is much faster than -skip_frame nokey since it avoids decoding the video.
   *
   * Runs ffprobe over one stream and reads one CSV row (`pts,duration,flags`) per packet.
   * Rows whose pts or duration is not an integer are skipped.
   *
   * @param input path of the media file
   * @param streamIndex value passed to ffprobe's `-select_streams`
   * @returns packet statistics, or null when no non-discarded packet was found
   */
  async probePackets(input: string, streamIndex: number): Promise<VideoPacketInfo | null> {
    // One CSV row is printed per packet, so long videos easily exceed execFile's default
    // 1 MiB maxBuffer, which would make the call reject. Allow considerably more output.
    const maxBuffer = 256 * 1024 * 1024;
    const { stdout } = await execFile(
      'ffprobe',
      [
        '-v',
        'error',
        '-select_streams',
        String(streamIndex),
        '-show_entries',
        'packet=pts,duration,flags',
        '-of',
        'csv=p=0',
        input,
      ],
      { maxBuffer },
    );

    let totalDuration = 0;
    const keyframePts: number[] = [];
    const keyframeAccDuration: number[] = [];
    const keyframeOwnDuration: number[] = [];
    const postDiscard: { pts: number; duration: number }[] = [];
    for (const line of stdout.split('\n')) {
      if (!line) {
        continue;
      }
      // default the flags column so a row without it is treated as a plain packet instead of throwing
      const [ptsStr, durationStr, flags = ''] = line.split(',');
      const pts = Number.parseInt(ptsStr);
      const duration = Number.parseInt(durationStr);
      if (Number.isNaN(pts) || Number.isNaN(duration)) {
        continue;
      }
      // Discarded packets don't contribute to packet count, but still contribute to video duration
      totalDuration += duration;
      if (flags[1] !== 'D') {
        postDiscard.push({ pts, duration });
      }
      if (flags[0] === 'K') {
        keyframePts.push(pts);
        keyframeAccDuration.push(totalDuration);
        // VFR content can have variable duration keyframes,
        // so we need to track their duration separately for accurate segment boundaries.
        // Non-keyframes are accounted for in totalDuration.
        keyframeOwnDuration.push(duration);
      }
    }

    if (postDiscard.length === 0) {
      return null;
    }

    return {
      totalDuration,
      packetCount: postDiscard.length,
      outputFrames: this.cfrOutputFrames(postDiscard, postDiscard.length / totalDuration),
      keyframePts,
      keyframeAccDuration,
      keyframeOwnDuration,
    };
  }

  /**
   * Transcodes `input` to `output` with ffmpeg.
   *
   * With `options.twoPass`, ffmpeg runs twice: a first pass that writes only the pass log
   * (its media output goes to `/dev/null`), then the real encode. The pass log files are
   * removed after a successful second pass, and best-effort removed if either pass fails.
   *
   * @param input path of the source file
   * @param output destination file path, or a writable stream (single-pass only)
   * @param options ffmpeg input/output options, two-pass flag and progress settings
   * @returns a promise that resolves when ffmpeg ends and rejects with ffmpeg's error
   * @throws TypeError synchronously (not as a rejection) when two-pass is requested with a stream output
   */
  transcode(input: string, output: string | Writable, options: TranscodeCommand): Promise<void> {
    if (!options.twoPass) {
      return new Promise((resolve, reject) => {
        this.configureFfmpegCall(input, output, options)
          .on('error', reject)
          .on('end', () => resolve())
          .run();
      });
    }

    if (typeof output !== 'string') {
      throw new TypeError('Two-pass transcoding does not support writing to a stream');
    }

    const passLog = `${output}-0.log`;
    const mbtreeLog = `${output}-0.log.mbtree`;

    // two-pass allows for precise control of bitrate at the cost of running twice
    // recommended for vp9 for better quality and compression
    return new Promise((resolve, reject) => {
      // a failed pass may leave log files behind; remove whatever exists before rejecting
      const fail = (error: unknown) => {
        handlePromiseError(fs.rm(passLog, { force: true }), this.logger);
        handlePromiseError(fs.rm(mbtreeLog, { force: true }), this.logger);
        reject(error);
      };

      // first pass output is not saved as only the .log file is needed
      this.configureFfmpegCall(input, '/dev/null', options)
        .addOptions('-pass', '1')
        .addOptions('-passlogfile', output)
        .addOptions('-f null')
        .on('error', fail)
        .on('end', () => {
          // second pass
          this.configureFfmpegCall(input, output, options)
            .addOptions('-pass', '2')
            .addOptions('-passlogfile', output)
            .on('error', fail)
            .on('end', () => handlePromiseError(fs.unlink(passLog), this.logger))
            .on('end', () => handlePromiseError(fs.rm(mbtreeLog, { force: true }), this.logger))
            .on('end', () => resolve())
            .run();
        })
        .run();
    });
  }

  /**
   * Reads an image's dimensions and alpha flag from sharp's metadata.
   * Missing width/height default to 0 and a missing alpha flag to false.
   */
  async getImageMetadata(input: string | Buffer): Promise<ImageDimensions & { isTransparent: boolean }> {
    const { width = 0, height = 0, hasAlpha = false } = await sharp(input).metadata();
    return { width, height, isTransparent: hasAlpha };
  }

  /**
   * Creates (but does not run) an ffmpeg command for `input` -> `output` with niceness 10.
   * The command line is logged at debug level on start and ffmpeg's stderr (or the error)
   * is logged on failure. When debug logging is enabled and a frame count is known,
   * progress is logged roughly every `percentInterval` percent of frames.
   */
  private configureFfmpegCall(input: string, output: string | Writable, options: TranscodeCommand) {
    const ffmpegCall = ffmpeg(input, { niceness: 10 })
      .inputOptions(options.inputOptions)
      .outputOptions(options.outputOptions)
      .output(output)
      .on('start', (command: string) => this.logger.debug(command))
      // the middle argument is unused; it is named so it does not shadow the lodash import `_`
      .on('error', (error, _stdout, stderr) => this.logger.error(stderr || error));

    const { frameCount, percentInterval } = options.progress;
    // number of frames that corresponds to one logging interval
    const frameInterval = Math.ceil(frameCount / (100 / percentInterval));
    if (this.logger.isLevelEnabled(LogLevel.Debug) && frameCount && frameInterval) {
      let lastProgressFrame: number = 0;
      ffmpegCall.on('progress', (progress: ProgressEvent) => {
        if (progress.frames - lastProgressFrame < frameInterval) {
          return;
        }

        lastProgressFrame = progress.frames;
        const percent = ((progress.frames / frameCount) * 100).toFixed(2);
        // remaining frames divided by current speed, floored to whole seconds, expressed in milliseconds
        const ms = progress.currentFps ? Math.floor((frameCount - progress.frames) / progress.currentFps) * 1000 : 0;
        const duration = ms ? Duration.fromMillis(ms).rescale().toHuman({ unitDisplay: 'narrow' }) : '';
        const outputText = output instanceof Writable ? 'stream' : output.split('/').pop();
        this.logger.debug(
          `Transcoding ${percent}% done${duration ? `, estimated ${duration} remaining` : ''} for output ${outputText}`,
        );
      });
    }

    return ffmpegCall;
  }

  /** Parses an integer, returning 0 when the value is missing or not a number. */
  private parseInt(value: string | number | undefined): number {
    return Number.parseInt(value as string) || 0;
  }

  /** Parses a float, returning 0 when the value is missing or not a number. */
  private parseFloat(value: string | number | undefined): number {
    return Number.parseFloat(value as string) || 0;
  }

  /** Parses an integer, returning undefined (rather than 0) when the value is missing or not a number. */
  private parseOptionalInt(value: string | number | undefined): number | undefined {
    const parsed = Number.parseInt(value as string);
    return Number.isNaN(parsed) ? undefined : parsed;
  }

  /**
   * Looks up an enum member by the PascalCase form of `value`.
   *
   * @returns the member's value, or undefined when `value` is empty or no member has that name
   */
  private parseEnum<E extends Record<string, number | string>>(enumObj: E, value?: string) {
    return value ? (enumObj[pascalCase(value)] as Extract<E[keyof E], number> | undefined) : undefined;
  }

  /**
   * Parse a rational like "60000/1001" or "1/600" into `{ num, den }`.
   * A missing denominator defaults to 1. Returns undefined for empty input or when
   * either part is zero or not a number.
   */
  private parseRational(value: string | undefined): { num: number; den: number } | undefined {
    if (!value) {
      return;
    }
    const [num, den = 1] = value.split('/').map(Number);
    if (num && den) {
      return { num, den };
    }
  }

  /** Converts a rational frame-rate string to a number, or undefined when it cannot be parsed. */
  private parseFrameRate(value: string | undefined): number | undefined {
    const r = this.parseRational(value);
    return r ? r.num / r.den : undefined;
  }

  /** Converts a "W:H" display aspect ratio to W / H; returns 0 when it is missing or unparsable. */
  private getDar(dar: string | undefined): number {
    if (dar) {
      const [darW, darH] = dar.split(':').map(Number);
      if (darW && darH) {
        return darW / darH;
      }
    }

    return 0;
  }

  /**
   * Maps a codec's profile name onto the matching profile enum (h264, h265/hevc, av1).
   * Returns undefined for any other codec.
   */
  private parseVideoProfile(codec?: string, profile?: string) {
    switch (codec) {
      case 'h264': {
        return this.parseEnum(H264Profile, profile);
      }
      case 'h265':
      case 'hevc': {
        return this.parseEnum(HevcProfile, profile);
      }
      case 'av1': {
        return this.parseEnum(Av1Profile, profile);
      }
    }
  }

  /** Sort comparator: streams with the default disposition first, then by descending bitrate. */
  private compareStreams(a: FfprobeStream, b: FfprobeStream): number {
    const d = (b.disposition?.default ?? 0) - (a.disposition?.default ?? 0);
    if (d !== 0) {
      return d;
    }
    return this.parseInt(b.bit_rate) - this.parseInt(a.bit_rate);
  }

  /**
   * Counts output frames by walking the packets in PTS order. For each packet, `delta` is the
   * packet's end position in output slots minus the slots already emitted; the packet then
   * contributes 0 frames (delta < -1.1), `round(delta)` frames (delta > 1.1) or 1 frame.
   * NOTE(review): this looks like ffmpeg's constant-frame-rate drop/duplicate rule — confirm.
   *
   * @param packets packets with pts and duration in timestamp ticks; the array is not modified
   * @param slotsPerTick output frame slots per tick (the caller passes packet count / total duration)
   * @returns the total number of output frames; 0 for an empty packet list
   */
  private cfrOutputFrames(packets: { pts: number; duration: number }[], slotsPerTick: number) {
    if (packets.length === 0) {
      return 0;
    }
    // Packets may be out of PTS order due to B-frames; sort a copy so the caller's array is untouched
    const ordered = [...packets].sort((a, b) => a.pts - b.pts);
    const firstPts = ordered[0].pts;
    let outputFrames = 0;
    let nextPts = 0;
    for (const pkt of ordered) {
      const delta = (pkt.pts - firstPts) * slotsPerTick - nextPts + pkt.duration * slotsPerTick;
      const nb = delta < -1.1 ? 0 : delta > 1.1 ? Math.round(delta) : 1;
      outputFrames += nb;
      nextPts += nb;
    }
    return outputFrames;
  }
}
|