mirror-immich/server/src/repositories/media.repository.ts

526 lines
19 KiB
TypeScript

import { Injectable } from '@nestjs/common';
import { ExifDateTime, exiftool, WriteTags } from 'exiftool-vendored';
import ffmpeg, { FfprobeData, FfprobeStream } from 'fluent-ffmpeg';
import _ from 'lodash';
import { Duration } from 'luxon';
import { execFile as execFileCb } from 'node:child_process';
import fs from 'node:fs/promises';
import { Writable } from 'node:stream';
import { promisify } from 'node:util';
import sharp from 'sharp';
import { ORIENTATION_TO_SHARP_ROTATION } from 'src/constants';
import { Exif } from 'src/database';
import { AssetEditActionItem } from 'src/dtos/editing.dto';
import {
AacProfile,
Av1Profile,
ColorMatrix,
ColorPrimaries,
Colorspace,
ColorTransfer,
DvProfile,
DvSignalCompatibility,
H264Profile,
HevcProfile,
LogLevel,
RawExtractedFormat,
} from 'src/enum';
import { LoggingRepository } from 'src/repositories/logging.repository';
import {
DecodeToBufferOptions,
GenerateThumbhashOptions,
GenerateThumbnailOptions,
ImageDimensions,
ProbeOptions,
TranscodeCommand,
VideoInfo,
VideoPacketInfo,
} from 'src/types';
import { handlePromiseError } from 'src/utils/misc';
import { createAffineMatrix } from 'src/utils/transform';
const probe = (input: string, options: string[]): Promise<FfprobeData> =>
new Promise((resolve, reject) =>
ffmpeg.ffprobe(input, options, (error, data) => (error ? reject(error) : resolve(data))),
);
const execFile = promisify(execFileCb);
sharp.concurrency(0);
sharp.cache({ files: 0 });
const pascalCase = (str: string) => _.upperFirst(_.camelCase(str.toLowerCase()));
type ProgressEvent = {
frames: number;
currentFps: number;
currentKbps: number;
targetSize: number;
timemark: string;
percent?: number;
};
export type ExtractResult = {
buffer: Buffer;
format: RawExtractedFormat;
};
@Injectable()
export class MediaRepository {
constructor(private logger: LoggingRepository) {
this.logger.setContext(MediaRepository.name);
}
/**
*
* @param input file path to the input image
* @returns ExtractResult if succeeded, or null if failed
*/
async extract(input: string): Promise<ExtractResult | null> {
try {
const buffer = await exiftool.extractBinaryTagToBuffer('JpgFromRaw2', input);
return { buffer, format: RawExtractedFormat.Jpeg };
} catch (error: any) {
this.logger.debug(`Could not extract JpgFromRaw2 buffer from image, trying JPEG from RAW next: ${error}`);
}
try {
const buffer = await exiftool.extractBinaryTagToBuffer('JpgFromRaw', input);
return { buffer, format: RawExtractedFormat.Jpeg };
} catch (error: any) {
this.logger.debug(`Could not extract JPEG buffer from image, trying PreviewJXL next: ${error}`);
}
try {
const buffer = await exiftool.extractBinaryTagToBuffer('PreviewJXL', input);
return { buffer, format: RawExtractedFormat.Jxl };
} catch (error: any) {
this.logger.debug(`Could not extract PreviewJXL buffer from image, trying PreviewImage next: ${error}`);
}
try {
const buffer = await exiftool.extractBinaryTagToBuffer('PreviewImage', input);
return { buffer, format: RawExtractedFormat.Jpeg };
} catch (error: any) {
this.logger.debug(`Could not extract preview buffer from image: ${error}`);
return null;
}
}
async writeExif(tags: Partial<Exif>, output: string): Promise<boolean> {
try {
const tagsToWrite: WriteTags = {
ExifImageWidth: tags.exifImageWidth,
ExifImageHeight: tags.exifImageHeight,
DateTimeOriginal: tags.dateTimeOriginal && ExifDateTime.fromMillis(tags.dateTimeOriginal.getTime()),
ModifyDate: tags.modifyDate && ExifDateTime.fromMillis(tags.modifyDate.getTime()),
TimeZone: tags.timeZone,
GPSLatitude: tags.latitude,
GPSLongitude: tags.longitude,
ProjectionType: tags.projectionType,
City: tags.city,
Country: tags.country,
Make: tags.make,
Model: tags.model,
LensModel: tags.lensModel,
Fnumber: tags.fNumber?.toFixed(1),
FocalLength: tags.focalLength?.toFixed(1),
ISO: tags.iso,
ExposureTime: tags.exposureTime,
ProfileDescription: tags.profileDescription,
ColorSpace: tags.colorspace,
Rating: tags.rating === null ? 0 : tags.rating,
// specially convert Orientation to numeric Orientation# for exiftool
'Orientation#': tags.orientation ? Number(tags.orientation) : undefined,
};
await exiftool.write(output, tagsToWrite, {
ignoreMinorErrors: true,
writeArgs: ['-overwrite_original'],
});
return true;
} catch (error: any) {
this.logger.warn(`Could not write exif data to image: ${error.message}`);
return false;
}
}
async copyTagGroup(tagGroup: string, source: string, target: string): Promise<boolean> {
try {
await exiftool.write(
target,
{},
{
ignoreMinorErrors: true,
writeArgs: ['-TagsFromFile', source, `-${tagGroup}:all>${tagGroup}:all`, '-overwrite_original'],
},
);
return true;
} catch (error: any) {
this.logger.warn(`Could not copy tag data to image: ${error.message}`);
return false;
}
}
async decodeImage(input: string | Buffer, options: DecodeToBufferOptions) {
const pipeline = await this.getImageDecodingPipeline(input, options);
return pipeline.raw().toBuffer({ resolveWithObject: true });
}
private async applyEdits(pipeline: sharp.Sharp, edits: AssetEditActionItem[]): Promise<sharp.Sharp> {
const affineEditOperations = edits.filter((edit) => edit.action !== 'crop');
const matrix = createAffineMatrix(affineEditOperations);
const crop = edits.find((edit) => edit.action === 'crop');
const dimensions = await pipeline.metadata();
if (crop) {
pipeline = pipeline.extract({
left: crop ? Math.round(crop.parameters.x) : 0,
top: crop ? Math.round(crop.parameters.y) : 0,
width: crop ? Math.round(crop.parameters.width) : dimensions.width || 0,
height: crop ? Math.round(crop.parameters.height) : dimensions.height || 0,
});
}
const { a, b, c, d } = matrix;
pipeline = pipeline.affine([
[a, b],
[c, d],
]);
return pipeline;
}
async generateThumbnail(input: string | Buffer, options: GenerateThumbnailOptions, output: string): Promise<void> {
const pipeline = await this.getImageDecodingPipeline(input, options);
const decoded = pipeline.toFormat(options.format, {
quality: options.quality,
// this is default in libvips (except the threshold is 90), but we need to set it manually in sharp
chromaSubsampling: options.quality >= 80 ? '4:4:4' : '4:2:0',
progressive: options.progressive,
});
await decoded.toFile(output);
}
private async getImageDecodingPipeline(input: string | Buffer, options: DecodeToBufferOptions) {
let pipeline = sharp(input, {
// some invalid images can still be processed by sharp, but we want to fail on them by default to avoid crashes
failOn: options.processInvalidImages ? 'none' : 'error',
limitInputPixels: false,
raw: options.raw,
unlimited: true,
})
.pipelineColorspace(options.colorspace === Colorspace.Srgb ? 'srgb' : 'rgb16')
.withIccProfile(options.colorspace);
if (!options.raw) {
const { angle, flip, flop } = options.orientation ? ORIENTATION_TO_SHARP_ROTATION[options.orientation] : {};
pipeline = pipeline.rotate(angle);
if (flip) {
pipeline = pipeline.flip();
}
if (flop) {
pipeline = pipeline.flop();
}
}
if (options.edits && options.edits.length > 0) {
pipeline = await this.applyEdits(pipeline, options.edits);
}
if (options.size !== undefined) {
pipeline = pipeline.resize(options.size, options.size, { fit: 'outside', withoutEnlargement: true });
}
return pipeline;
}
async generateThumbhash(input: string | Buffer, options: GenerateThumbhashOptions): Promise<Buffer> {
const [{ rgbaToThumbHash }, decodingPipeline] = await Promise.all([
import('thumbhash'),
this.getImageDecodingPipeline(input, {
colorspace: options.colorspace,
processInvalidImages: options.processInvalidImages,
raw: options.raw,
edits: options.edits,
}),
]);
const pipeline = decodingPipeline.resize(100, 100, { fit: 'inside', withoutEnlargement: true }).raw().ensureAlpha();
const { data, info } = await pipeline.toBuffer({ resolveWithObject: true });
return Buffer.from(rgbaToThumbHash(info.width, info.height, data));
}
async probe(input: string, options?: ProbeOptions): Promise<VideoInfo> {
const results = await probe(input, options?.countFrames ? ['-count_packets'] : []); // gets frame count quickly: https://stackoverflow.com/a/28376817
return {
format: {
formatName: results.format.format_name,
formatLongName: results.format.format_long_name,
duration: this.parseFloat(results.format.duration),
bitrate: this.parseInt(results.format.bit_rate),
},
videoStreams: results.streams
.filter((stream) => stream.codec_type === 'video' && !stream.disposition?.attached_pic)
.sort((a, b) => this.compareStreams(a, b))
.map((stream) => {
const height = this.parseInt(stream.height);
const dar = this.getDar(stream.display_aspect_ratio);
return {
index: stream.index,
height,
width: dar ? Math.round(height * dar) : this.parseInt(stream.width),
codecName: stream.codec_name === 'h265' ? 'hevc' : stream.codec_name,
profile: this.parseVideoProfile(stream.codec_name, stream.profile as string | undefined),
level: this.parseOptionalInt(stream.level),
frameCount: this.parseInt(options?.countFrames ? stream.nb_read_packets : stream.nb_frames),
frameRate: this.parseFrameRate(stream.avg_frame_rate ?? stream.r_frame_rate),
timeBase: this.parseRational(stream.time_base)?.den,
rotation: this.parseInt(stream.rotation),
bitrate: this.parseInt(stream.bit_rate),
pixelFormat: stream.pix_fmt || 'yuv420p',
colorPrimaries: this.parseEnum(ColorPrimaries, stream.color_primaries) ?? ColorPrimaries.Unknown,
colorMatrix: this.parseEnum(ColorMatrix, stream.color_space) ?? ColorMatrix.Unknown,
colorTransfer: this.parseEnum(ColorTransfer, stream.color_transfer) ?? ColorTransfer.Unknown,
dvProfile: this.parseOptionalInt(stream.dv_profile) as DvProfile | undefined,
dvLevel: this.parseOptionalInt(stream.dv_level),
dvBlSignalCompatibilityId: this.parseOptionalInt(stream.dv_bl_signal_compatibility_id) as
| DvSignalCompatibility
| undefined,
};
}),
audioStreams: results.streams
.filter((stream) => stream.codec_type === 'audio')
.sort((a, b) => this.compareStreams(a, b))
.map((stream) => ({
index: stream.index,
codecName: stream.codec_name,
profile:
stream.codec_name === 'aac' ? this.parseEnum(AacProfile, stream.profile as string | undefined) : undefined,
bitrate: this.parseInt(stream.bit_rate),
})),
};
}
/**
* Needed for accurate segments, especially when remuxing, seeking and/or VFR is involved.
* Scanning packets for keyframes in JS is much faster than -skip_frame nokey since it avoids decoding the video.
*/
async probePackets(input: string, streamIndex: number): Promise<VideoPacketInfo | null> {
const { stdout } = await execFile('ffprobe', [
'-v',
'error',
'-select_streams',
String(streamIndex),
'-show_entries',
'packet=pts,duration,flags',
'-of',
'csv=p=0',
input,
]);
let totalDuration = 0;
const keyframePts: number[] = [];
const keyframeAccDuration: number[] = [];
const keyframeOwnDuration: number[] = [];
const postDiscard: { pts: number; duration: number }[] = [];
for (const line of stdout.split('\n')) {
if (!line) {
continue;
}
const [ptsStr, durationStr, flags] = line.split(',');
const pts = Number.parseInt(ptsStr);
const duration = Number.parseInt(durationStr);
if (Number.isNaN(pts) || Number.isNaN(duration)) {
continue;
}
// Discarded packets don't contribute to packet count, but still contribute to video duration
totalDuration += duration;
if (flags[1] !== 'D') {
postDiscard.push({ pts, duration });
}
if (flags[0] === 'K') {
keyframePts.push(pts);
keyframeAccDuration.push(totalDuration);
// VFR content can have variable duration keyframes,
// so we need to track their duration separately for accurate segment boundaries.
// Non-keyframes are accounted for in totalDuration.
keyframeOwnDuration.push(duration);
}
}
if (postDiscard.length === 0) {
return null;
}
return {
totalDuration,
packetCount: postDiscard.length,
outputFrames: this.cfrOutputFrames(postDiscard, postDiscard.length / totalDuration),
keyframePts,
keyframeAccDuration,
keyframeOwnDuration,
};
}
transcode(input: string, output: string | Writable, options: TranscodeCommand): Promise<void> {
if (!options.twoPass) {
return new Promise((resolve, reject) => {
this.configureFfmpegCall(input, output, options)
.on('error', reject)
.on('end', () => resolve())
.run();
});
}
if (typeof output !== 'string') {
throw new TypeError('Two-pass transcoding does not support writing to a stream');
}
// two-pass allows for precise control of bitrate at the cost of running twice
// recommended for vp9 for better quality and compression
return new Promise((resolve, reject) => {
// first pass output is not saved as only the .log file is needed
this.configureFfmpegCall(input, '/dev/null', options)
.addOptions('-pass', '1')
.addOptions('-passlogfile', output)
.addOptions('-f null')
.on('error', reject)
.on('end', () => {
// second pass
this.configureFfmpegCall(input, output, options)
.addOptions('-pass', '2')
.addOptions('-passlogfile', output)
.on('error', reject)
.on('end', () => handlePromiseError(fs.unlink(`${output}-0.log`), this.logger))
.on('end', () => handlePromiseError(fs.rm(`${output}-0.log.mbtree`, { force: true }), this.logger))
.on('end', () => resolve())
.run();
})
.run();
});
}
async getImageMetadata(input: string | Buffer): Promise<ImageDimensions & { isTransparent: boolean }> {
const { width = 0, height = 0, hasAlpha = false } = await sharp(input).metadata();
return { width, height, isTransparent: hasAlpha };
}
private configureFfmpegCall(input: string, output: string | Writable, options: TranscodeCommand) {
const ffmpegCall = ffmpeg(input, { niceness: 10 })
.inputOptions(options.inputOptions)
.outputOptions(options.outputOptions)
.output(output)
.on('start', (command: string) => this.logger.debug(command))
.on('error', (error, _, stderr) => this.logger.error(stderr || error));
const { frameCount, percentInterval } = options.progress;
const frameInterval = Math.ceil(frameCount / (100 / percentInterval));
if (this.logger.isLevelEnabled(LogLevel.Debug) && frameCount && frameInterval) {
let lastProgressFrame: number = 0;
ffmpegCall.on('progress', (progress: ProgressEvent) => {
if (progress.frames - lastProgressFrame < frameInterval) {
return;
}
lastProgressFrame = progress.frames;
const percent = ((progress.frames / frameCount) * 100).toFixed(2);
const ms = progress.currentFps ? Math.floor((frameCount - progress.frames) / progress.currentFps) * 1000 : 0;
const duration = ms ? Duration.fromMillis(ms).rescale().toHuman({ unitDisplay: 'narrow' }) : '';
const outputText = output instanceof Writable ? 'stream' : output.split('/').pop();
this.logger.debug(
`Transcoding ${percent}% done${duration ? `, estimated ${duration} remaining` : ''} for output ${outputText}`,
);
});
}
return ffmpegCall;
}
private parseInt(value: string | number | undefined): number {
return Number.parseInt(value as string) || 0;
}
private parseFloat(value: string | number | undefined): number {
return Number.parseFloat(value as string) || 0;
}
private parseOptionalInt(value: string | number | undefined): number | undefined {
const parsed = Number.parseInt(value as string);
return Number.isNaN(parsed) ? undefined : parsed;
}
private parseEnum<E extends Record<string, number | string>>(enumObj: E, value?: string) {
return value ? (enumObj[pascalCase(value)] as Extract<E[keyof E], number> | undefined) : undefined;
}
/** Parse a rational like "60000/1001" or "1/600" into `{ num, den }`. */
private parseRational(value: string | undefined): { num: number; den: number } | undefined {
if (!value) {
return;
}
const [num, den = 1] = value.split('/').map(Number);
if (num && den) {
return { num, den };
}
}
private parseFrameRate(value: string | undefined): number | undefined {
const r = this.parseRational(value);
return r ? r.num / r.den : undefined;
}
private getDar(dar: string | undefined): number {
if (dar) {
const [darW, darH] = dar.split(':').map(Number);
if (darW && darH) {
return darW / darH;
}
}
return 0;
}
private parseVideoProfile(codec?: string, profile?: string) {
switch (codec) {
case 'h264': {
return this.parseEnum(H264Profile, profile);
}
case 'h265':
case 'hevc': {
return this.parseEnum(HevcProfile, profile);
}
case 'av1': {
return this.parseEnum(Av1Profile, profile);
}
}
}
private compareStreams(a: FfprobeStream, b: FfprobeStream): number {
const d = (b.disposition?.default ?? 0) - (a.disposition?.default ?? 0);
if (d !== 0) {
return d;
}
return this.parseInt(b.bit_rate) - this.parseInt(a.bit_rate);
}
private cfrOutputFrames(packets: { pts: number; duration: number }[], slotsPerTick: number) {
// Packets may be out of PTS order due to B-frames
packets.sort((a, b) => a.pts - b.pts);
const firstPts = packets[0].pts;
let outputFrames = 0;
let nextPts = 0;
for (const pkt of packets) {
const delta = (pkt.pts - firstPts) * slotsPerTick - nextPts + pkt.duration * slotsPerTick;
const nb = delta < -1.1 ? 0 : delta > 1.1 ? Math.round(delta) : 1;
outputFrames += nb;
nextPts += nb;
}
return outputFrames;
}
}