gpu: nova-core: add PIO support for loading firmware images

Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
firmware images into Falcon memory.

Signed-off-by: Timur Tabi <ttabi@nvidia.com>
Co-developed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patch.msgid.link/20260306-turing_prep-v11-6-8f0042c5d026@nvidia.com
master
Timur Tabi 2026-03-06 13:52:43 +09:00 committed by Alexandre Courbot
parent 9725005e2b
commit c1d2f7471b
3 changed files with 251 additions and 3 deletions

View File

@ -367,6 +367,127 @@ pub(crate) trait FalconDmaLoadable {
/// Returns the load parameters for `DMEM`.
fn dmem_load_params(&self) -> FalconDmaLoadTarget;
/// Returns an adapter that provides the required parameter to load this firmware using PIO.
///
/// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in
/// the headers are invalid.
fn try_as_pio_loadable(&self) -> Result<FalconDmaFirmwarePioAdapter<'_, Self>> {
let new_pio_imem = |params: FalconDmaLoadTarget, secure| {
let start = usize::from_safe_cast(params.src_start);
let end = start + usize::from_safe_cast(params.len);
let data = self.as_slice().get(start..end).ok_or(EINVAL)?;
let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?;
Ok::<_, Error>(FalconPioImemLoadTarget {
data,
dst_start,
secure,
start_tag: dst_start >> 8,
})
};
let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?;
let imem_ns = if let Some(params) = self.imem_ns_load_params() {
Some(new_pio_imem(params, false)?)
} else {
None
};
let dmem = {
let params = self.dmem_load_params();
let start = usize::from_safe_cast(params.src_start);
let end = start + usize::from_safe_cast(params.len);
let data = self.as_slice().get(start..end).ok_or(EINVAL)?;
let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?;
FalconPioDmemLoadTarget { data, dst_start }
};
Ok(FalconDmaFirmwarePioAdapter {
fw: self,
imem_sec,
imem_ns,
dmem,
})
}
}
/// Represents a portion of the firmware to be loaded into IMEM using PIO.
#[derive(Clone)]
pub(crate) struct FalconPioImemLoadTarget<'a> {
pub(crate) data: &'a [u8],
pub(crate) dst_start: u16,
pub(crate) secure: bool,
pub(crate) start_tag: u16,
}
/// Represents a portion of the firmware to be loaded into DMEM using PIO.
#[derive(Clone)]
pub(crate) struct FalconPioDmemLoadTarget<'a> {
pub(crate) data: &'a [u8],
pub(crate) dst_start: u16,
}
/// Trait for providing PIO load parameters of falcon firmwares.
pub(crate) trait FalconPioLoadable {
/// Returns the load parameters for Secure `IMEM`, if any.
fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>;
/// Returns the load parameters for Non-Secure `IMEM`, if any.
fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>;
/// Returns the load parameters for `DMEM`.
fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>;
}
/// Adapter type that makes any DMA-loadable firmware also loadable via PIO.
///
/// Created using [`FalconDmaLoadable::try_as_pio_loadable`].
pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> {
/// Reference to the DMA firmware.
fw: &'a T,
/// Validated secure IMEM parameters.
imem_sec: FalconPioImemLoadTarget<'a>,
/// Validated non-secure IMEM parameters.
imem_ns: Option<FalconPioImemLoadTarget<'a>>,
/// Validated DMEM parameters.
dmem: FalconPioDmemLoadTarget<'a>,
}
impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T>
where
T: FalconDmaLoadable + ?Sized,
{
fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
Some(self.imem_sec.clone())
}
fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
self.imem_ns.clone()
}
fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> {
self.dmem.clone()
}
}
impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T>
where
T: FalconDmaLoadable + FalconFirmware + ?Sized,
{
type Target = <T as FalconFirmware>::Target;
fn brom_params(&self) -> FalconBromParams {
self.fw.brom_params()
}
fn boot_addr(&self) -> u32 {
self.fw.boot_addr()
}
}
/// Trait for a falcon firmware.
@ -417,6 +538,98 @@ impl<E: FalconEngine + 'static> Falcon<E> {
Ok(())
}
/// Falcons supports up to four ports, but we only ever use one, so just hard-code it.
const PIO_PORT: usize = 0;
/// Write a slice to Falcon IMEM memory using programmed I/O (PIO).
///
/// Returns `EINVAL` if `img.len()` is not a multiple of 4.
fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result {
// Rejecting misaligned images here allows us to avoid checking
// inside the loops.
if load_offsets.data.len() % 4 != 0 {
return Err(EINVAL);
}
regs::NV_PFALCON_FALCON_IMEMC::default()
.set_secure(load_offsets.secure)
.set_aincw(true)
.set_offs(load_offsets.dst_start)
.write(bar, &E::ID, Self::PIO_PORT);
for (n, block) in load_offsets.data.chunks(MEM_BLOCK_ALIGNMENT).enumerate() {
let n = u16::try_from(n)?;
let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?;
regs::NV_PFALCON_FALCON_IMEMT::default().set_tag(tag).write(
bar,
&E::ID,
Self::PIO_PORT,
);
for word in block.chunks_exact(4) {
let w = [word[0], word[1], word[2], word[3]];
regs::NV_PFALCON_FALCON_IMEMD::default()
.set_data(u32::from_le_bytes(w))
.write(bar, &E::ID, Self::PIO_PORT);
}
}
Ok(())
}
/// Write a slice to Falcon DMEM memory using programmed I/O (PIO).
///
/// Returns `EINVAL` if `img.len()` is not a multiple of 4.
fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result {
// Rejecting misaligned images here allows us to avoid checking
// inside the loops.
if load_offsets.data.len() % 4 != 0 {
return Err(EINVAL);
}
regs::NV_PFALCON_FALCON_DMEMC::default()
.set_aincw(true)
.set_offs(load_offsets.dst_start)
.write(bar, &E::ID, Self::PIO_PORT);
for word in load_offsets.data.chunks_exact(4) {
let w = [word[0], word[1], word[2], word[3]];
regs::NV_PFALCON_FALCON_DMEMD::default()
.set_data(u32::from_le_bytes(w))
.write(bar, &E::ID, Self::PIO_PORT);
}
Ok(())
}
/// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>(
&self,
bar: &Bar0,
fw: &F,
) -> Result {
regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
.set_allow_phys_no_ctx(true)
.write(bar, &E::ID);
regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID);
if let Some(imem_ns) = fw.imem_ns_load_params() {
self.pio_wr_imem_slice(bar, imem_ns)?;
}
if let Some(imem_sec) = fw.imem_sec_load_params() {
self.pio_wr_imem_slice(bar, imem_sec)?;
}
self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?;
self.hal.program_brom(self, bar, &fw.brom_params())?;
regs::NV_PFALCON_FALCON_BOOTVEC::default()
.set_value(fw.boot_addr())
.write(bar, &E::ID);
Ok(())
}
/// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's
/// `target_mem`.
///
@ -659,7 +872,8 @@ impl<E: FalconEngine + 'static> Falcon<E> {
self.hal.is_riscv_active(bar)
}
// Load a firmware image into Falcon memory
/// Load a firmware image into Falcon memory, using the preferred method for the current
/// chipset.
pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>(
&self,
dev: &Device<device::Bound>,
@ -668,7 +882,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
) -> Result {
match self.hal.load_method() {
LoadMethod::Dma => self.dma_load(dev, bar, fw),
LoadMethod::Pio => Err(ENOTSUPP),
LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?),
}
}

View File

@ -58,7 +58,11 @@ pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync {
/// Reset the falcon engine.
fn reset_eng(&self, bar: &Bar0) -> Result;
/// returns the method needed to load data into Falcon memory
/// Returns the method used to load data into the falcon's memory.
///
/// The only chipsets supporting PIO are those < GA102, and PIO is the preferred method for
/// these. For anything above, the PIO registers appear to be masked to the CPU, so DMA is the
/// only usable method.
fn load_method(&self) -> LoadMethod;
}

View File

@ -364,6 +364,36 @@ register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] {
1:1 startcpu as bool;
});
// IMEM access control register. Up to 4 ports are available for IMEM access.
register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] {
15:0 offs as u16, "IMEM block and word offset";
24:24 aincw as bool, "Auto-increment on write";
28:28 secure as bool, "Access secure IMEM";
});
// IMEM data register. Reading/writing this register accesses IMEM at the address
// specified by the corresponding IMEMC register.
register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] {
31:0 data as u32;
});
// IMEM tag register. Used to set the tag for the current IMEM block.
register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] {
15:0 tag as u16;
});
// DMEM access control register. Up to 8 ports are available for DMEM access.
register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] {
15:0 offs as u16, "DMEM block and word offset";
24:24 aincw as bool, "Auto-increment on write";
});
// DMEM data register. Reading/writing this register accesses DMEM at the address
// specified by the corresponding DMEMC register.
register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] {
31:0 data as u32;
});
// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon
// instance.
register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] {