gpu: nova-core: add PIO support for loading firmware images
Turing and GA100 use programmed I/O (PIO) instead of DMA to upload firmware images into Falcon memory.

Signed-off-by: Timur Tabi <ttabi@nvidia.com>
Co-developed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patch.msgid.link/20260306-turing_prep-v11-6-8f0042c5d026@nvidia.com
master
parent
9725005e2b
commit
c1d2f7471b
|
|
@ -367,6 +367,127 @@ pub(crate) trait FalconDmaLoadable {
|
|||
|
||||
/// Returns the load parameters for `DMEM`.
|
||||
fn dmem_load_params(&self) -> FalconDmaLoadTarget;
|
||||
|
||||
/// Returns an adapter that provides the required parameter to load this firmware using PIO.
|
||||
///
|
||||
/// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in
|
||||
/// the headers are invalid.
|
||||
fn try_as_pio_loadable(&self) -> Result<FalconDmaFirmwarePioAdapter<'_, Self>> {
|
||||
let new_pio_imem = |params: FalconDmaLoadTarget, secure| {
|
||||
let start = usize::from_safe_cast(params.src_start);
|
||||
let end = start + usize::from_safe_cast(params.len);
|
||||
let data = self.as_slice().get(start..end).ok_or(EINVAL)?;
|
||||
|
||||
let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?;
|
||||
|
||||
Ok::<_, Error>(FalconPioImemLoadTarget {
|
||||
data,
|
||||
dst_start,
|
||||
secure,
|
||||
start_tag: dst_start >> 8,
|
||||
})
|
||||
};
|
||||
|
||||
let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?;
|
||||
|
||||
let imem_ns = if let Some(params) = self.imem_ns_load_params() {
|
||||
Some(new_pio_imem(params, false)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let dmem = {
|
||||
let params = self.dmem_load_params();
|
||||
let start = usize::from_safe_cast(params.src_start);
|
||||
let end = start + usize::from_safe_cast(params.len);
|
||||
let data = self.as_slice().get(start..end).ok_or(EINVAL)?;
|
||||
|
||||
let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?;
|
||||
|
||||
FalconPioDmemLoadTarget { data, dst_start }
|
||||
};
|
||||
|
||||
Ok(FalconDmaFirmwarePioAdapter {
|
||||
fw: self,
|
||||
imem_sec,
|
||||
imem_ns,
|
||||
dmem,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A section of a firmware image to be written into IMEM using PIO.
#[derive(Clone)]
pub(crate) struct FalconPioImemLoadTarget<'a> {
    /// Bytes to write into IMEM.
    pub(crate) data: &'a [u8],
    /// IMEM offset at which writing starts.
    pub(crate) dst_start: u16,
    /// Whether the section is written with the "secure" access bit set.
    pub(crate) secure: bool,
    /// Tag given to the first block of `data`; each following block uses the next tag.
    pub(crate) start_tag: u16,
}
|
||||
|
||||
/// A section of a firmware image to be written into DMEM using PIO.
#[derive(Clone)]
pub(crate) struct FalconPioDmemLoadTarget<'a> {
    /// Bytes to write into DMEM.
    pub(crate) data: &'a [u8],
    /// DMEM offset at which writing starts.
    pub(crate) dst_start: u16,
}
|
||||
|
||||
/// Trait for providing PIO load parameters of falcon firmwares.
|
||||
pub(crate) trait FalconPioLoadable {
|
||||
/// Returns the load parameters for Secure `IMEM`, if any.
|
||||
fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>;
|
||||
|
||||
/// Returns the load parameters for Non-Secure `IMEM`, if any.
|
||||
fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>;
|
||||
|
||||
/// Returns the load parameters for `DMEM`.
|
||||
fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>;
|
||||
}
|
||||
|
||||
/// Adapter type that makes any DMA-loadable firmware also loadable via PIO.
|
||||
///
|
||||
/// Created using [`FalconDmaLoadable::try_as_pio_loadable`].
|
||||
pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> {
|
||||
/// Reference to the DMA firmware.
|
||||
fw: &'a T,
|
||||
/// Validated secure IMEM parameters.
|
||||
imem_sec: FalconPioImemLoadTarget<'a>,
|
||||
/// Validated non-secure IMEM parameters.
|
||||
imem_ns: Option<FalconPioImemLoadTarget<'a>>,
|
||||
/// Validated DMEM parameters.
|
||||
dmem: FalconPioDmemLoadTarget<'a>,
|
||||
}
|
||||
|
||||
impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T>
|
||||
where
|
||||
T: FalconDmaLoadable + ?Sized,
|
||||
{
|
||||
fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
|
||||
Some(self.imem_sec.clone())
|
||||
}
|
||||
|
||||
fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
|
||||
self.imem_ns.clone()
|
||||
}
|
||||
|
||||
fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> {
|
||||
self.dmem.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T>
|
||||
where
|
||||
T: FalconDmaLoadable + FalconFirmware + ?Sized,
|
||||
{
|
||||
type Target = <T as FalconFirmware>::Target;
|
||||
|
||||
fn brom_params(&self) -> FalconBromParams {
|
||||
self.fw.brom_params()
|
||||
}
|
||||
|
||||
fn boot_addr(&self) -> u32 {
|
||||
self.fw.boot_addr()
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for a falcon firmware.
|
||||
|
|
@ -417,6 +538,98 @@ impl<E: FalconEngine + 'static> Falcon<E> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Index of the PIO port used for every IMEM and DMEM register access.
///
/// Falcons support up to four ports, but we only ever use one, so just hard-code it.
|
||||
const PIO_PORT: usize = 0;
|
||||
|
||||
/// Write a slice to Falcon IMEM memory using programmed I/O (PIO).
|
||||
///
|
||||
/// Returns `EINVAL` if `img.len()` is not a multiple of 4.
|
||||
fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result {
|
||||
// Rejecting misaligned images here allows us to avoid checking
|
||||
// inside the loops.
|
||||
if load_offsets.data.len() % 4 != 0 {
|
||||
return Err(EINVAL);
|
||||
}
|
||||
|
||||
regs::NV_PFALCON_FALCON_IMEMC::default()
|
||||
.set_secure(load_offsets.secure)
|
||||
.set_aincw(true)
|
||||
.set_offs(load_offsets.dst_start)
|
||||
.write(bar, &E::ID, Self::PIO_PORT);
|
||||
|
||||
for (n, block) in load_offsets.data.chunks(MEM_BLOCK_ALIGNMENT).enumerate() {
|
||||
let n = u16::try_from(n)?;
|
||||
let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?;
|
||||
regs::NV_PFALCON_FALCON_IMEMT::default().set_tag(tag).write(
|
||||
bar,
|
||||
&E::ID,
|
||||
Self::PIO_PORT,
|
||||
);
|
||||
for word in block.chunks_exact(4) {
|
||||
let w = [word[0], word[1], word[2], word[3]];
|
||||
regs::NV_PFALCON_FALCON_IMEMD::default()
|
||||
.set_data(u32::from_le_bytes(w))
|
||||
.write(bar, &E::ID, Self::PIO_PORT);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a slice to Falcon DMEM memory using programmed I/O (PIO).
|
||||
///
|
||||
/// Returns `EINVAL` if `img.len()` is not a multiple of 4.
|
||||
fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result {
|
||||
// Rejecting misaligned images here allows us to avoid checking
|
||||
// inside the loops.
|
||||
if load_offsets.data.len() % 4 != 0 {
|
||||
return Err(EINVAL);
|
||||
}
|
||||
|
||||
regs::NV_PFALCON_FALCON_DMEMC::default()
|
||||
.set_aincw(true)
|
||||
.set_offs(load_offsets.dst_start)
|
||||
.write(bar, &E::ID, Self::PIO_PORT);
|
||||
|
||||
for word in load_offsets.data.chunks_exact(4) {
|
||||
let w = [word[0], word[1], word[2], word[3]];
|
||||
regs::NV_PFALCON_FALCON_DMEMD::default()
|
||||
.set_data(u32::from_le_bytes(w))
|
||||
.write(bar, &E::ID, Self::PIO_PORT);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
|
||||
pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>(
|
||||
&self,
|
||||
bar: &Bar0,
|
||||
fw: &F,
|
||||
) -> Result {
|
||||
regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
|
||||
.set_allow_phys_no_ctx(true)
|
||||
.write(bar, &E::ID);
|
||||
|
||||
regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID);
|
||||
|
||||
if let Some(imem_ns) = fw.imem_ns_load_params() {
|
||||
self.pio_wr_imem_slice(bar, imem_ns)?;
|
||||
}
|
||||
if let Some(imem_sec) = fw.imem_sec_load_params() {
|
||||
self.pio_wr_imem_slice(bar, imem_sec)?;
|
||||
}
|
||||
self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?;
|
||||
|
||||
self.hal.program_brom(self, bar, &fw.brom_params())?;
|
||||
|
||||
regs::NV_PFALCON_FALCON_BOOTVEC::default()
|
||||
.set_value(fw.boot_addr())
|
||||
.write(bar, &E::ID);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's
|
||||
/// `target_mem`.
|
||||
///
|
||||
|
|
@ -659,7 +872,8 @@ impl<E: FalconEngine + 'static> Falcon<E> {
|
|||
self.hal.is_riscv_active(bar)
|
||||
}
|
||||
|
||||
// Load a firmware image into Falcon memory
|
||||
/// Load a firmware image into Falcon memory, using the preferred method for the current
|
||||
/// chipset.
|
||||
pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>(
|
||||
&self,
|
||||
dev: &Device<device::Bound>,
|
||||
|
|
@ -668,7 +882,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
|
|||
) -> Result {
|
||||
match self.hal.load_method() {
|
||||
LoadMethod::Dma => self.dma_load(dev, bar, fw),
|
||||
LoadMethod::Pio => Err(ENOTSUPP),
|
||||
LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,11 @@ pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync {
|
|||
/// Reset the falcon engine.
|
||||
fn reset_eng(&self, bar: &Bar0) -> Result;
|
||||
|
||||
/// returns the method needed to load data into Falcon memory
|
||||
/// Returns the method used to load data into the falcon's memory.
|
||||
///
|
||||
/// Only chipsets older than GA102 support PIO, and PIO is the preferred method for
|
||||
/// these. On GA102 and later, the PIO registers appear to be masked to the CPU, so DMA is the
|
||||
/// only usable method.
|
||||
fn load_method(&self) -> LoadMethod;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -364,6 +364,36 @@ register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] {
|
|||
1:1 startcpu as bool;
|
||||
});
|
||||
|
||||
// IMEM access control register. Up to 4 ports are available for IMEM access.
//
// The PIO loader writes this register once per IMEM section, before any data word, to select
// the destination offset, secure access and auto-increment.
|
||||
register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] {
|
||||
15:0 offs as u16, "IMEM block and word offset";
|
||||
24:24 aincw as bool, "Auto-increment on write";
|
||||
28:28 secure as bool, "Access secure IMEM";
|
||||
});
|
||||
|
||||
// IMEM data register. Reading/writing this register accesses IMEM at the address
|
||||
// specified by the corresponding IMEMC register.
//
// The PIO loader writes one 32-bit word at a time, each built from 4 little-endian bytes of the
// firmware image.
|
||||
register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] {
|
||||
31:0 data as u32;
|
||||
});
|
||||
|
||||
// IMEM tag register. Used to set the tag for the current IMEM block.
//
// The PIO loader writes it once per block of `MEM_BLOCK_ALIGNMENT` bytes, before the data words
// of that block.
|
||||
register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] {
|
||||
15:0 tag as u16;
|
||||
});
|
||||
|
||||
// DMEM access control register. Up to 8 ports are available for DMEM access.
//
// The PIO loader writes this register once per DMEM section, before any data word, to select
// the destination offset and auto-increment.
|
||||
register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] {
|
||||
15:0 offs as u16, "DMEM block and word offset";
|
||||
24:24 aincw as bool, "Auto-increment on write";
|
||||
});
|
||||
|
||||
// DMEM data register. Reading/writing this register accesses DMEM at the address
|
||||
// specified by the corresponding DMEMC register.
//
// The PIO loader writes one 32-bit word at a time, each built from 4 little-endian bytes of the
// firmware image.
|
||||
register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] {
|
||||
31:0 data as u32;
|
||||
});
|
||||
|
||||
// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon
|
||||
// instance.
|
||||
register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] {
|
||||
|
|
|
|||
Loading…
Reference in New Issue