LFDUX - Load Floating-Point Double with Update Indexed

Instruction Syntax

Mnemonic	Format	Flags
lfdux	frD,rA,rB	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	011111 (0x1F)
frD	6-10	Destination floating-point register
rA	11-15	Source register A
rB	16-20	Source register B
XO	21-30	631 (Extended opcode)
Rc	31	Reserved (0)

Operation

EA ← (rA) + (rB)
frD ← MEM(EA, 8)
rA ← EA

A double-precision floating-point value (64 bits) is loaded from memory and placed in floating-point register frD. The effective address is computed by adding the contents of registers rA and rB. After the load, the effective address is stored back into register rA.

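Note: rA must not be 0; with rA=0 the instruction form is invalid, because the update form requires a base register to receive the effective address. Because rA is updated with the effective address itself (the address that was just loaded from, not the address of the following element), a pointer used to walk an array must start one stride before the first element. This form combines indexed addressing with automatic pointer advancement, which lets a loop keep its stride in a register; it is well suited to high-performance scientific computing and matrix operations.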

Affected Registers

frD - Loaded with the 64-bit double-precision value read from memory.
rA - Updated with the effective address after the load operation.

For more information on floating-point operations see Section 2.1.4, "Floating-Point Status and Control Register (FPSCR)," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

Scientific Computing - Advanced Matrix Operations

# Perform matrix multiplication: result = matrix_a * matrix_b
# (N x N matrices of doubles, row-major)
#
# lfdux loads from EA = (rA) + (rB) and then sets rA = EA, so a pointer that
# walks an array must start ONE STRIDE BEFORE the first element and be advanced
# by a constant stride held in rB.
lis r3, matrix_a@ha
addi r3, r3, matrix_a@l
lis r4, matrix_b@ha
addi r4, r4, matrix_b@l
lis r5, result_matrix@ha
addi r5, r5, result_matrix@l
lwz r6, matrix_size(r0)     # N x N matrix

# Loop-invariant strides (lfdux takes its index from a register)
li r16, 8                   # Element stride along a row of matrix_a (bytes per double)
slwi r15, r6, 3             # N * 8 (row stride of matrix_b in bytes)

# Triple nested loop for matrix multiplication
li r7, 0                    # i (row index)

outer_loop:
    li r8, 0                # j (column index)

middle_loop:
    lfd f10, zero_double(r0) # Initialize sum = 0.0
    li r9, 0                # k (inner loop index)

    # Calculate base addresses for this iteration
    mullw r10, r7, r6       # i * N
    slwi r11, r10, 3        # * 8 (bytes per double)
    add r12, r3, r11        # &matrix_a[i][0]
    subi r12, r12, 8        # Pre-bias: first lfdux lands on a[i][0]

    slwi r13, r8, 3         # j * 8
    add r14, r4, r13        # &matrix_b[0][j]
    sub r14, r14, r15       # Pre-bias: first lfdux lands on b[0][j]

inner_loop:
    # Load matrix_a[i][k]: r12 += 8, then load from the new r12
    lfdux f1, r12, r16      # Load a[i][k]; r12 now points at a[i][k]

    # Load matrix_b[k][j]: r14 += N*8, then load from the new r14
    lfdux f2, r14, r15      # Load b[k][j]; r14 now points at b[k][j]

    # Multiply-accumulate: sum += a[i][k] * b[k][j]
    fmadd f10, f1, f2, f10

    addi r9, r9, 1          # k++
    cmpw r9, r6             # k < N?
    blt inner_loop          # Continue inner loop

    # Store result[i][j] = sum
    mullw r17, r7, r6       # i * N
    add r18, r17, r8        # i * N + j
    slwi r19, r18, 3        # Convert to byte offset
    stfdx f10, r5, r19      # Store result

    addi r8, r8, 1          # j++
    cmpw r8, r6             # j < N?
    blt middle_loop         # Continue middle loop

    addi r7, r7, 1          # i++
    cmpw r7, r6             # i < N?
    blt outer_loop          # Continue outer loop

Quantum Mechanics - Wavefunction Analysis

# Analyze quantum wavefunction with variable grid spacing
#
# Each grid point is read as three consecutive doubles: ψ_real, ψ_imag, x.
# grid_spacing[] holds, per point, the byte offset added to r3 to reach that
# point's ψ_real (lfdux adds the offset first, loads, then leaves r3 there).
lis r3, wavefunction_data@ha
addi r3, r3, wavefunction_data@l
lis r4, grid_spacing@ha
addi r4, r4, grid_spacing@l
lwz r5, num_grid_points(r0) # Number of spatial grid points

li r7, 8                    # Constant 8-byte stride between consecutive doubles

# Calculate probability density and expectation values
lfd f20, zero_double(r0)    # Total probability Σ|ψ|²
lfd f21, zero_double(r0)    # Position expectation accumulator Σ x|ψ|²
lfd f22, zero_double(r0)    # Position squared expectation accumulator Σ x²|ψ|²

quantum_analysis_loop:
    # Load current grid spacing (variable for adaptive grids)
    lwz r6, 0(r4)           # Load spacing offset (bytes)

    # Load complex wavefunction component with adaptive spacing
    lfdux f1, r3, r6        # r3 += spacing, load ψ_real
    lfdux f2, r3, r7        # r3 += 8, load ψ_imag

    # Calculate probability density: |ψ|² = ψ_real² + ψ_imag²
    fmul f3, f1, f1         # ψ_real²
    fmadd f4, f2, f2, f3    # ψ_real² + ψ_imag² = |ψ|²

    # Load current position coordinate (the double following ψ_imag)
    lfdux f5, r3, r7        # r3 += 8, load x coordinate

    # Update total probability (for normalization check)
    fadd f20, f20, f4       # total_probability += |ψ|²

    # Update position expectation value: ⟨x⟩ += x * |ψ|²
    fmadd f21, f5, f4, f21  # ⟨x⟩ += x * |ψ|²

    # Update position squared expectation: ⟨x²⟩ += x² * |ψ|²
    fmul f6, f5, f5         # x²
    fmadd f22, f6, f4, f22  # ⟨x²⟩ += x² * |ψ|²

    addi r4, r4, 4          # Next grid spacing
    subi r5, r5, 1          # Decrement grid counter
    cmpwi r5, 0
    bne quantum_analysis_loop # Continue analysis

# Normalize results and calculate uncertainty
fdiv f23, f21, f20        # ⟨x⟩ = Σ(x|ψ|²) / Σ(|ψ|²)
fdiv f24, f22, f20        # ⟨x²⟩ = Σ(x²|ψ|²) / Σ(|ψ|²)

# Calculate uncertainty: Δx = √(⟨x²⟩ - ⟨x⟩²)
# NOTE: fsqrt is an optional instruction; not every PowerPC implementation
# provides it. Substitute a software square root where it is unavailable.
fmul f25, f23, f23        # ⟨x⟩²
fsub f26, f24, f25        # ⟨x²⟩ - ⟨x⟩²
fsqrt f27, f26            # Δx = √(⟨x²⟩ - ⟨x⟩²)

# Store results
stfd f23, position_expectation(r0)
stfd f27, position_uncertainty(r0)

Computational Fluid Dynamics - Turbulence Modeling

# Simulate turbulent flow using Large Eddy Simulation (LES)
lis r3, velocity_field@ha
addi r3, r3, velocity_field@l
lis r4, grid_metrics@ha
addi r4, r4, grid_metrics@l
lwz r5, num_cells(r0)       # Number of computational cells

# Each cell contains: [u, v, w, p, ρ, τ_xx, τ_yy, τ_zz, τ_xy, τ_xz, τ_yz]
# where u,v,w are velocities, p is pressure, ρ is density, τ are stress tensors
# (11 consecutive doubles = 88 bytes)

li r7, 8                    # Constant 8-byte stride between consecutive fields

turbulence_simulation_loop:
    # Load grid metrics for adaptive mesh refinement
    lwz r6, 0(r4)           # Load cell size offset (bytes from current r3 to this cell's u)

    # Load velocity components with variable grid spacing
    lfdux f1, r3, r6        # r3 += offset, load u-velocity
    lfdux f2, r3, r7        # r3 += 8, load v-velocity
    lfdux f3, r3, r7        # r3 += 8, load w-velocity

    # Load pressure and density
    lfdux f4, r3, r7        # Load pressure and advance
    lfdux f5, r3, r7        # Load density and advance

    # Load stress tensor components
    lfdux f6, r3, r7        # Load τ_xx and advance
    lfdux f7, r3, r7        # Load τ_yy and advance
    lfdux f8, r3, r7        # Load τ_zz and advance
    lfdux f9, r3, r7        # Load τ_xy and advance
    lfdux f10, r3, r7       # Load τ_xz and advance
    lfdux f11, r3, r7       # Load τ_yz; r3 now points AT τ_yz (last field)

    # Calculate strain rate tensor components
    # S_ij = 0.5 * (∂u_i/∂x_j + ∂u_j/∂x_i)
    # NOTE(review): assumes the helper routines preserve r3-r7 and f1-f11 and
    # that LR has been saved by the enclosing code — confirm against the helpers.
    bl calculate_strain_rate # Compute strain rate from velocity gradients

    # Apply Smagorinsky subgrid-scale model
    # τ_sgs = -2 * ρ * (C_s * Δ)² * |S| * S_ij
    lfd f12, smagorinsky_constant(r0) # C_s
    lfd f13, filter_width(r0)         # Δ (filter width)

    # Calculate |S| = √(2 * S_ij * S_ij)
    bl calculate_strain_magnitude     # Returns |S| in f14

    # Calculate subgrid stress
    fmul f15, f12, f13      # C_s * Δ
    fmul f16, f15, f15      # (C_s * Δ)²
    fmul f17, f16, f14      # (C_s * Δ)² * |S|
    fmul f18, f5, f17       # ρ * (C_s * Δ)² * |S|
    fadd f18, f18, f18      # 2 * ρ * (C_s * Δ)² * |S|
    fneg f19, f18           # -2 * ρ * (C_s * Δ)² * |S|

    # Update stress tensor with subgrid contributions
    # NOTE(review): f14 holds |S|, so these add f19 * |S| rather than
    # f19 * S_ij; the registers holding S_xx/S_yy/S_zz are not visible here —
    # substitute them once the output registers of calculate_strain_rate are known.
    fmadd f6, f19, f14, f6  # τ_xx += τ_sgs
    fmadd f7, f19, f14, f7  # τ_yy += τ_sgs
    fmadd f8, f19, f14, f8  # τ_zz += τ_sgs

    # Store updated flow variables back over the record just read.
    # r3 points at the last field (τ_yz), so the first field (u) is 10 doubles back.
    stfd f1, -80(r3)        # Store updated u-velocity
    stfd f2, -72(r3)        # Store updated v-velocity
    stfd f3, -64(r3)        # Store updated w-velocity
    stfd f4, -56(r3)        # Store updated pressure
    stfd f5, -48(r3)        # Store updated density
    stfd f6, -40(r3)        # Store updated τ_xx
    stfd f7, -32(r3)        # Store updated τ_yy
    stfd f8, -24(r3)        # Store updated τ_zz
    stfd f9, -16(r3)        # Store updated τ_xy
    stfd f10, -8(r3)        # Store updated τ_xz
    stfd f11, 0(r3)         # Store updated τ_yz

    addi r4, r4, 4          # Next grid metric
    subi r5, r5, 1          # Decrement cell counter
    cmpwi r5, 0
    bne turbulence_simulation_loop # Continue simulation

Financial Engineering - Monte Carlo Path Generation

# Generate stochastic paths for multi-asset portfolio using Cholesky decomposition
lis r3, correlation_matrix@ha
addi r3, r3, correlation_matrix@l
lis r4, random_numbers@ha
addi r4, r4, random_numbers@l
lwz r5, num_assets(r0)      # Number of assets in portfolio
lwz r6, num_timesteps(r0)   # Number of time steps in simulation

li r13, 8                   # Constant 8-byte stride for walking the packed Cholesky matrix

# Generate correlated random variables using Cholesky decomposition
# Z = L * W where L is lower triangular Cholesky matrix, W is independent normals
#
# L is stored packed, row by row: L[i][j] = base + (i*(i+1)/2 + j)*8.
# The loops below visit L[0][0], L[1][0], L[1][1], L[2][0], ... which is exactly
# storage order, so a single pointer advanced by 8 with lfdux reaches every
# element without recomputing the triangular index.

monte_carlo_path_loop:
    li r7, 0                # Asset index i
    subi r9, r3, 8          # Working pointer, pre-biased one element before L[0][0]
                            # (r3 itself is never updated, so every time step restarts at L[0][0])

asset_loop_i:
    lfd f10, zero_double(r0) # Initialize correlated random variable
    li r8, 0                # Asset index j (for Cholesky sum)

cholesky_sum_loop:
    # Load next Cholesky matrix element: r9 += 8, then load L[i][j]
    lfdux f1, r9, r13       # Load L[i][j]; r9 now points at L[i][j]

    # Load independent random number for asset j
    slwi r14, r8, 3         # j * 8
    lfdx f2, r4, r14        # Load W[j]

    # Accumulate: Z[i] += L[i][j] * W[j]
    fmadd f10, f1, f2, f10

    addi r8, r8, 1          # j++
    cmpw r8, r7             # j <= i (lower triangular)
    ble cholesky_sum_loop   # Continue Cholesky sum

    # Store correlated random variable
    lis r15, correlated_randoms@ha
    addi r15, r15, correlated_randoms@l
    slwi r16, r7, 3         # i * 8
    stfdx f10, r15, r16     # Store Z[i]

    addi r7, r7, 1          # i++
    cmpw r7, r5             # i < num_assets?
    blt asset_loop_i        # Continue asset loop

    # Generate asset price paths using geometric Brownian motion
    # S(t+dt) = S(t) * exp((μ - σ²/2)*dt + σ*√dt*Z)
    li r17, 0               # Asset index for price update

price_update_loop:
    # Load current asset price
    lis r18, asset_prices@ha
    addi r18, r18, asset_prices@l
    slwi r19, r17, 3        # Asset index to byte offset
    lfdx f11, r18, r19      # Load S(t)

    # Load asset parameters
    lis r20, drift_rates@ha
    addi r20, r20, drift_rates@l
    lfdx f12, r20, r19      # Load μ (drift rate)

    lis r21, volatilities@ha
    addi r21, r21, volatilities@l
    lfdx f13, r21, r19      # Load σ (volatility)

    # Load correlated random variable for this asset
    lis r15, correlated_randoms@ha
    addi r15, r15, correlated_randoms@l
    lfdx f14, r15, r19      # Load Z[asset]

    # Calculate drift term: (μ - σ²/2)*dt
    fmul f15, f13, f13      # σ²
    lfd f16, half_constant(r0) # 0.5
    fmul f17, f15, f16      # σ²/2
    fsub f18, f12, f17      # μ - σ²/2
    lfd f19, time_step(r0)  # dt
    fmul f20, f18, f19      # (μ - σ²/2)*dt

    # Calculate diffusion term: σ*√dt*Z
    fsqrt f21, f19          # √dt
    fmul f22, f13, f21      # σ*√dt
    fmul f23, f22, f14      # σ*√dt*Z

    # Total exponent: (μ - σ²/2)*dt + σ*√dt*Z
    fadd f24, f20, f23

    # Calculate new price: S(t+dt) = S(t) * exp(exponent)
    # NOTE(review): assumes compute_exp preserves f11 and the integer registers
    # used by these loops, and that LR has been saved by the enclosing code — confirm.
    bl compute_exp          # exp(f24) -> f25
    fmul f26, f11, f25      # S(t+dt) = S(t) * exp(exponent)

    # Store updated price
    stfdx f26, r18, r19     # Store new price

    addi r17, r17, 1        # Next asset
    cmpw r17, r5            # All assets updated?
    blt price_update_loop   # Continue price updates

    # Advance to next time step: skip the num_assets doubles consumed as W[]
    slwi r22, r5, 3         # num_assets * 8 bytes
    add r4, r4, r22         # Advance random number pointer
    subi r6, r6, 1          # Decrement time steps
    cmpwi r6, 0
    bne monte_carlo_path_loop # Continue simulation

Molecular Dynamics - Force Calculation

# Calculate intermolecular forces using Lennard-Jones potential
lis r3, particle_positions@ha
addi r3, r3, particle_positions@l
lis r4, force_vectors@ha
addi r4, r4, force_vectors@l
lis r5, neighbor_list@ha
addi r5, r5, neighbor_list@l
lwz r6, num_particles(r0)   # Number of particles

# Particle records are addressed with a 32-byte stride (4 doubles): [x, y, z, mass]
# NOTE(review): any further per-particle fields (type, charge) are not part of
# the 32-byte record this code indexes — confirm against the data definition.
# Each force vector: [fx, fy, fz] (3 doubles = 24 bytes)
# neighbor_list layout per particle: [count, index_0, ..., index_{count-1}] (32-bit words)

li r7, 0                    # Particle index (initialized once, outside the loop)
li r9, 8                    # Constant 8-byte stride between consecutive doubles

particle_force_loop:
    # Locate current particle record
    slwi r8, r7, 5          # Particle offset (* 32 for 4 doubles)
    mr r17, r3              # Working copy: lfdux rewrites its base register,
                            # and r3 must keep pointing at particle_positions

    # Load particle position with automatic advancement
    lfdux f1, r17, r8       # r17 = base + offset, load x coordinate
    lfdux f2, r17, r9       # r17 += 8, load y coordinate
    lfdux f3, r17, r9       # r17 += 8, load z coordinate
    lfdux f4, r17, r9       # r17 += 8, load mass

    # Initialize force accumulator
    lfd f10, zero_double(r0) # fx = 0
    lfd f11, zero_double(r0) # fy = 0
    lfd f12, zero_double(r0) # fz = 0

    # Process neighbor list for this particle
    lwz r10, 0(r5)          # Load number of neighbors
    addi r5, r5, 4          # Advance to neighbor indices

neighbor_loop:
    cmpwi r10, 0            # Any neighbors left?
    beq force_complete      # Skip if no neighbors

    # Load neighbor index and calculate neighbor position offset
    lwz r11, 0(r5)          # Load neighbor index
    slwi r12, r11, 5        # Neighbor offset (* 32)

    # Load neighbor position (base reloaded each time because lfdux updates r13)
    lis r13, particle_positions@ha
    addi r13, r13, particle_positions@l
    lfdux f5, r13, r12      # r13 = base + offset, load neighbor x
    lfdux f6, r13, r9       # r13 += 8, load neighbor y
    lfdux f7, r13, r9       # r13 += 8, load neighbor z

    # Calculate separation vector pointing from the neighbor to the current
    # particle: dr = r_current - r_neighbor. With this orientation a positive
    # (repulsive) magnitude pushes the current particle away from the neighbor.
    fsub f13, f1, f5        # dx = x_current - x_neighbor
    fsub f14, f2, f6        # dy = y_current - y_neighbor
    fsub f15, f3, f7        # dz = z_current - z_neighbor

    # Calculate distance squared: r² = dx² + dy² + dz²
    fmul f16, f13, f13      # dx²
    fmadd f17, f14, f14, f16 # dx² + dy²
    fmadd f18, f15, f15, f17 # r² = dx² + dy² + dz²

    # Calculate Lennard-Jones force: F = 24ε[(2σ¹²/r¹³) - (σ⁶/r⁷)]
    lfd f19, lj_sigma(r0)   # σ (Lennard-Jones size parameter)
    lfd f20, lj_epsilon(r0) # ε (Lennard-Jones energy parameter)

    # Calculate σ²/r²
    fmul f21, f19, f19      # σ²
    fdiv f22, f21, f18      # σ²/r²

    # Calculate (σ²/r²)³ = σ⁶/r⁶
    fmul f23, f22, f22      # (σ²/r²)²
    fmul f24, f23, f22      # (σ²/r²)³ = σ⁶/r⁶

    # Calculate (σ⁶/r⁶)² = σ¹²/r¹²
    fmul f25, f24, f24      # σ¹²/r¹²

    # Calculate force magnitude over r: 24ε[(2σ¹²/r¹²) - (σ⁶/r⁶)]/r²
    fadd f26, f25, f25      # 2σ¹²/r¹²
    fsub f27, f26, f24      # 2σ¹²/r¹² - σ⁶/r⁶
    lfd f28, twentyfour_constant(r0) # 24
    fmul f29, f28, f20      # 24ε
    fmul f30, f29, f27      # 24ε[(2σ¹²/r¹²) - (σ⁶/r⁶)]
    fdiv f31, f30, f18      # Force magnitude / r²

    # Calculate force components: F_i = (force_magnitude / r²) * dr_i
    # Temporaries f0/f8/f9 are used so that f1-f3 (current particle position)
    # stay intact for the remaining neighbors.
    fmul f0, f31, f13       # fx_component
    fmul f8, f31, f14       # fy_component
    fmul f9, f31, f15       # fz_component

    # Accumulate forces
    fadd f10, f10, f0       # total_fx += fx_component
    fadd f11, f11, f8       # total_fy += fy_component
    fadd f12, f12, f9       # total_fz += fz_component

    addi r5, r5, 4          # Next neighbor index
    subi r10, r10, 1        # Decrement neighbor count
    b neighbor_loop         # Continue neighbor processing

force_complete:
    # Store total force for this particle
    mulli r14, r7, 24       # Force vector offset (* 24 for 3 doubles)
    stfdx f10, r4, r14      # Store fx
    addi r15, r14, 8
    stfdx f11, r4, r15      # Store fy
    addi r16, r15, 8
    stfdx f12, r4, r16      # Store fz

    addi r7, r7, 1          # Next particle
    cmpw r7, r6             # All particles processed?
    blt particle_force_loop # Continue force calculation

Related Instructions

lfd, lfdu, lfdx, stfdux, lfsux, lwzux

Back to Index