LFDU - Load Floating-Point Double with Update | PowerPC Instruction Set Reference

Instruction Syntax

Mnemonic	Format	Flags
lfdu	frD,d(rA)	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	110011 (0x33)
frD	6-10	Destination floating-point register
rA	11-15	Source register A
d	16-31	16-bit signed displacement

Operation

EA ← (rA) + EXTS(d)
frD ← MEM(EA, 8)
rA ← EA

A double-precision floating-point value (64 bits) is loaded from memory and placed in floating-point register frD. The effective address is computed by adding the sign-extended displacement to the contents of register rA. After the load, the effective address is stored back into register rA.

Note: rA must not be 0. Because the update form writes the effective address back into rA, an lfdu with rA = 0 is an invalid instruction form. The effective address should be doubleword-aligned (divisible by 8) for best performance. This instruction is well suited to sequential processing of double-precision floating-point arrays, because each load also advances the base pointer.

Affected Registers

frD - Loaded with the 64-bit value read from memory at the effective address.
rA - Updated with the effective address after the load operation.

For more information on floating-point operations see Section 2.1.4, "Floating-Point Status and Control Register (FPSCR)," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

Scientific Array Processing

# Process an array of doubles in place: every element x becomes log(sqrt(x)).
# lfdu walks the array - each load also advances the pointer by 8 bytes.
#
# The pointer and the counter are kept in r30/r31 because they must survive
# the two calls inside the loop: r3-r12 are volatile under the standard
# PowerPC calling conventions (SysV/EABI/AIX), so a callee may overwrite them.
# This fragment assumes the helpers follow that convention (argument and
# result in f1) and that the enclosing function saves/restores r30, r31 and
# LR in its prologue/epilogue.
lis r30, scientific_data@ha
addi r30, r30, scientific_data@l
lwz r31, array_length(r0)   # Number of elements
cmpwi r31, 0
ble compute_done            # Nothing to do for an empty array
subi r30, r30, 8            # Pre-adjust so the first lfdu hits element 0

compute_loop:
    lfdu f1, 8(r30)         # f1 = next element; r30 now addresses it

    # Perform scientific computation (e.g., statistical analysis)
    bl compute_square_root  # f1 = sqrt(f1)
    bl compute_logarithm    # f1 = log(f1)

    # r30 was updated by lfdu, so offset 0 is the element just loaded
    stfd f1, 0(r30)         # Store computed result in place

    subi r31, r31, 1        # Decrement counter
    cmpwi r31, 0
    bne compute_loop        # Continue if more elements

compute_done:

Matrix Row Processing

# Transform every element of a contiguous num_rows x num_cols matrix of
# doubles in place:
#   element = element * normalization_factor + bias_value
# lfdu loads each element and leaves r3 pointing at it, so the result can be
# stored back with a zero displacement.
lis r3, matrix_data@ha
addi r3, r3, matrix_data@l
lwz r4, num_rows(r0)        # Number of rows
lwz r5, num_cols(r0)        # Number of columns
cmpwi r4, 0
ble matrix_done             # No rows: nothing to do
cmpwi r5, 0
ble matrix_done             # No columns: nothing to do
subi r3, r3, 8              # Pre-adjust so the first lfdu hits element 0

# Both constants are loop-invariant: load them once instead of once per
# element.
lfd f2, normalization_factor(r0)
lfd f4, bias_value(r0)

row_loop:
    mr r6, r5               # Column counter for this row

col_loop:
    lfdu f1, 8(r3)          # Load matrix element and advance

    # Kept as separate fmul/fadd (two roundings); a fused fmadd would round
    # only once and could change the low-order bits of the result.
    fmul f3, f1, f2         # Apply scaling factor
    fadd f5, f3, f4         # Add bias

    stfd f5, 0(r3)          # Store back to the element just loaded

    subi r6, r6, 1          # Decrement column counter
    cmpwi r6, 0
    bne col_loop            # Continue row

    subi r4, r4, 1          # Decrement row counter
    cmpwi r4, 0
    bne row_loop            # Continue matrix

matrix_done:

Financial Time Series Analysis

# Simple moving average over a price series, using a sliding running sum.
#
#   moving_avg[j] = (price[j] + ... + price[j + window - 1]) / window
#
# Register roles:
#   r3  = walking pointer into price_data (advanced by lfdu)
#   r4  = samples remaining          r5  = window size (integer)
#   r6  = index of current sample    r7  = &moving_avg_array[0]
#   r10 = &price_data[0]             f10 = running sum of the current window
#   f11 = window size as a double
#
# temp_window must name an 8-byte, doubleword-aligned scratch slot in the
# current stack frame; it is used for the integer-to-double conversion.
# The conversion sequence assumes big-endian byte order.
lis r3, price_data@ha
addi r3, r3, price_data@l
mr r10, r3                  # Keep the array base for oldest-sample lookups
lwz r4, data_points(r0)     # Number of data points
li r5, 20                   # Moving average window
cmpwi r4, 0
ble time_series_done        # No data: nothing to do
subi r3, r3, 8              # Pre-adjust so the first lfdu hits price[0]

lis r7, moving_avg_array@ha # Output base is loop-invariant: build it once
addi r7, r7, moving_avg_array@l

# Convert the window size to a double. A plain stw/lfs pair would only
# reinterpret the integer's bits as a float, not convert its value.
# Standard 32-bit sequence: a double whose high word is 0x43300000 and whose
# low word is L has the value 2^52 + L, so
#   (2^52 + (n XOR 0x80000000)) - (2^52 + 2^31) = n   for signed 32-bit n.
lis r0, 0x4330
stw r0, temp_window(r1)     # High word shared by both doubles
lis r0, 0x8000
stw r0, temp_window+4(r1)
lfd f9, temp_window(r1)     # f9 = 2^52 + 2^31 (bias)
xoris r0, r5, 0x8000        # Flip the sign bit: r0 = window + 2^31
stw r0, temp_window+4(r1)
lfd f11, temp_window(r1)    # f11 = 2^52 + 2^31 + window
fsub f11, f11, f9           # f11 = (double)window

# Initialize moving average sum
lfd f10, zero_constant(r0)  # Running sum
li r6, 0                    # Current index

time_series_loop:
    lfdu f1, 8(r3)          # Load next price and advance

    # Add to running sum
    fadd f10, f10, f1       # sum += current_price

    # The window is full once (index + 1) >= window, i.e. from index
    # window-1 onwards.
    addi r8, r6, 1          # Samples seen so far
    cmpw r8, r5
    blt skip_average        # Skip if not enough data yet

    # r8 = index - window + 1. This is both the output slot of this average
    # and the index of the oldest sample in the current window.
    sub r8, r8, r5
    slwi r9, r8, 3          # Convert to byte offset

    # Calculate and store moving average
    fdiv f12, f10, f11      # average = sum / window_size
    stfdx f12, r7, r9       # moving_avg[r8] = average

    # Slide the window: drop the oldest sample from the running sum
    lfdx f13, r10, r9       # price[r8]
    fsub f10, f10, f13      # Remove from sum

skip_average:
    addi r6, r6, 1          # Increment index
    subi r4, r4, 1          # Decrement remaining count
    cmpwi r4, 0
    bne time_series_loop    # Continue processing

time_series_done:

Digital Signal Processing - FFT Data Loading

# Split interleaved complex samples (real, imag, real, imag, ...) from
# complex_data into separate real_array / imag_array buffers, then multiply
# both arrays element-wise by window_function.
# All pointers are pre-decremented by 8 so that lfdu/stfdu with displacement
# 8 touch element 0 first and advance automatically afterwards.
lis r3, complex_data@ha
addi r3, r3, complex_data@l
lwz r4, fft_size(r0)        # FFT size (number of complex samples)
cmpwi r4, 0
ble fft_window_done         # No samples: nothing to do
subi r3, r3, 8              # Pre-adjust pointer
li r5, 0                    # Sample index

# Separate real and imaginary arrays for FFT algorithm
lis r6, real_array@ha
addi r6, r6, real_array@l
lis r7, imag_array@ha
addi r7, r7, imag_array@l
subi r6, r6, 8              # Pre-adjust real array pointer
subi r7, r7, 8              # Pre-adjust imag array pointer

load_complex_loop:
    # Real part
    lfdu f1, 8(r3)          # Load real part and advance
    stfdu f1, 8(r6)         # Store in real array and advance

    # Imaginary part
    lfdu f2, 8(r3)          # Load imaginary part and advance
    stfdu f2, 8(r7)         # Store in imag array and advance

    addi r5, r5, 1          # Increment sample counter
    cmpw r5, r4             # Check if done
    blt load_complex_loop   # Continue loading

# Apply windowing function to reduce spectral leakage
lis r8, window_function@ha
addi r8, r8, window_function@l
subi r8, r8, 8              # Pre-adjust window pointer
mr r5, r4                   # Reset sample counter

# After the loop above r6/r7 address the LAST element written. Rewind them
# to one element before the start of each array by rebuilding the base
# address; stepping back by a single 8 bytes would leave them near the end.
lis r6, real_array@ha
addi r6, r6, real_array@l
subi r6, r6, 8              # Pre-adjust real array pointer
lis r7, imag_array@ha
addi r7, r7, imag_array@l
subi r7, r7, 8              # Pre-adjust imag array pointer

apply_window_loop:
    lfdu f3, 8(r8)          # Load window coefficient and advance
    lfdu f4, 8(r6)          # Load real sample and advance
    lfdu f5, 8(r7)          # Load imag sample and advance

    # Apply window function
    fmul f6, f4, f3         # real *= window
    fmul f7, f5, f3         # imag *= window

    # r6/r7 address the samples just loaded, so store with displacement 0
    stfd f6, 0(r6)          # Store windowed real
    stfd f7, 0(r7)          # Store windowed imag

    subi r5, r5, 1          # Decrement counter
    cmpwi r5, 0
    bne apply_window_loop   # Continue windowing

fft_window_done:

Machine Learning - Neural Network Training

# Stream training samples through a network. As read by this loop, each
# sample in training_data is input_size doubles followed by one target
# double.
#
# Loop state lives in r29-r31 because it must survive the calls to
# forward_propagation and backpropagation: r3-r12 and f0-f13 are volatile
# under the standard PowerPC calling conventions (SysV/EABI/AIX). This
# fragment assumes both routines follow that convention and that the
# enclosing function saves/restores r29-r31 and LR in its prologue/epilogue.
#   r29 = walking pointer into training_data
#   r30 = samples remaining
#   r31 = input vector length
lis r29, training_data@ha
addi r29, r29, training_data@l
lwz r30, num_samples(r0)    # Number of training samples
lwz r31, input_size(r0)     # Size of each input vector
cmpwi r30, 0
ble training_done           # No samples: nothing to do
subi r29, r29, 8            # Pre-adjust pointer

training_loop:
    # Copy the input vector into input_vector
    mr r6, r31              # Input vector counter
    lis r7, input_vector@ha
    addi r7, r7, input_vector@l
    subi r7, r7, 8          # Pre-adjust input vector pointer

load_input_loop:
    lfdu f1, 8(r29)         # Load input component and advance
    stfdu f1, 8(r7)         # Store in input vector and advance
    subi r6, r6, 1          # Decrement input counter
    cmpwi r6, 0
    bne load_input_loop     # Continue loading input

    # Load target output
    lfdu f2, 8(r29)         # Load target value and advance
    stfd f2, target_output(r0) # Store target

    # Forward propagation through network
    bl forward_propagation  # Process input vector

    # f2 is volatile and may have been overwritten by the call, so reload
    # the target from memory before using it.
    lfd f2, target_output(r0)

    # Load network output for comparison
    lfd f3, network_output(r0)

    # Calculate error: error = target - output
    fsub f4, f2, f3         # f4 = error

    # Accumulate squared error for the loss function
    fmul f5, f4, f4         # f5 = error^2
    lfd f6, total_loss(r0)  # Load accumulated loss
    fadd f7, f6, f5         # Add to total loss
    stfd f7, total_loss(r0) # Store updated loss

    # Backpropagation to update weights
    bl backpropagation      # Update network weights

    subi r30, r30, 1        # Decrement sample counter
    cmpwi r30, 0
    bne training_loop       # Continue training

training_done:

Computational Physics - Particle Simulation

# Advance every particle by one time step under a uniform electric field.
#
# Particle structure: [x, y, z, vx, vy, vz, mass, charge]
# Each field is 8 bytes (double precision), 64 bytes per particle.
#
# Note: f14-f18 are callee-saved under the standard PowerPC calling
# conventions; the enclosing function must preserve them if its caller
# relies on that.
lis r3, particle_array@ha
addi r3, r3, particle_array@l
lwz r4, num_particles(r0)   # Number of particles
cmpwi r4, 0
ble particle_done           # No particles: nothing to do
subi r3, r3, 8              # Pre-adjust pointer

# Field components and time step are the same for every particle: load them
# once instead of once per iteration.
lfd f9, electric_field_x(r0)
lfd f10, electric_field_y(r0)
lfd f11, electric_field_z(r0)
lfd f18, time_step(r0)      # dt

particle_loop:
    # Load position
    lfdu f1, 8(r3)          # Load x and advance
    lfdu f2, 8(r3)          # Load y and advance
    lfdu f3, 8(r3)          # Load z and advance

    # Load velocity
    lfdu f4, 8(r3)          # Load vx and advance
    lfdu f5, 8(r3)          # Load vy and advance
    lfdu f6, 8(r3)          # Load vz and advance

    # Load mass and charge
    lfdu f7, 8(r3)          # Load mass and advance
    lfdu f8, 8(r3)          # Load charge and advance

    # Force = charge * electric_field
    fmul f12, f8, f9        # fx = charge * Ex
    fmul f13, f8, f10       # fy = charge * Ey
    fmul f14, f8, f11       # fz = charge * Ez

    # Calculate acceleration: a = F / mass
    fdiv f15, f12, f7       # ax = fx / mass
    fdiv f16, f13, f7       # ay = fy / mass
    fdiv f17, f14, f7       # az = fz / mass

    # Update velocity: v = v + a * dt
    fmadd f4, f15, f18, f4  # vx += ax * dt
    fmadd f5, f16, f18, f5  # vy += ay * dt
    fmadd f6, f17, f18, f6  # vz += az * dt

    # Update position: x = x + v * dt (uses the updated velocity)
    fmadd f1, f4, f18, f1   # x += vx * dt
    fmadd f2, f5, f18, f2   # y += vy * dt
    fmadd f3, f6, f18, f3   # z += vz * dt

    # Store updated particle data. After the eight lfdu loads r3 addresses
    # the LAST field (charge, at offset 56 within the particle), so x sits
    # 56 bytes back, not 64.
    stfd f1, -56(r3)        # Store x
    stfd f2, -48(r3)        # Store y
    stfd f3, -40(r3)        # Store z
    stfd f4, -32(r3)        # Store vx
    stfd f5, -24(r3)        # Store vy
    stfd f6, -16(r3)        # Store vz
    # mass (-8) and charge (0) unchanged

    subi r4, r4, 1          # Decrement particle counter
    cmpwi r4, 0
    bne particle_loop       # Continue simulation

particle_done:

Audio Processing - Convolution Reverb

# Apply convolution reverb using an impulse response (direct form):
#
#   output[n] = sum over k of input[n-k] * impulse[k],
#               for 0 <= k < impulse_length and k <= n
#
# Both operands are streamed with lfdu: the impulse response forwards
# (displacement +8) and the input signal backwards (displacement -8).
# CTR is used as the tap counter, so only volatile registers are touched.
#   r3  = &input[n]                r4  = &impulse[0]
#   r5  = walking output pointer   r6  = output samples remaining
#   r7  = impulse_length           r8  = n (current sample index)
lis r3, audio_input@ha
addi r3, r3, audio_input@l
lis r4, impulse_response@ha
addi r4, r4, impulse_response@l
lis r5, audio_output@ha
addi r5, r5, audio_output@l
lwz r6, audio_length(r0)   # Length of audio signal
lwz r7, impulse_length(r0) # Length of impulse response
cmpwi r6, 0
ble convolution_done       # No samples: nothing to do
subi r5, r5, 8             # Pre-adjust output pointer for stfdu
li r8, 0                   # Current sample index

convolution_loop:
    lfd f10, zero_constant(r0) # Initialize accumulator

    # Number of taps that have a matching input sample:
    # taps = min(impulse_length, n + 1), since input[n-k] needs k <= n.
    addi r9, r8, 1
    cmpw r9, r7
    ble taps_ready
    mr r9, r7
taps_ready:
    cmpwi r9, 0
    ble impulse_done        # Empty impulse response: output stays 0
    mtctr r9

    addi r10, r3, 8         # One past input[n]; first lfdu -8 hits input[n]
    subi r11, r4, 8         # One before impulse[0]; first lfdu +8 hits it

impulse_loop:
    lfdu f1, -8(r10)        # input[n-k], walking backwards
    lfdu f2, 8(r11)         # impulse[k], walking forwards

    # Multiply and accumulate
    fmadd f10, f1, f2, f10  # accumulator += input * impulse
    bdnz impulse_loop       # Next tap

impulse_done:
    # Store convolution result
    stfdu f10, 8(r5)        # output[n] = accumulator; advance output

    addi r3, r3, 8          # r3 = &input[n+1]
    addi r8, r8, 1          # Increment sample index
    subi r6, r6, 1          # Decrement remaining samples
    cmpwi r6, 0
    bne convolution_loop    # Continue convolution

convolution_done:

Geological Data Analysis

# Compute per-trace statistics (sum, mean, min, max) and global statistics
# (min, max, mean) over seismic_data, streamed with lfdu.
#
# Assumptions of this fragment:
#   - max_double holds the largest finite double; it is used as the starting
#     value for minima and, negated, for maxima.
#   - temp_samples and temp_total each name an 8-byte, doubleword-aligned
#     scratch slot in the current stack frame (integer-to-double conversion).
#   - Big-endian byte order (high word of a double at the lower address).
#   - f14-f22 are callee-saved under the standard PowerPC calling
#     conventions; the enclosing function must preserve them if required.
lis r3, seismic_data@ha
addi r3, r3, seismic_data@l
lwz r4, num_traces(r0)      # Number of seismic traces
lwz r5, samples_per_trace(r0) # Samples per trace
subi r3, r3, 8              # Pre-adjust pointer

lis r7, trace_stats@ha      # Output base is loop-invariant: build it once
addi r7, r7, trace_stats@l

# Global statistics. A running minimum must start at the largest value and
# a running maximum at the smallest, otherwise they can never be replaced.
lfd f20, max_double(r0)     # Global minimum starts at +max
fneg f21, f20               # Global maximum starts at -max
lfd f22, zero_constant(r0)  # Global sum for mean

# Convert samples_per_trace to a double once. A plain stw/lfs pair would
# only reinterpret the integer's bits as a float. Standard 32-bit sequence:
# a double with high word 0x43300000 and low word L has the value 2^52 + L,
# so (2^52 + (n XOR 0x80000000)) - (2^52 + 2^31) = n for signed 32-bit n.
lis r0, 0x4330
stw r0, temp_samples(r1)    # High word shared by both doubles
lis r0, 0x8000
stw r0, temp_samples+4(r1)
lfd f17, temp_samples(r1)   # f17 = 2^52 + 2^31 (bias, reused below)
xoris r0, r5, 0x8000        # Flip the sign bit: r0 = samples + 2^31
stw r0, temp_samples+4(r1)
lfd f13, temp_samples(r1)
fsub f13, f13, f17          # f13 = (double)samples_per_trace

trace_loop:
    mr r6, r5               # Sample counter for current trace
    lfd f10, zero_constant(r0) # Trace sum
    lfd f11, max_double(r0)    # Trace minimum starts at +max
    fneg f12, f11              # Trace maximum starts at -max

sample_loop:
    lfdu f1, 8(r3)          # Load seismic amplitude and advance

    fadd f10, f10, f1       # Add to trace sum
    fadd f22, f22, f1       # Add to global sum

    # Check for new minimum
    fcmpu cr0, f1, f11      # Compare with current min
    bge check_max           # Skip if not smaller
    fmr f11, f1             # Update trace minimum

check_max:
    fcmpu cr0, f1, f12      # Compare with current max
    ble continue_sample     # Skip if not larger
    fmr f12, f1             # Update trace maximum

continue_sample:
    subi r6, r6, 1          # Decrement sample counter
    cmpwi r6, 0
    bne sample_loop         # Continue trace

    # Fold this trace's extremes into the global ones. Doing this once per
    # trace gives the same result as doing it after every sample.
    fcmpu cr0, f11, f20     # Compare trace min with global min
    bge check_global_max    # Skip if not smaller
    fmr f20, f11            # Update global minimum

check_global_max:
    fcmpu cr0, f12, f21     # Compare trace max with global max
    ble store_trace_stats   # Skip if not larger
    fmr f21, f12            # Update global maximum

store_trace_stats:
    fdiv f14, f10, f13      # trace_mean = trace_sum / samples_per_trace

    # Each trace owns 4 doubles (32 bytes): sum, mean, min, max.
    # The slot index is (remaining traces - 1), so slots are filled from the
    # last one down to slot 0.
    subi r8, r4, 1          # Immediate form; "sub r8, r4, 1" would subtract r1
    slwi r11, r8, 5         # Slot index * 32 bytes
    add r11, r7, r11        # Address of this trace's record

    stfd f10, 0(r11)        # Store trace sum
    stfd f14, 8(r11)        # Store trace mean
    stfd f11, 16(r11)       # Store trace minimum
    stfd f12, 24(r11)       # Store trace maximum

    subi r4, r4, 1          # Decrement trace counter
    cmpwi r4, 0
    bne trace_loop          # Continue processing

# Calculate and store global statistics
lwz r12, total_samples(r0)  # Total number of samples
lis r0, 0x4330
stw r0, temp_total(r1)
xoris r0, r12, 0x8000       # Same int-to-double sequence as above
stw r0, temp_total+4(r1)
lfd f15, temp_total(r1)
fsub f15, f15, f17          # f15 = (double)total_samples
fdiv f16, f22, f15          # global_mean = global_sum / total_samples

stfd f20, global_min(r0)    # Store global minimum
stfd f21, global_max(r0)    # Store global maximum
stfd f16, global_mean(r0)   # Store global mean

Quantum Mechanics - Wavefunction Evolution

# Evolve a wavefunction by one first-order time step and renormalize it.
#
#   psi_new = psi - i*(H*dt/hbar)*psi
# With psi = re + i*im this gives:
#   re_new = re + H*im*dt/hbar
#   im_new = im - H*re*dt/hbar
# H is treated as one real value per grid point (a potential), read from the
# hamiltonian array.
#
# Notes:
#   - fsqrt is an optional PowerPC instruction; on implementations without
#     it, replace it with a call to a software square root.
#   - f20-f22 are callee-saved under the standard PowerPC calling
#     conventions; the enclosing function must preserve them if required.
lis r3, wavefunction_real@ha
addi r3, r3, wavefunction_real@l
lis r4, wavefunction_imag@ha
addi r4, r4, wavefunction_imag@l
lis r5, hamiltonian@ha
addi r5, r5, hamiltonian@l
lwz r6, grid_points(r0)     # Number of spatial grid points
cmpwi r6, 0
ble evolution_done          # Empty grid: nothing to do
subi r3, r3, 8              # Pre-adjust real part pointer
subi r4, r4, 8              # Pre-adjust imaginary part pointer
subi r5, r5, 8              # Pre-adjust Hamiltonian pointer

# Load physical constants
lfd f20, hbar(r0)           # hbar (reduced Planck constant)
lfd f21, time_step(r0)      # dt
fdiv f22, f21, f20          # dt/hbar

evolution_loop:
    # Load wavefunction components
    lfdu f1, 8(r3)          # Load psi_real and advance
    lfdu f2, 8(r4)          # Load psi_imag and advance

    # Load Hamiltonian element for this grid point
    lfdu f3, 8(r5)          # Load H and advance

    # Calculate H*psi (simplified)
    fmul f4, f3, f1         # H * psi_real
    fmul f5, f3, f2         # H * psi_imag

    fmadd f6, f5, f22, f1   # re_new = re + (H*im)*(dt/hbar)
    fnmsub f7, f4, f22, f2  # im_new = im - (H*re)*(dt/hbar)

    # r3/r4 address the elements just loaded, so store with displacement 0
    stfd f6, 0(r3)          # Store new real part
    stfd f7, 0(r4)          # Store new imaginary part

    subi r6, r6, 1          # Decrement grid point counter
    cmpwi r6, 0
    bne evolution_loop      # Continue evolution

# Normalize wavefunction to preserve probability.
# Pass 1: accumulate sum(re^2 + im^2) over the grid.
lis r3, wavefunction_real@ha
addi r3, r3, wavefunction_real@l
lis r4, wavefunction_imag@ha
addi r4, r4, wavefunction_imag@l
lwz r6, grid_points(r0)     # Reset grid counter
lfd f10, zero_constant(r0)  # Normalization sum
subi r3, r3, 8              # Pre-adjust pointers
subi r4, r4, 8

norm_loop:
    lfdu f1, 8(r3)          # Load real part and advance
    lfdu f2, 8(r4)          # Load imaginary part and advance

    # Calculate |psi|^2 = re^2 + im^2
    fmadd f3, f1, f1, f10   # f3  = sum + re^2
    fmadd f10, f2, f2, f3   # sum = f3 + im^2

    subi r6, r6, 1          # Decrement counter
    cmpwi r6, 0
    bne norm_loop           # Continue normalization calculation

# Skip the rescale unless the sum is strictly positive: a zero sum would
# divide by zero, and an unordered (NaN) compare also takes this branch.
lfd f12, zero_constant(r0)
fcmpu cr0, f10, f12
ble evolution_done

# Calculate normalization factor: 1/sqrt(sum)
fsqrt f11, f10              # sqrt(sum)
lfd f12, one_constant(r0)   # Load 1.0
fdiv f13, f12, f11          # Normalization factor

# Pass 2: multiply every real and imaginary component by the factor.
lis r3, wavefunction_real@ha
addi r3, r3, wavefunction_real@l
lis r4, wavefunction_imag@ha
addi r4, r4, wavefunction_imag@l
lwz r6, grid_points(r0)     # Reset grid counter
subi r3, r3, 8              # Pre-adjust pointers
subi r4, r4, 8

scale_loop:
    lfdu f1, 8(r3)          # Load real part and advance
    lfdu f2, 8(r4)          # Load imaginary part and advance
    fmul f1, f1, f13        # re *= factor
    fmul f2, f2, f13        # im *= factor
    stfd f1, 0(r3)          # Store normalized real part
    stfd f2, 0(r4)          # Store normalized imaginary part

    subi r6, r6, 1          # Decrement counter
    cmpwi r6, 0
    bne scale_loop          # Continue scaling

evolution_done:

Related Instructions

lfd, lfdx, lfdux, stfdu, lfsu, lwzu

Back to Index