LFD - Load Floating-Point Double | PowerPC Instruction Set Reference

Instruction Syntax

Mnemonic	Format	Flags
lfd	frD,d(rA)	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	110010 (0x32)
frD	6-10	Destination floating-point register
rA	11-15	Source register A
d	16-31	16-bit signed displacement

Operation

if rA = 0 then EA ← EXTS(d)
else EA ← (rA) + EXTS(d)
frD ← MEM(EA, 8)

A double-precision floating-point value (64 bits) is loaded from memory and placed in floating-point register frD. The effective address is the sum of the sign-extended 16-bit displacement d and the contents of register rA; when the rA field is 0, the displacement is added to the constant zero rather than to the contents of r0.

Note: This instruction loads the full IEEE-754 double-precision format directly; no format conversion is performed. The effective address should be doubleword-aligned (divisible by 8) for optimal performance. If the rA field is 0, the base address is the literal value 0, not the contents of register r0.

Affected Registers

None - This instruction does not affect any condition register fields or XER register bits.

For more information on floating-point operations see Section 2.1.4, "Floating-Point Status and Control Register (FPSCR)," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

Basic Double-Precision Loading

# f1 <- the 8-byte double at address r3 + 0
lfd     f1, 0(r3)
# f2 <- the double that immediately follows it (r3 + 8)
lfd     f2, 8(r3)
# Displacements are signed: f3 <- the double at address r4 - 16
lfd     f3, -16(r4)

Scientific Computing - Matrix Operations

# 3x3 determinant by cofactor expansion along the first row:
#   det = a11(a22*a33 - a23*a32) - a12(a21*a33 - a23*a31) + a13(a21*a32 - a22*a31)
#
# Register map once the loads are done (row-major, 8 bytes per element):
#   f0 f1 f2     a11 a12 a13
#   f3 f4 f5  =  a21 a22 a23
#   f6 f7 f8     a31 a32 a33
# The determinant ends up in f22.
lis     r3, matrix_data@ha
addi    r3, r3, matrix_data@l   # r3 -> matrix_data

lfd     f0, 0(r3)               # row 0: a11
lfd     f1, 8(r3)               #        a12
lfd     f2, 16(r3)              #        a13
lfd     f3, 24(r3)              # row 1: a21
lfd     f4, 32(r3)              #        a22
lfd     f5, 40(r3)              #        a23
lfd     f6, 48(r3)              # row 2: a31
lfd     f7, 56(r3)              #        a32
lfd     f8, 64(r3)              #        a33

# The six products that make up the three 2x2 minors
fmul    f9, f4, f8              # a22 * a33
fmul    f10, f5, f7             # a23 * a32
fmul    f13, f3, f8             # a21 * a33
fmul    f14, f5, f6             # a23 * a31
fmul    f17, f3, f7             # a21 * a32
fmul    f18, f4, f6             # a22 * a31

# The minors themselves
fsub    f11, f9, f10            # M11 = a22*a33 - a23*a32
fsub    f15, f13, f14           # M12 = a21*a33 - a23*a31
fsub    f19, f17, f18           # M13 = a21*a32 - a22*a31

# Scale each minor by its first-row element
fmul    f12, f0, f11            # a11 * M11
fmul    f16, f1, f15            # a12 * M12
fmul    f20, f2, f19            # a13 * M13

# Combine with alternating signs: + - +
fsub    f21, f12, f16           # a11*M11 - a12*M12
fadd    f22, f21, f20           # ... + a13*M13  => determinant

High-Precision Financial Calculations

# Black-Scholes European call price with double precision:
#   d1 = [ln(S/K) + (r + σ²/2)T] / (σ√T)
#   d2 = d1 - σ√T
#   C  = S*N(d1) - K*e^(-rT)*N(d2)
#
# Everything that must survive a call lives in non-volatile registers
# (r31, f14-f23); f0-f13 and r3-r12 are treated as clobbered by every `bl`.
#
# Assumptions this fragment cannot verify -- TODO confirm against the helpers:
#   * ln_function, exp_function and normal_cdf take their argument in f1 and
#     return the result in f1 (standard PowerPC ABI) and preserve r14-r31 and
#     f14-f31.
#   * STOCK_PRICE .. VOLATILITY and option_value are byte offsets into
#     option_params.
#   * The enclosing function saves/restores LR, r31 and f14-f23.
#   * fsqrt is an optional instruction; the target CPU must implement it.
lis     r31, option_params@ha
addi    r31, r31, option_params@l   # r31 -> option_params (survives the calls)

lfd     f14, STOCK_PRICE(r31)       # Current stock price (S)
lfd     f15, STRIKE_PRICE(r31)      # Strike price (K)
lfd     f16, RISK_FREE_RATE(r31)    # Risk-free rate (r)
lfd     f17, TIME_TO_EXPIRY(r31)    # Time to expiration (T)
lfd     f18, VOLATILITY(r31)        # Volatility (σ)

# Numerator of d1: ln(S/K) + (r + σ²/2)T
fdiv    f1, f14, f15                # S/K
bl      ln_function                 # f1 = ln(S/K)

fmul    f2, f18, f18                # σ²
# Address the constant through @ha/@l: a base of r0 would mean "literal 0",
# i.e. an absolute 16-bit address, not an address relative to anything.
lis     r9, half_constant@ha
lfd     f3, half_constant@l(r9)     # 0.5
fmul    f2, f2, f3                  # σ²/2
fadd    f2, f16, f2                 # r + σ²/2
fmul    f2, f2, f17                 # (r + σ²/2)T
fadd    f2, f1, f2                  # ln(S/K) + (r + σ²/2)T

# Denominator of d1, then d1 and d2
fsqrt   f3, f17                     # √T
fmul    f19, f18, f3                # σ√T
fdiv    f20, f2, f19                # d1
fsub    f21, f20, f19               # d2 = d1 - σ√T

# Cumulative normal distribution of d1 and d2
fmr     f1, f20                     # Pass d1
bl      normal_cdf                  # f1 = N(d1)
fmr     f22, f1                     # Keep N(d1)

fmr     f1, f21                     # Pass d2
bl      normal_cdf                  # f1 = N(d2)
fmr     f23, f1                     # Keep N(d2)

# Discount factor e^(-rT)
fneg    f1, f16                     # -r
fmul    f1, f1, f17                 # -rT
bl      exp_function                # f1 = e^(-rT)

# C = S*N(d1) - K*e^(-rT)*N(d2)
fmul    f2, f15, f1                 # K * e^(-rT)
fmul    f2, f2, f23                 # K * e^(-rT) * N(d2)
fmul    f3, f14, f22                # S * N(d1)
fsub    f1, f3, f2                  # Call option price
stfd    f1, option_value(r31)       # Store result

Digital Signal Processing - FFT

# Split 1024 interleaved complex doubles (real, imag, real, imag, ...) from
# fft_data into the separate real_array and imag_array.
#
# The three base addresses never change, so they are computed once here
# instead of on every pass through the loop.
lis     r3, fft_data@ha
addi    r3, r3, fft_data@l      # r3 -> interleaved input
lis     r8, real_array@ha
addi    r8, r8, real_array@l    # r8 -> real parts (output)
lis     r9, imag_array@ha
addi    r9, r9, imag_array@l    # r9 -> imaginary parts (output)
li      r4, 1024                # FFT size
li      r5, 0                   # Current index

fft_load_loop:
    slwi    r6, r5, 4           # Index * 16 (8 bytes real + 8 bytes imag)
    add     r7, r3, r6          # Address of this complex number

    lfd     f1, 0(r7)           # Real part
    lfd     f2, 8(r7)           # Imaginary part

    slwi    r10, r5, 3          # Index * 8: offset inside each output array
    stfdx   f1, r8, r10         # real_array[index] = real part
    stfdx   f2, r9, r10         # imag_array[index] = imaginary part

    addi    r5, r5, 1           # Next complex number
    cmpw    r5, r4              # Processed all r4 entries?
    blt     fft_load_loop

# Bit-reversal permutation of real_array / imag_array.
# For every index i, j is i with its low 10 bits reversed; the elements at
# i and j are exchanged once (only when i < j).
# Expects r4 to hold the element count (1024, matching the 10-bit reversal).
li      r5, 0                   # Current index i
lis     r11, real_array@ha
addi    r11, r11, real_array@l  # r11 -> real parts (loop-invariant)
lis     r12, imag_array@ha
addi    r12, r12, imag_array@l  # r12 -> imaginary parts (loop-invariant)

bit_reverse_loop:
    mr      r6, r5              # Working copy of i, consumed bit by bit
    li      r7, 0               # Reversed index j
    li      r8, 10              # log2(1024) = 10 bits

reverse_bits:
    rlwinm  r9, r6, 0, 31, 31   # Extract LSB of the working copy
    # Immediate shifts must use slwi/srwi. The register forms slw/srw take the
    # shift amount from a register, so an operand of "1" would mean r1.
    slwi    r7, r7, 1           # Make room in j
    or      r7, r7, r9          # Insert the bit
    srwi    r6, r6, 1           # Drop the consumed bit
    subi    r8, r8, 1           # One bit fewer to go
    cmpwi   r8, 0
    bgt     reverse_bits

    # Swap only when i < j: i == j needs no swap, i > j was already swapped
    cmpw    r5, r7
    bge     no_swap

    slwi    r9, r5, 3           # Byte offset of element i
    slwi    r10, r7, 3          # Byte offset of element j

    # Exchange the real parts
    lfdx    f1, r11, r9         # real[i]
    lfdx    f2, r11, r10        # real[j]
    stfdx   f2, r11, r9         # real[i] = old real[j]
    stfdx   f1, r11, r10        # real[j] = old real[i]

    # Exchange the imaginary parts
    lfdx    f1, r12, r9         # imag[i]
    lfdx    f2, r12, r10        # imag[j]
    stfdx   f2, r12, r9         # imag[i] = old imag[j]
    stfdx   f1, r12, r10        # imag[j] = old imag[i]

no_swap:
    addi    r5, r5, 1           # Next index
    cmpw    r5, r4              # Check if done
    blt     bit_reverse_loop

3D Graphics Transformations

# Multiply one 4-component vertex by a 4x4 row-major matrix and write the
# result back over the vertex:  vertex' = matrix * vertex
#   r3 -> transform_matrix (16 doubles)    r4 -> vertex_data (x, y, z, w)
# fmadd fD, fA, fC, fB computes fA*fC + fB, so each row is accumulated as
# product + running sum.
lis     r4, vertex_data@ha
addi    r4, r4, vertex_data@l
lis     r3, transform_matrix@ha
addi    r3, r3, transform_matrix@l

# Matrix rows: f0-f3, f4-f7, f8-f11, f12-f15
lfd     f0, 0(r3)                   # m00
lfd     f1, 8(r3)                   # m01
lfd     f2, 16(r3)                  # m02
lfd     f3, 24(r3)                  # m03
lfd     f4, 32(r3)                  # m10
lfd     f5, 40(r3)                  # m11
lfd     f6, 48(r3)                  # m12
lfd     f7, 56(r3)                  # m13
lfd     f8, 64(r3)                  # m20
lfd     f9, 72(r3)                  # m21
lfd     f10, 80(r3)                 # m22
lfd     f11, 88(r3)                 # m23
lfd     f12, 96(r3)                 # m30
lfd     f13, 104(r3)                # m31
lfd     f14, 112(r3)                # m32
lfd     f15, 120(r3)                # m33

# Vertex components: f16..f19 = x, y, z, w
lfd     f16, 0(r4)                  # x
lfd     f17, 8(r4)                  # y
lfd     f18, 16(r4)                 # z
lfd     f19, 24(r4)                 # w (usually 1.0)

# Row 0 -> f20 = x' = m00*x + m01*y + m02*z + m03*w
fmul    f20, f0, f16
fmadd   f20, f1, f17, f20
fmadd   f20, f2, f18, f20
fmadd   f20, f3, f19, f20

# Row 1 -> f21 = y' = m10*x + m11*y + m12*z + m13*w
fmul    f21, f4, f16
fmadd   f21, f5, f17, f21
fmadd   f21, f6, f18, f21
fmadd   f21, f7, f19, f21

# Row 2 -> f22 = z' = m20*x + m21*y + m22*z + m23*w
fmul    f22, f8, f16
fmadd   f22, f9, f17, f22
fmadd   f22, f10, f18, f22
fmadd   f22, f11, f19, f22

# Row 3 -> f23 = w' = m30*x + m31*y + m32*z + m33*w
fmul    f23, f12, f16
fmadd   f23, f13, f17, f23
fmadd   f23, f14, f18, f23
fmadd   f23, f15, f19, f23

# Write back last: the results overwrite the inputs they were computed from
stfd    f20, 0(r4)                  # x'
stfd    f21, 8(r4)                  # y'
stfd    f22, 16(r4)                 # z'
stfd    f23, 24(r4)                 # w'

Numerical Integration

# Composite Simpson's rule with double precision:
#   integral ≈ (h/3) * [f(a) + f(b) + 4*Σ f(x_odd) + 2*Σ f(x_even)],  h = (b - a)/n
#
# Register use (all non-volatile, so they survive evaluate_function):
#   r31 = n         r30 = i
#   f14 = a         f15 = b          f16 = h        f17 = f(a) + f(b)
#   f18 = odd sum   f19 = even sum   f20 = int->double conversion bias
#
# Integers are converted to double with the classic 32-bit sequence: build the
# double 0x43300000:(value XOR 0x80000000) in memory and subtract the bias
# 2^52 + 2^31. Loading the raw integer with lfs would merely reinterpret its
# bits as a single-precision float.
#
# Assumptions this fragment cannot verify -- TODO confirm:
#   * evaluate_function takes x in f6, returns f(x) in f1, and preserves
#     r14-r31 and f14-f31.
#   * temp_n(r1) is an 8-byte scratch slot in this frame (ideally doubleword
#     aligned) and the target is big-endian (high word at the lower address).
#   * The enclosing function saves/restores LR, r30, r31 and f14-f20.
#   * n is even, positive and fits in a signed 32-bit integer.
lis     r3, function_data@ha
addi    r3, r3, function_data@l     # r3 -> function_data
                                    # NOTE(review): never read in this fragment and
                                    # volatile across calls -- confirm who consumes it

# Stand-alone symbols are addressed through @ha/@l; a base of r0 would mean
# "literal 0", i.e. an absolute 16-bit address.
lis     r9, start_point@ha
lfd     f14, start_point@l(r9)      # a: integration start
lis     r9, end_point@ha
lfd     f15, end_point@l(r9)        # b: integration end
lis     r9, num_intervals@ha
lwz     r31, num_intervals@l(r9)    # n: number of intervals (must be even)

# Conversion bias 0x4330000080000000 = 2^52 + 2^31
lis     r0, 0x4330
stw     r0, temp_n(r1)              # High word, left in place for later conversions
lis     r0, 0x8000
stw     r0, temp_n+4(r1)            # Low word of the bias
lfd     f20, temp_n(r1)             # f20 = bias

# h = (b - a) / n
xoris   r0, r31, 0x8000             # Flip the sign bit of n
stw     r0, temp_n+4(r1)
lfd     f0, temp_n(r1)              # 2^52 + 2^31 + n
fsub    f0, f0, f20                 # (double)n
fsub    f16, f15, f14               # b - a
fdiv    f16, f16, f0                # h

# End points: f(a) + f(b)
fmr     f6, f14                     # x = a
bl      evaluate_function           # f1 = f(a)
fmr     f17, f1                     # Keep f(a); f1 is overwritten by the next call

fmr     f6, f15                     # x = b
bl      evaluate_function           # f1 = f(b)
fadd    f17, f17, f1                # f(a) + f(b)

# Interior points, split into odd and even indices
lis     r9, zero_constant@ha
lfd     f18, zero_constant@l(r9)    # Sum of odd points
fmr     f19, f18                    # Sum of even points
li      r30, 1                      # i = 1

integration_loop:
    cmpw    r30, r31                # Interior points are i = 1 .. n-1
    bge     integration_done

    # x = a + i * h
    xoris   r0, r30, 0x8000
    stw     r0, temp_n+4(r1)
    lfd     f0, temp_n(r1)
    fsub    f0, f0, f20             # (double)i
    fmul    f0, f0, f16             # i * h
    fadd    f6, f14, f0             # x = a + i * h

    bl      evaluate_function       # f1 = f(x)

    andi.   r0, r30, 1              # Is i odd?
    beq     even_point

    fadd    f18, f18, f1            # Odd index: add to odd sum
    b       next_point

even_point:
    fadd    f19, f19, f1            # Even index: add to even sum

next_point:
    addi    r30, r30, 1             # Next interior point
    b       integration_loop

integration_done:
    # (h/3) * [f(a) + f(b) + 4*odd_sum + 2*even_sum]
    lis     r9, four_constant@ha
    lfd     f0, four_constant@l(r9) # 4.0
    lis     r9, two_constant@ha
    lfd     f2, two_constant@l(r9)  # 2.0

    fmul    f0, f0, f18             # 4 * odd_sum
    fmul    f2, f2, f19             # 2 * even_sum
    fadd    f3, f17, f0             # f(a) + f(b) + 4*odd_sum
    fadd    f3, f3, f2              # + 2*even_sum

    lis     r9, three_constant@ha
    lfd     f0, three_constant@l(r9) # 3.0
    fdiv    f0, f16, f0             # h/3
    fmul    f1, f0, f3              # Final integral result

    lis     r9, integral_result@ha
    stfd    f1, integral_result@l(r9) # Store result

Monte Carlo Simulation

# Monte Carlo estimation of π using double precision:
# draw points in the square [-1, 1] x [-1, 1] and count those with
# x² + y² < 1;  π ≈ 4 * inside / total.
#
# Register use (all non-volatile, so they survive generate_random):
#   r31 = total samples   r30 = inside count   r29 = current sample
#   f14 = 2.0             f15 = 1.0            f16 = x of the current point
#
# Assumptions this fragment cannot verify -- TODO confirm:
#   * generate_random returns its value in f1 (presumably in [0, 1]) and
#     preserves r14-r31 and f14-f31.
#   * temp_inside(r1) is an 8-byte scratch slot in this frame (ideally
#     doubleword aligned) and the target is big-endian.
#   * The enclosing function saves/restores LR, r29-r31 and f14-f16.
#   * num_samples is positive and fits in a signed 32-bit integer (a value of
#     0 makes the final division 0/0).
lis     r3, random_seed@ha
addi    r3, r3, random_seed@l       # r3 -> random_seed
                                    # NOTE(review): never read in this fragment and
                                    # volatile across calls -- confirm who consumes it

# Stand-alone symbols are addressed through @ha/@l; a base of r0 would mean
# "literal 0", i.e. an absolute 16-bit address.
lis     r9, num_samples@ha
lwz     r31, num_samples@l(r9)      # Number of random samples
li      r30, 0                      # Count of points inside circle
li      r29, 0                      # Current sample

# Loop-invariant constants, loaded once
lis     r9, two_constant@ha
lfd     f14, two_constant@l(r9)     # 2.0
lis     r9, one_constant@ha
lfd     f15, one_constant@l(r9)     # 1.0

monte_carlo_loop:
    cmpw    r29, r31                # Check if done
    bge     monte_carlo_done

    # Random x coordinate
    bl      generate_random         # f1 = random value
    fmul    f16, f1, f14            # Scale by 2
    fsub    f16, f16, f15           # Shift down by 1 -> x

    # Random y coordinate
    bl      generate_random         # f1 = random value
    fmul    f0, f1, f14             # Scale by 2
    fsub    f0, f0, f15             # Shift down by 1 -> y

    # d² = x² + y²
    fmul    f2, f16, f16            # x²
    fmadd   f2, f0, f0, f2          # y*y + x²

    # Inside the unit circle when d² < 1
    fcmpu   cr0, f2, f15            # Compare d² with 1.0
    bge     outside_circle          # Not less than 1.0 (or unordered): skip

    addi    r30, r30, 1             # Increment inside count

outside_circle:
    addi    r29, r29, 1             # Next sample
    b       monte_carlo_loop

monte_carlo_done:
    # π ≈ 4 * (inside_count / total_samples)
    # Convert both counts to double: build 0x43300000:(value XOR 0x80000000)
    # and subtract the bias 2^52 + 2^31. Loading the raw integer with lfs
    # would only reinterpret its bits as a single-precision float.
    lis     r0, 0x4330
    stw     r0, temp_inside(r1)     # High word, shared by all three doubles
    lis     r0, 0x8000
    stw     r0, temp_inside+4(r1)
    lfd     f0, temp_inside(r1)     # f0 = bias

    xoris   r0, r30, 0x8000
    stw     r0, temp_inside+4(r1)
    lfd     f2, temp_inside(r1)
    fsub    f2, f2, f0              # (double)inside_count

    xoris   r0, r31, 0x8000
    stw     r0, temp_inside+4(r1)
    lfd     f3, temp_inside(r1)
    fsub    f3, f3, f0              # (double)total_samples

    fdiv    f2, f2, f3              # Ratio of inside points
    lis     r9, four_constant@ha
    lfd     f0, four_constant@l(r9) # 4.0
    fmul    f1, f2, f0              # π estimate

    lis     r9, pi_estimate@ha
    stfd    f1, pi_estimate@l(r9)   # Store π estimate

Machine Learning - Neural Network

# One fully-connected layer with sigmoid activation (double precision):
#   output[n] = 1 / (1 + e^(-(bias[n] + Σ_i weights[n][i] * input[i])))
#
# Register use (all non-volatile, so they survive exp_function):
#   r31 -> weight_matrix (row-major, input_size doubles per neuron)
#   r30 -> bias_vector     r29 -> input_vector     r25 -> output_vector
#   r28 = layer_size       r26 = input_size        r27 = current neuron
#   f14 = 1.0
# r13 is deliberately not used: the PowerPC ABIs reserve it.
#
# Assumptions this fragment cannot verify -- TODO confirm:
#   * exp_function takes its argument in f1, returns the result in f1
#     (standard PowerPC ABI) and preserves r14-r31 and f14-f31.
#   * The enclosing function saves/restores LR, r25-r31 and f14.
lis     r31, weight_matrix@ha
addi    r31, r31, weight_matrix@l
lis     r30, bias_vector@ha
addi    r30, r30, bias_vector@l
lis     r29, input_vector@ha
addi    r29, r29, input_vector@l
lis     r25, output_vector@ha
addi    r25, r25, output_vector@l

# Stand-alone symbols are addressed through @ha/@l; a base of r0 would mean
# "literal 0", i.e. an absolute 16-bit address.
lis     r9, layer_size@ha
lwz     r28, layer_size@l(r9)       # Number of neurons
lis     r9, input_size@ha
lwz     r26, input_size@l(r9)       # Number of inputs (same for every neuron)
lis     r9, one_constant@ha
lfd     f14, one_constant@l(r9)     # 1.0
li      r27, 0                      # Current neuron

neural_layer_loop:
    cmpw    r27, r28                # Check if done with layer
    bge     layer_done

    # Start the accumulator from this neuron's bias
    slwi    r7, r27, 3              # neuron * 8 bytes
    lfdx    f1, r30, r7             # sum = bias[neuron]

    # Address of this neuron's weight row, computed once per neuron
    mullw   r8, r27, r26            # neuron * input_size
    slwi    r8, r8, 3               # * 8 bytes
    add     r8, r31, r8             # r8 -> weights[neuron][0]
    li      r10, 0                  # Input index

weight_sum_loop:
    cmpw    r10, r26                # Check if done with inputs
    bge     weight_sum_done

    slwi    r11, r10, 3             # input * 8 bytes (same offset in both arrays)
    lfdx    f2, r8, r11             # weights[neuron][input]
    lfdx    f3, r29, r11            # input[input]
    fmadd   f1, f2, f3, f1          # sum += weight * input

    addi    r10, r10, 1             # Next input
    b       weight_sum_loop

weight_sum_done:
    # sigmoid(x) = 1 / (1 + e^(-x))
    fneg    f1, f1                  # Argument: -x
    bl      exp_function            # f1 = e^(-x)
    fadd    f0, f1, f14             # 1 + e^(-x)
    fdiv    f0, f14, f0             # 1 / (1 + e^(-x))

    # Store neuron output (offset recomputed: r7 is volatile across the call)
    slwi    r7, r27, 3              # neuron * 8 bytes
    stfdx   f0, r25, r7             # output[neuron]

    addi    r27, r27, 1             # Next neuron
    b       neural_layer_loop

layer_done:

Related Instructions

lfdu, lfdx, lfdux, lfs, stfd, lwz

Back to Index