LFS - Load Floating-Point Single | PowerPC Instruction Set Reference

Instruction Syntax

Mnemonic	Format	Flags
lfs	frD,d(rA)	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	110000 (0x30)
frD	6-10	Destination floating-point register
rA	11-15	Base address register (a field value of 0 selects the constant 0, not r0)
d	16-31	16-bit signed displacement

Operation

if rA = 0 then EA ← EXTS(d)
else EA ← (rA) + EXTS(d)
frD ← DOUBLE(MEM(EA, 4))

A single-precision floating-point value (32 bits) is loaded from memory, converted to double-precision format, and placed in floating-point register frD. The effective address is the sum of the sign-extended 16-bit displacement d and the contents of register rA; when the rA field is 0, the constant 0 is used as the base instead of the contents of r0.

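Note: The loaded single-precision value is automatically converted to double-precision format; the conversion is exact because every IEEE-754 single-precision value is representable in double precision. The effective address should be word-aligned (divisible by 4): depending on the implementation, a misaligned floating-point load may raise an alignment exception or execute slowly. If rA=0, the base is the value 0, not the contents of register r0; because d is a 16-bit signed value, that form can reach only addresses within 32 KB of address 0 (wrapping to the top of the address space for negative d).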

Affected Registers

None - This instruction does not affect any condition register fields or XER register bits.

For more information on floating-point operations see Section 2.1.4, "Floating-Point Status and Control Register (FPSCR)," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

Basic Single-Precision Loading

# Each lfs reads the 4-byte single at d(rA) and places it, widened to double, in frD.
lfs     f1, 0(r3)       # f1 = single at address (r3)
lfs     f2, 100(r4)     # f2 = single at address (r4) + 100
lfs     f3, -48(r5)     # f3 = single at address (r5) - 48; a multiple of 4 keeps an aligned base word-aligned

3D Vector Loading

# Magnitude of a 3-component vector: |v| = sqrt(x² + y² + z²)
# In:   vector_data = three consecutive 4-byte floats (x at +0, y at +4, z at +8)
# Out:  f1, f2, f3 = x, y, z    f4 = x² + y² + z²    f5 = |v|
# Uses: r3 = address of vector_data
lis     r3, vector_data@ha          # high half, pre-adjusted for the signed low half
addi    r3, r3, vector_data@l       # r3 = &vector_data

# Each component is loaded right before the arithmetic that consumes it.
lfs     f1, 0(r3)                   # x
fmul    f4, f1, f1                  # f4 = x * x
lfs     f2, 4(r3)                   # y
fmadd   f4, f2, f2, f4              # f4 = y * y + f4
lfs     f3, 8(r3)                   # z
fmadd   f4, f3, f3, f4              # f4 = z * z + f4

# NOTE(review): fsqrt is an optional instruction and is absent on some
# PowerPC cores; confirm the target implements it.
fsqrt   f5, f4                      # f5 = sqrt(f4)

Matrix Loading (4x4)

# Load the 16 single-precision elements of transform_matrix into f0-f15.
# Element m[row][col] is read from byte offset (row * 4 + col) * 4, so
# m[row][col] ends up in f(row * 4 + col).
# NOTE(review): f14 and f15 are callee-saved under the common PowerPC ABIs;
# a routine containing this sequence presumably has to save and restore them.
lis     r3, transform_matrix@ha
addi    r3, r3, transform_matrix@l  # r3 = &transform_matrix

# Row 0: offsets 0..12
lfs     f0,   0(r3)                 # m[0][0]
lfs     f1,   4(r3)                 # m[0][1]
lfs     f2,   8(r3)                 # m[0][2]
lfs     f3,  12(r3)                 # m[0][3]

# Row 1: offsets 16..28
lfs     f4,  16(r3)                 # m[1][0]
lfs     f5,  20(r3)                 # m[1][1]
lfs     f6,  24(r3)                 # m[1][2]
lfs     f7,  28(r3)                 # m[1][3]

# Row 2: offsets 32..44
lfs     f8,  32(r3)                 # m[2][0]
lfs     f9,  36(r3)                 # m[2][1]
lfs     f10, 40(r3)                 # m[2][2]
lfs     f11, 44(r3)                 # m[2][3]

# Row 3: offsets 48..60
lfs     f12, 48(r3)                 # m[3][0]
lfs     f13, 52(r3)                 # m[3][1]
lfs     f14, 56(r3)                 # m[3][2]
lfs     f15, 60(r3)                 # m[3][3]

Audio Processing

# Scale every sample of audio_samples in place by volume_factor and then by
# low_pass_coeff.
# Register roles:
#   r3 = base address of audio_samples (4-byte floats)
#   r4 = sample count (word read from sample_count)
#   r5 = sample index
#   r6 = scratch for address high halves, then byte offset of the current sample
#   f1 = current sample, f2 = volume multiplier, f3 = filter coefficient
lis     r3, audio_samples@ha
addi    r3, r3, audio_samples@l

# Variables are addressed with an @ha/@l pair: the d(r0) form has only a
# 16-bit signed displacement from address 0 and cannot reach an arbitrary symbol.
lis     r4, sample_count@ha
lwz     r4, sample_count@l(r4)      # r4 = number of samples

# Both coefficients are loop-invariant, so they are loaded once.
lis     r6, volume_factor@ha
lfs     f2, volume_factor@l(r6)     # f2 = volume multiplier
lis     r6, low_pass_coeff@ha
lfs     f3, low_pass_coeff@l(r6)    # f3 = filter coefficient

li      r5, 0                       # sample index
cmpwi   r4, 0
ble     audio_done                  # nothing to do for an empty buffer

process_audio:
    slwi    r6, r5, 2               # byte offset = index * 4 (4 bytes per float)
    lfsx    f1, r3, r6              # load audio sample

    fmuls   f1, f1, f2              # apply volume
    fmuls   f1, f1, f3              # scale by the filter coefficient

    stfsx   f1, r3, r6              # store processed sample back in place

    addi    r5, r5, 1               # next sample
    cmpw    r5, r4
    blt     process_audio           # continue while index < count

audio_done:

Physics Simulation

# Advance every particle's position by one step: pos = pos + vel * dt.
# Particle record (32 bytes = 8 floats):
#   +0 x, +4 y, +8 z, +12 vx, +16 vy, +20 vz, +24 mass, +28 not accessed here
# Register roles:
#   r3 = base of particle_array     r4 = particle index     r5 = particle count
#   r6 = scratch (address high half) r7 = byte offset       r8 = particle address
#   f1-f3 = position, f4-f6 = velocity, f7 = mass, f8 = dt
lis     r3, particle_array@ha
addi    r3, r3, particle_array@l
li      r4, 0                       # particle index

# Variables are addressed with an @ha/@l pair: the d(r0) form has only a
# 16-bit signed displacement from address 0 and cannot reach an arbitrary symbol.
lis     r5, particle_count@ha
lwz     r5, particle_count@l(r5)    # r5 = number of particles
lis     r6, time_delta@ha
lfs     f8, time_delta@l(r6)        # dt is loop-invariant: load it once

cmpwi   r5, 0
ble     particles_done              # nothing to do for an empty array

update_particles:
    slwi    r7, r4, 5               # offset = index * 32 (sizeof(Particle))
    add     r8, r3, r7              # r8 = address of this particle

    # Load position
    lfs     f1, 0(r8)               # x position
    lfs     f2, 4(r8)               # y position
    lfs     f3, 8(r8)               # z position

    # Load velocity
    lfs     f4, 12(r8)              # x velocity
    lfs     f5, 16(r8)              # y velocity
    lfs     f6, 20(r8)              # z velocity

    # Mass is loaded to show the record layout; this step does not use it.
    lfs     f7, 24(r8)              # mass

    # fmadd frD, frA, frC, frB computes frA * frC + frB
    fmadd   f1, f4, f8, f1          # x = vx * dt + x
    fmadd   f2, f5, f8, f2          # y = vy * dt + y
    fmadd   f3, f6, f8, f3          # z = vz * dt + z

    # Store updated position (stfs rounds the double result back to single)
    stfs    f1, 0(r8)
    stfs    f2, 4(r8)
    stfs    f3, 8(r8)

    addi    r4, r4, 1               # next particle
    cmpw    r4, r5
    blt     update_particles        # continue while index < count

particles_done:

Graphics Lighting Calculations

# Compute the lighting factor clamp(N · L, 0, 1).
# In:   light_params   = light direction  (x, y, z as 4-byte floats)
#       surface_normal = surface normal   (x, y, z as 4-byte floats)
# Out:  f7 = lighting factor
# Uses: r3, r4 = data pointers, r5 = scratch for constant addresses,
#       f1-f6 = vector components, f8 = 0.0, f9 = 1.0, cr0
lis     r3, light_params@ha
addi    r3, r3, light_params@l
lis     r4, surface_normal@ha
addi    r4, r4, surface_normal@l

# Load light direction
lfs     f1, 0(r3)                   # light direction X
lfs     f2, 4(r3)                   # light direction Y
lfs     f3, 8(r3)                   # light direction Z

# Load surface normal
lfs     f4, 0(r4)                   # normal X
lfs     f5, 4(r4)                   # normal Y
lfs     f6, 8(r4)                   # normal Z

# Dot product; fmadd frD, frA, frC, frB computes frA * frC + frB
fmul    f7, f1, f4                  # lx*nx
fmadd   f7, f2, f5, f7              # + ly*ny
fmadd   f7, f3, f6, f7              # + lz*nz

# Clamp to [0, 1]. The constants are addressed with an @ha/@l pair: the d(r0)
# form has only a 16-bit signed displacement from address 0 and cannot reach
# an arbitrary symbol.
lis     r5, zero_constant@ha
lfs     f8, zero_constant@l(r5)     # f8 = 0.0
fcmpu   cr0, f7, f8
bge     positive_dot                # taken unless f7 < 0 (also taken when unordered)
fmr     f7, f8                      # negative: use 0

positive_dot:
lis     r5, one_constant@ha
lfs     f9, one_constant@l(r5)      # f9 = 1.0
fcmpu   cr0, f7, f9
ble     dot_in_range                # taken unless f7 > 1 (also taken when unordered)
fmr     f7, f9                      # above 1: use 1

dot_in_range:
# f7 now contains the lighting factor

Digital Signal Processing

# Multiply 1024 samples from signal_buffer by a Hann window,
#   w(n) = 0.5 * (1 - cos(2*pi*n/N)),
# and store the windowed samples to fft_input.
#
# Register roles:
#   r3 = base of signal_buffer   r7 = base of fft_input
#   r4 = N (1024)                r5 = sample index n     r6 = byte offset
#   r8 = scratch
#   f1 = sample, f2 = n, f3 = π, f4 = angle / cosine, f5 = N,
#   f6 = 1.0, f7 = window coefficient, f8 = 0.5, f9 = conversion constant
#
# temp_conv is an 8-byte, doubleword-aligned scratch slot at offset temp_conv
# from r1 (to be provided by the surrounding stack frame).
#
# Integer → float: storing an integer with stw and reloading it with lfs only
# reinterprets the bit pattern. The sequence used here builds the double
# 0x43300000_xxxxxxxx = 2^52 + x and subtracts the bias 2^52 + 2^31, which
# yields the signed integer exactly. It assumes big-endian word order
# (high word at the lower address).
#
# NOTE(review): calculate_cos is not shown. It is assumed to take the angle in
# f4, return the cosine in f4, and preserve r3-r8, f1, f3, f5, f6, f8 and f9.
# `bl` also overwrites LR. Confirm against the helper.
lis     r3, signal_buffer@ha
addi    r3, r3, signal_buffer@l
lis     r7, fft_input@ha
addi    r7, r7, fft_input@l         # full 32-bit base; fft_input(r6) has only a 16-bit displacement
li      r4, 1024                    # FFT size N
li      r5, 0                       # sample index n

# Loop-invariant constants, addressed with @ha/@l pairs.
lis     r8, pi_constant@ha
lfs     f3, pi_constant@l(r8)       # f3 = π
lis     r8, one_constant@ha
lfs     f6, one_constant@l(r8)      # f6 = 1.0
lis     r8, half_constant@ha
lfs     f8, half_constant@l(r8)     # f8 = 0.5

# f9 = 2^52 + 2^31, the bias used by the integer → double conversion.
lis     r8, 0x4330
stw     r8, temp_conv(r1)           # high word stays 0x43300000 from here on
lis     r8, 0x8000
stw     r8, temp_conv+4(r1)
lfd     f9, temp_conv(r1)

# f5 = (double)N, converted once.
xoris   r8, r4, 0x8000              # flip the sign bit: N + 2^31 as unsigned
stw     r8, temp_conv+4(r1)
lfd     f5, temp_conv(r1)
fsub    f5, f5, f9                  # f5 = N

load_fft_input:
    slwi    r6, r5, 2               # byte offset = n * 4
    lfsx    f1, r3, r6              # load real part

    # f2 = (double)n
    xoris   r8, r5, 0x8000
    stw     r8, temp_conv+4(r1)
    lfd     f2, temp_conv(r1)
    fsub    f2, f2, f9

    # Angle = 2πn / N
    fmuls   f4, f3, f2              # π * n
    fadds   f4, f4, f4              # 2π * n
    fdivs   f4, f4, f5              # 2π * n / N

    bl      calculate_cos           # f4 = cos(2πn/N)

    fsubs   f7, f6, f4              # 1 - cos(2πn/N)
    fmuls   f7, f7, f8              # window coefficient

    fmuls   f1, f1, f7              # windowed sample
    stfsx   f1, r7, r6              # fft_input[n] = windowed sample

    addi    r5, r5, 1               # next sample
    cmpw    r5, r4
    blt     load_fft_input          # continue while n < N

Game Engine Transform Processing

# Fetch a game object's position, rotation and scale, then hand over to the
# transform helpers.
# OBJ_* are byte offsets of the single-precision fields within game_object.
# After the loads: f1-f3 = position, f4-f6 = rotation, f7-f9 = scale.
# NOTE(review): the three helpers are not shown; presumably they take their
# inputs from f1-f9. Each `bl` overwrites LR.
lis     r3, game_object@ha
addi    r3, r3, game_object@l       # r3 = &game_object

# Position
lfs     f1, OBJ_POS_X(r3)
lfs     f2, OBJ_POS_Y(r3)
lfs     f3, OBJ_POS_Z(r3)

# Rotation (Euler angles about the X, Y and Z axes)
lfs     f4, OBJ_ROT_X(r3)
lfs     f5, OBJ_ROT_Y(r3)
lfs     f6, OBJ_ROT_Z(r3)

# Scale factors
lfs     f7, OBJ_SCALE_X(r3)
lfs     f8, OBJ_SCALE_Y(r3)
lfs     f9, OBJ_SCALE_Z(r3)

# Transformation pipeline
bl      build_transform_matrix      # build transform from position / rotation / scale
bl      apply_camera_transform      # apply camera transformation
bl      apply_projection            # apply projection matrix

# The helpers' results are what gets rendered.

Financial Calculations

# Load option parameters from market_data and store an option value.
# This is a deliberately simplified illustration (intrinsic value plus time
# value), not a full pricing model.
# STOCK_PRICE ... TIME_TO_EXPIRY and option_value are byte offsets within the
# market_data record.
#
# NOTE(review): the helpers are not shown. calculate_time_value is assumed to
# return the time value in f7, which is the register the in-the-money path adds
# to f6. All helpers are assumed to preserve r3 and f6. Each `bl` overwrites LR.
lis     r3, market_data@ha
addi    r3, r3, market_data@l

lfs     f1, STOCK_PRICE(r3)         # current stock price S
lfs     f2, STRIKE_PRICE(r3)        # option strike price K
lfs     f3, VOLATILITY(r3)          # market volatility
lfs     f4, RISK_FREE_RATE(r3)      # risk-free interest rate
lfs     f5, TIME_TO_EXPIRY(r3)      # time to option expiration

fsubs   f6, f1, f2                  # f6 = S - K
# zero_constant is addressed with an @ha/@l pair: the d(r0) form has only a
# 16-bit signed displacement from address 0 and cannot reach an arbitrary symbol.
lis     r4, zero_constant@ha
lfs     f7, zero_constant@l(r4)     # f7 = 0.0
fcmpu   cr0, f6, f7
bgt     call_option_itm             # S - K > 0: in the money

# Out of the money: the option has time value only.
bl      calculate_time_value        # f7 = time value (assumed, see note above)
fmr     f8, f7                      # f8 must be set on this path before the store
b       option_value_done

call_option_itm:
# In the money: intrinsic value + time value.
bl      calculate_intrinsic_value   # NOTE(review): result not read here; f6 already holds S - K
bl      calculate_time_value        # f7 = time value (assumed, see note above)
fadds   f8, f6, f7                  # total = (S - K) + time value

option_value_done:
stfs    f8, option_value(r3)        # store calculated value

Machine Learning Weight Loading

# One fully connected layer with ReLU:
#   out[j] = max(0, sum over i of weight[j * size + i] * in[i])
# weight_matrix is read as size x size single-precision values, row-major.
#
# Outputs are written to output_vector, a separate buffer of `size` floats.
# Writing them over input_vector would corrupt the inputs still needed by the
# remaining neurons of the same layer.
#
# Register roles:
#   r3 = weight_matrix   r4 = input_vector   r11 = output_vector
#   r5 = layer size      r6 = neuron index   r7 = weight index
#   r8, r9, r10 = byte offsets / scratch     r12 = index of the row's first weight
#   f1 = accumulator, f2 = weight, f3 = input, f4 = 0.0
lis     r3, weight_matrix@ha
addi    r3, r3, weight_matrix@l
lis     r4, input_vector@ha
addi    r4, r4, input_vector@l
lis     r11, output_vector@ha
addi    r11, r11, output_vector@l

# Variables are addressed with an @ha/@l pair: the d(r0) form has only a
# 16-bit signed displacement from address 0 and cannot reach an arbitrary symbol.
lis     r5, layer_size@ha
lwz     r5, layer_size@l(r5)        # number of neurons in layer
lis     r9, zero_constant@ha
lfs     f4, zero_constant@l(r9)     # f4 = 0.0, loaded once

li      r6, 0                       # neuron index
cmpwi   r5, 0
ble     layer_done                  # nothing to do for an empty layer

neural_network_layer:
    li      r7, 0                   # weight index for current neuron
    fmr     f1, f4                  # accumulator = 0.0
    mullw   r12, r6, r5             # neuron * layer_size, invariant for the inner loop

weight_multiply_accumulate:
    add     r8, r12, r7             # element index of the weight
    slwi    r8, r8, 2               # convert to byte offset
    lfsx    f2, r3, r8              # load weight

    slwi    r9, r7, 2               # input index to byte offset
    lfsx    f3, r4, r9              # load input value

    fmadds  f1, f2, f3, f1          # accumulator = weight * input + accumulator

    addi    r7, r7, 1               # next weight
    cmpw    r7, r5
    blt     weight_multiply_accumulate

    # ReLU activation
    fcmpu   cr0, f1, f4
    bge     relu_positive           # taken unless accumulator < 0
    fmr     f1, f4                  # negative: use 0

relu_positive:
    slwi    r10, r6, 2
    stfsx   f1, r11, r10            # output_vector[neuron] = activation

    addi    r6, r6, 1               # next neuron
    cmpw    r6, r5
    blt     neural_network_layer

layer_done:

Related Instructions

lfsu, lfsx, lfsux, lfd, stfs, lwz

Back to Index