LWAUX - Load Word Algebraic with Update Indexed | PowerPC Instruction Set Reference

Instruction Syntax

Mnemonic	Format	Flags
lwaux	rD,rA,rB	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	011111 (0x1F)
rD	6-10	Destination register
rA	11-15	Source register A
rB	16-20	Source register B
XO	21-30	373 (Extended opcode)
Rc	31	Reserved (0)

Operation

EA ← (rA) + (rB)
rD ← EXTS(MEM(EA, 4))
rA ← EA

A word (32 bits) is loaded from memory, sign-extended to 64 bits, and placed in register rD. The effective address is computed by adding the contents of registers rA and rB. After the load, the effective address is stored back into register rA.

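Note: The instruction form is invalid if rA = 0 or if rA = rD; the update form requires a valid base register that is distinct from the destination register. lwaux is defined only for 64-bit implementations. It combines indexed addressing, an automatic update of the base register, and sign extension to 64 bits, which makes it useful for walking signed 32-bit data with a stride that is only known at run time in 64-bit environments. Because the update happens as part of the address calculation, the word is loaded from (rA) + (rB), not from (rA).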

Affected Registers

rA - Updated with the effective address after the load operation.

For more information on memory addressing see Section 2.1.6, "Effective Address Calculation," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

64-bit Array Processing with Dynamic Strides

# Accumulate a running sum, mean and squared-deviation sum over signed
# 32-bit elements that are reached through a table of per-element strides.
#
# Register roles:
#   r3 = address of the most recently loaded element (starts at data_array)
#   r4 = address of the current entry in stride_patterns
#   r5 = elements remaining
#   r6 = stride for the current element, in bytes
#   r7 = current element, sign-extended to 64 bits by lwaux
#
# lwaux is a pre-update load: it reads the word at r3 + r6 and then writes
# that address back into r3. The first element read is therefore at
# data_array + stride[0], not at data_array itself.
#
# running_sum and variance_sum are accessed as 8-byte doublewords (ld/std)
# so the 64-bit results are not truncated; count is a 4-byte word.
# NOTE(review): "symbol(r0)" encodes rA = 0, i.e. a base of zero, so each
# symbol must be reachable with a signed 16-bit displacement (and be
# 4-byte aligned for ld/std) - confirm against the linker layout.
lis r3, data_array@ha
addi r3, r3, data_array@l        # r3 = &data_array
lis r4, stride_patterns@ha
addi r4, r4, stride_patterns@l   # r4 = &stride_patterns
lwz r5, num_elements(r0)         # r5 = number of elements
cmpwi r5, 0
beq array_processing_done        # Empty array: skip the loop entirely

array_processing_loop:
    # Fetch the stride that leads to the next element
    lwz r6, 0(r4)           # r6 = stride in bytes

    # r7 = EXTS(word at r3 + r6); r3 = r3 + r6
    lwaux r7, r3, r6

    # Running sum, kept as a 64-bit doubleword
    ld r8, running_sum(r0)  # Load current running sum
    add r9, r8, r7          # sum += value (64-bit addition)
    std r9, running_sum(r0) # Store updated sum

    # Element count (32-bit word)
    lwz r10, count(r0)      # Load current count
    addi r11, r10, 1        # Increment count
    stw r11, count(r0)      # Store updated count

    # mean = sum / count, signed 64-bit divide
    divd r12, r9, r11

    # Squared difference: (value - mean)^2, all 64-bit
    sub r13, r7, r12        # r13 = value - mean
    mulld r14, r13, r13     # r14 = low 64 bits of (value - mean)^2

    # Accumulate the squared difference
    ld r15, variance_sum(r0)  # Load current variance sum
    add r16, r15, r14         # Add squared difference
    std r16, variance_sum(r0) # Store updated variance sum

    addi r4, r4, 4          # Next stride entry
    subi r5, r5, 1          # One fewer element remaining
    cmpwi r5, 0
    bne array_processing_loop # Continue until the counter reaches zero

array_processing_done:

Advanced Memory Management - Dynamic Allocation

# Walk a memory pool, carving out one block per entry in allocation_sizes.
#
# Register roles:
#   r3 = pool cursor (starts at memory_pool, advanced by lwaux)
#   r4 = address of the current entry in allocation_sizes
#   r5 = allocations remaining
#   r6 = size of the current block in bytes (added directly to the byte
#        address in r3)
#   r7 = status word read from the new cursor position, sign-extended
#   r8 = start address of the block just carved out
#
# Block layout written by this code: word 0 holds the block size, and every
# following word k (k >= 1) that fits inside the block holds the value k.
#
# NOTE(review): r3, r4 and r5 are live across "bl handle_allocation_failure";
# confirm that the handler preserves them. On failure the cursor in r3 has
# already been advanced and is not rolled back.
lis r3, memory_pool@ha
addi r3, r3, memory_pool@l
lis r4, allocation_sizes@ha
addi r4, r4, allocation_sizes@l
lwz r5, num_allocations(r0) # Number of memory allocations
cmpwi r5, 0
beq allocation_done         # No requests: nothing to allocate

allocation_loop:
    # Load allocation size for current request
    lwz r6, 0(r4)           # r6 = allocation size in bytes

    # Pre-update load: r7 = EXTS(word at r3 + r6), then r3 = r3 + r6
    lwaux r7, r3, r6

    # A negative status word means the allocation failed
    cmpwi r7, 0
    blt allocation_failed

    # r3 now points past the block, so its start is r3 - size
    sub r8, r3, r6          # r8 = block start (header address)
    stw r6, 0(r8)           # Store block size in header

    # Fill the remainder of the block with a word-index pattern.
    # The loop is bounded by the size in words, and starts at word 1 so
    # that the header stored above is not overwritten.
    srwi r11, r6, 2         # r11 = block size in words
    li r9, 1                # Word index, skipping the header word
    addi r10, r8, 4         # Address of word 1

init_loop:
    cmpw r9, r11            # All words of the block written?
    bge allocation_complete # Exit if done

    stw r9, 0(r10)          # Store the word index as the pattern
    addi r10, r10, 4        # Next word address
    addi r9, r9, 1          # Next word index
    b init_loop             # Continue initialization

allocation_complete:
    # Mark allocation as successful
    li r11, 1               # Success status
    stw r11, allocation_status(r0)
    b next_allocation

allocation_failed:
    # Handle allocation failure
    li r12, -1              # Failure status
    stw r12, allocation_status(r0)
    bl handle_allocation_failure

next_allocation:
    addi r4, r4, 4          # Next allocation size
    subi r5, r5, 1          # Decrement allocation counter
    cmpwi r5, 0
    bne allocation_loop     # Continue allocations

allocation_done:

64-bit Database Record Processing

# Decode a stream of records whose fields are located through a table of
# per-record field offsets.
#
# Register roles:
#   r3 = record cursor (starts at record_buffer, advanced by every lwaux)
#   r4 = address of the current 5-word offset set in field_offsets
#   r5 = records remaining
#   r7 = record type, sign-extended
#   r9, r11, r13, r15 = field values left in registers for the type handler
#
# Every lwaux is a pre-update load: the field is read from r3 + offset and
# r3 is then set to that address, so each offset is relative to the field
# read before it, not to the start of the record.
#
# NOTE(review): r3, r4 and r5 are live across the "bl" calls below; confirm
# that the handlers preserve them.
lis r3, record_buffer@ha
addi r3, r3, record_buffer@l
lis r4, field_offsets@ha
addi r4, r4, field_offsets@l
lwz r5, num_records(r0)     # Number of records to process
cmpwi r5, 0
beq record_processing_done  # No records: skip the loop entirely

# Process database records with dynamic field access
record_processing_loop:
    # The first offset of each set leads to the record type
    lwz r6, 0(r4)           # Load record type offset
    lwaux r7, r3, r6        # r7 = record type; r3 = r3 + r6

    # Dispatch on record type
    cmpwi r7, RECORD_TYPE_A
    beq process_type_a
    cmpwi r7, RECORD_TYPE_B
    beq process_type_b
    cmpwi r7, RECORD_TYPE_C
    beq process_type_c
    b unknown_record_type

process_type_a:
    # Type A record: [type, id, name, value, timestamp]
    lwz r8, 4(r4)           # Load ID field offset
    lwaux r9, r3, r8        # Load ID and advance

    lwz r10, 8(r4)          # Load name field offset
    lwaux r11, r3, r10      # Load name pointer and advance

    lwz r12, 12(r4)         # Load value field offset
    lwaux r13, r3, r12      # Load signed value and advance

    lwz r14, 16(r4)         # Load timestamp field offset
    lwaux r15, r3, r14      # Load timestamp and advance

    # Process Type A record
    bl process_type_a_record
    b record_complete

process_type_b:
    # Type B record: [type, id, data_array, checksum]
    lwz r8, 4(r4)           # Load ID field offset
    lwaux r9, r3, r8        # Load ID and advance

    lwz r10, 8(r4)          # Load data array offset
    lwaux r11, r3, r10      # Load data array pointer and advance

    lwz r12, 12(r4)         # Load checksum field offset
    lwaux r13, r3, r12      # Load checksum and advance

    # Process Type B record
    bl process_type_b_record
    b record_complete

process_type_c:
    # Type C record: [type, id, metadata, payload]
    lwz r8, 4(r4)           # Load ID field offset
    lwaux r9, r3, r8        # Load ID and advance

    lwz r10, 8(r4)          # Load metadata offset
    lwaux r11, r3, r10      # Load metadata and advance

    lwz r12, 12(r4)         # Load payload offset
    lwaux r13, r3, r12      # Load payload pointer and advance

    # Process Type C record
    bl process_type_c_record
    b record_complete

unknown_record_type:
    # Report the record, then fall through to the common loop tail
    bl handle_unknown_record_type

record_complete:
    # Every record consumes one full offset set, whatever its type
    addi r4, r4, 20         # Next field offset set (5 fields * 4 bytes)
    subi r5, r5, 1          # Decrement record counter
    cmpwi r5, 0
    bne record_processing_loop # Continue processing

record_processing_done:

64-bit Graphics Pipeline - Vertex Processing

# Transform a stream of vertices whose attributes are reached through a
# table of per-vertex strides (position, normal, texture).
#
# Register roles:
#   r3  = vertex cursor (starts at vertex_buffer, advanced by every lwaux)
#   r4  = address of the current 3-word stride set in attribute_strides
#   r5  = vertices remaining
#   r7, r8, r9 = x, y, z of the current vertex, sign-extended
#   r10 = address of transform_matrix (words at byte offsets 0..44, read as
#         three rows of four: m11..m14, m21..m24, m31..m34)
#   r20, r30, r8 = new_x, new_y, new_z when they are stored
#
# Every lwaux is a pre-update load: the word is read from r3 + stride and
# r3 is then set to that address.
#
# The Z row reuses the scratch registers of the X row (r11-r19). It must not
# use r1-r5: r1 is the stack pointer, and r3, r4 and r5 carry the loop state
# that is needed after the transform.
#
# NOTE(review): r3, r4 and r5 are live across the "bl" calls below; confirm
# that the helpers preserve them.
lis r3, vertex_buffer@ha
addi r3, r3, vertex_buffer@l
lis r4, attribute_strides@ha
addi r4, r4, attribute_strides@l
lwz r5, num_vertices(r0)    # Number of vertices to process
cmpwi r5, 0
beq vertex_processing_done  # No vertices: skip the loop entirely

# Process vertices with variable attribute layouts
vertex_processing_loop:
    # Load the position, stepping by the position stride before each read
    lwz r6, 0(r4)           # Load position stride
    lwaux r7, r3, r6        # Load X coordinate and advance
    lwaux r8, r3, r6        # Load Y coordinate and advance
    lwaux r9, r3, r6        # Load Z coordinate and advance

    # r10 is reloaded every iteration because the normal load further down
    # reuses it
    lis r10, transform_matrix@ha
    addi r10, r10, transform_matrix@l

    # Transform X coordinate: new_x = m11*x + m12*y + m13*z + m14
    lwz r11, 0(r10)         # Load m11
    mullw r12, r7, r11      # m11 * x
    lwz r13, 4(r10)         # Load m12
    mullw r14, r8, r13      # m12 * y
    add r15, r12, r14       # m11*x + m12*y
    lwz r16, 8(r10)         # Load m13
    mullw r17, r9, r16      # m13 * z
    add r18, r15, r17       # m11*x + m12*y + m13*z
    lwz r19, 12(r10)        # Load m14
    add r20, r18, r19       # new_x = m11*x + m12*y + m13*z + m14

    # Transform Y coordinate: new_y = m21*x + m22*y + m23*z + m24
    lwz r21, 16(r10)        # Load m21
    mullw r22, r7, r21      # m21 * x
    lwz r23, 20(r10)        # Load m22
    mullw r24, r8, r23      # m22 * y
    add r25, r22, r24       # m21*x + m22*y
    lwz r26, 24(r10)        # Load m23
    mullw r27, r9, r26      # m23 * z
    add r28, r25, r27       # m21*x + m22*y + m23*z
    lwz r29, 28(r10)        # Load m24
    add r30, r28, r29       # new_y = m21*x + m22*y + m23*z + m24

    # Transform Z coordinate: new_z = m31*x + m32*y + m33*z + m34
    # r11-r19 are free again now that new_x is held in r20
    lwz r11, 32(r10)        # Load m31
    mullw r12, r7, r11      # m31 * x
    lwz r13, 36(r10)        # Load m32
    mullw r14, r8, r13      # m32 * y
    add r15, r12, r14       # m31*x + m32*y
    lwz r16, 40(r10)        # Load m33
    mullw r17, r9, r16      # m33 * z
    add r18, r15, r17       # m31*x + m32*y + m33*z
    lwz r19, 44(r10)        # Load m34
    add r8, r18, r19        # new_z (y in r8 is no longer needed)

    # Store transformed coordinates
    stw r20, transformed_x(r0)  # Store new X
    stw r30, transformed_y(r0)  # Store new Y
    stw r8, transformed_z(r0)   # Store new Z

    # Load normal attributes if present (a zero stride means absent)
    lwz r9, 4(r4)           # Load normal stride
    cmpwi r9, 0             # Check if normals present
    beq skip_normals

    lwaux r10, r3, r9       # Load normal X and advance
    lwaux r11, r3, r9       # Load normal Y and advance
    lwaux r12, r3, r9       # Load normal Z and advance

    # Transform normals (simplified - no translation)
    # Normal transformation requires matrix inversion and transposition
    bl transform_normal_vector

skip_normals:
    # Load texture coordinates if present (a zero stride means absent)
    lwz r13, 8(r4)          # Load texture stride
    cmpwi r13, 0            # Check if texture coords present
    beq skip_texture

    lwaux r14, r3, r13      # Load texture U and advance
    lwaux r15, r3, r13      # Load texture V and advance

    # Store texture coordinates
    stw r14, texture_u(r0)  # Store U coordinate
    stw r15, texture_v(r0)  # Store V coordinate

skip_texture:
    # Send vertex to graphics pipeline
    bl send_vertex_to_pipeline

    addi r4, r4, 12         # Next attribute stride set (3 attributes * 4 bytes)
    subi r5, r5, 1          # Decrement vertex counter
    cmpwi r5, 0
    bne vertex_processing_loop # Continue processing

vertex_processing_done:

64-bit Network Protocol Processing

# Parse a stream of packets whose header fields are located through a table
# of per-packet field offsets.
#
# Register roles:
#   r3 = packet cursor (starts at packet_buffer, advanced by every lwaux)
#   r4 = address of the current 7-word layout in header_layouts
#   r5 = packets remaining
#   r7 = protocol version, sign-extended
#   r9, r11, r13, r15, r17, r19 = header fields left in registers for the
#        version-specific handler (V1 uses the first four, V2 the first
#        five, V3 all six)
#
# Every lwaux is a pre-update load: the field is read from r3 + offset and
# r3 is then set to that address, so each offset is relative to the field
# read before it, not to the start of the packet.
#
# NOTE(review): r3, r4 and r5 are live across the "bl" calls below; confirm
# that the handlers preserve them.
lis r3, packet_buffer@ha
addi r3, r3, packet_buffer@l
lis r4, header_layouts@ha
addi r4, r4, header_layouts@l
lwz r5, num_packets(r0)     # Number of packets to process
cmpwi r5, 0
beq packet_processing_done  # No packets: skip the loop entirely

# Process network packets with dynamic header parsing
packet_processing_loop:
    # The first offset of each layout leads to the version field
    lwz r6, 0(r4)           # Load version field offset
    lwaux r7, r3, r6        # r7 = packet version; r3 = r3 + r6

    # Dispatch on protocol version
    cmpwi r7, PROTOCOL_V1
    beq process_v1_packet
    cmpwi r7, PROTOCOL_V2
    beq process_v2_packet
    cmpwi r7, PROTOCOL_V3
    beq process_v3_packet
    b unknown_protocol

process_v1_packet:
    # V1 packet: [version, length, source, destination, data]
    lwz r8, 4(r4)           # Load length field offset
    lwaux r9, r3, r8        # Load packet length and advance

    lwz r10, 8(r4)          # Load source field offset
    lwaux r11, r3, r10      # Load source address and advance

    lwz r12, 12(r4)         # Load destination field offset
    lwaux r13, r3, r12      # Load destination address and advance

    lwz r14, 16(r4)         # Load data field offset
    lwaux r15, r3, r14      # Load data pointer and advance

    # Process V1 packet
    bl process_v1_packet_data
    b packet_complete

process_v2_packet:
    # V2 packet: [version, length, source, destination, flags, data]
    lwz r8, 4(r4)           # Load length field offset
    lwaux r9, r3, r8        # Load packet length and advance

    lwz r10, 8(r4)          # Load source field offset
    lwaux r11, r3, r10      # Load source address and advance

    lwz r12, 12(r4)         # Load destination field offset
    lwaux r13, r3, r12      # Load destination address and advance

    lwz r14, 16(r4)         # Load flags field offset
    lwaux r15, r3, r14      # Load flags and advance

    lwz r16, 20(r4)         # Load data field offset
    lwaux r17, r3, r16      # Load data pointer and advance

    # Process V2 packet
    bl process_v2_packet_data
    b packet_complete

process_v3_packet:
    # V3 packet: [version, length, source, destination, flags, timestamp, data]
    lwz r8, 4(r4)           # Load length field offset
    lwaux r9, r3, r8        # Load packet length and advance

    lwz r10, 8(r4)          # Load source field offset
    lwaux r11, r3, r10      # Load source address and advance

    lwz r12, 12(r4)         # Load destination field offset
    lwaux r13, r3, r12      # Load destination address and advance

    lwz r14, 16(r4)         # Load flags field offset
    lwaux r15, r3, r14      # Load flags and advance

    lwz r16, 20(r4)         # Load timestamp field offset
    lwaux r17, r3, r16      # Load timestamp and advance

    lwz r18, 24(r4)         # Load data field offset
    lwaux r19, r3, r18      # Load data pointer and advance

    # Process V3 packet
    bl process_v3_packet_data
    b packet_complete

unknown_protocol:
    # Report the packet, then fall through to the common loop tail
    bl handle_unknown_protocol

packet_complete:
    # Every packet consumes one full layout, whatever its version
    addi r4, r4, 28         # Next header layout (7 fields * 4 bytes)
    subi r5, r5, 1          # Decrement packet counter
    cmpwi r5, 0
    bne packet_processing_loop # Continue processing

packet_processing_done:

Related Instructions

lwa, lwax, stwux, lwzux, lhaux, lfdux

Back to Index