lhaux
Instruction Syntax
| Mnemonic | Format | Flags |
| lhaux | rD,rA,rB | - |
Instruction Encoding
| Field | Bits | Description |
| Primary Opcode | 0-5 | 011111 (0x1F) |
| rD | 6-10 | Destination register |
| rA | 11-15 | Source register A |
| rB | 16-20 | Source register B |
| XO | 21-30 | 375 (Extended opcode) |
| Rc | 31 | Reserved (0) |
Operation
EA ← (rA) + (rB) rD ← EXTS(MEM(EA, 2)) rA ← EA
A halfword (16 bits) is loaded from memory, sign-extended to 32 bits, and placed in register rD. The effective address is computed by adding the contents of registers rA and rB. After the load, the effective address is stored back into register rA.
Note: This instruction cannot be used with rA=0. The update form requires a valid base register. This is the most advanced addressing mode for signed halfword loads, combining indexed addressing with automatic pointer advancement and sign extension. Essential for processing signed 16-bit data structures with dynamic stride patterns.
Affected Registers
rA - Updated with the effective address after the load operation.
For more information on memory addressing see Section 2.1.6, "Effective Address Calculation," in the PowerPC Microprocessor Family: The Programming Environments manual.
Examples
Advanced Audio Processing - Multi-Channel Convolution
# Process multi-channel audio with variable channel spacing
lis r3, multichannel_audio@ha
addi r3, r3, multichannel_audio@l
lis r4, channel_strides@ha
addi r4, r4, channel_strides@l
lwz r5, num_samples(r0) # Number of samples per channel
lwz r6, num_channels(r0) # Number of audio channels
# Audio convolution with impulse response for room acoustics
multichannel_convolution_loop:
li r7, 0 # Channel index
channel_loop:
# Load channel stride (varies for different sample rates/formats)
lwz r8, 0(r4) # Load stride for this channel
# Load audio sample with dynamic channel advancement
lhaux r9, r3, r8 # Load signed sample and advance by stride
# Apply convolution with room impulse response
lis r10, impulse_response@ha
addi r10, r10, impulse_response@l
lwz r11, impulse_length(r0) # Length of impulse response
li r12, 0 # Convolution accumulator
li r13, 0 # Impulse sample index
mr r14, r3 # Current audio position for convolution
convolution_loop:
cmpw r13, r11 # Check if processed entire impulse
bge convolution_done
# Load impulse coefficient
slwi r15, r13, 1 # Convert index to byte offset
lhax r16, r10, r15 # Load signed impulse coefficient
# Load delayed audio sample
mullw r17, r13, r8 # Calculate delay offset
sub r18, r14, r17 # Calculate delayed sample address
# Bounds check for audio buffer
lis r19, multichannel_audio@ha
addi r19, r19, multichannel_audio@l
cmpw r18, r19 # Check if within buffer
blt skip_convolution # Skip if out of bounds
lha r20, 0(r18) # Load delayed audio sample
# Convolution: accumulate impulse * delayed_sample
mullw r21, r16, r20 # impulse_coeff * delayed_sample
add r12, r12, r21 # Add to convolution sum
skip_convolution:
addi r13, r13, 1 # Next impulse sample
b convolution_loop # Continue convolution
convolution_done:
# Scale convolution result
srawi r22, r12, 12 # Scale down (12-bit fractional)
# Apply dynamic range compression
abs r23, r22 # |convolved_sample|
lwz r24, compression_threshold(r0)
cmpw r23, r24 # Compare with threshold
ble no_compression # Skip if below threshold
# Apply soft compression: sign(x) * (threshold + (|x| - threshold) * ratio)
sub r25, r23, r24 # |x| - threshold
lwz r26, compression_ratio(r0) # Compression ratio (0-256)
mullw r27, r25, r26 # (|x| - threshold) * ratio
srawi r28, r27, 8 # Scale ratio
add r29, r24, r28 # threshold + compressed_amount
# Restore sign
cmpwi r22, 0 # Check original sign
bge positive_sample
neg r29, r29 # Apply negative sign
positive_sample:
mr r22, r29 # Use compressed sample
no_compression:
# Store processed sample back to buffer
sth r22, 0(r3) # Store convolved sample
addi r4, r4, 4 # Next channel stride
addi r7, r7, 1 # Next channel
cmpw r7, r6 # Check if done with all channels
blt channel_loop # Continue channels
subi r5, r5, 1 # Decrement sample counter
cmpwi r5, 0
bne multichannel_convolution_loop # Continue processing
Scientific Computing - Sparse Matrix Compressed Storage
# Process sparse matrix in Compressed Sparse Row (CSR) format
lis r3, csr_values@ha
addi r3, r3, csr_values@l
lis r4, row_offsets@ha
addi r4, r4, row_offsets@l
lis r5, column_indices@ha
addi r5, r5, column_indices@l
lwz r6, num_rows(r0) # Number of matrix rows
# Sparse matrix-vector multiplication: y = A * x
sparse_matvec_loop:
li r7, 0 # Current row index
row_processing_loop:
# Load row start and end offsets
slwi r8, r7, 2 # Row index to byte offset
lwzx r9, r4, r8 # Load row start offset
addi r10, r8, 4 # Next row offset address
lwzx r11, r4, r10 # Load row end offset
# Initialize row result
li r12, 0 # Row accumulator
# Process non-zero elements in this row
mr r13, r9 # Current element index
element_loop:
cmpw r13, r11 # Check if done with row
bge row_complete
# Load sparse matrix value with dynamic advancement
slwi r14, r13, 1 # Convert element index to byte offset
lhaux r15, r3, r14 # Load signed matrix value and advance
# Load corresponding column index
slwi r16, r13, 1 # Convert to byte offset for column
lhax r17, r5, r16 # Load column index (signed)
# Load vector element x[column]
lis r18, input_vector@ha
addi r18, r18, input_vector@l
slwi r19, r17, 1 # Convert column to byte offset
lhax r20, r18, r19 # Load x[column] (signed)
# Multiply-accumulate: row_sum += matrix_value * x[column]
mullw r21, r15, r20 # matrix_value * x[column]
add r12, r12, r21 # Add to row accumulator
addi r13, r13, 1 # Next element in row
b element_loop # Continue row processing
row_complete:
# Store row result in output vector
lis r22, output_vector@ha
addi r22, r22, output_vector@l
slwi r23, r7, 1 # Convert row to byte offset
sthx r12, r22, r23 # Store y[row] = row_sum
addi r7, r7, 1 # Next row
cmpw r7, r6 # Check if done with all rows
blt row_processing_loop # Continue matrix processing
Computer Graphics - Mesh Deformation
# Deform 3D mesh vertices using signed displacement vectors
lis r3, vertex_positions@ha
addi r3, r3, vertex_positions@l
lis r4, deformation_vectors@ha
addi r4, r4, deformation_vectors@l
lis r5, vertex_weights@ha
addi r5, r5, vertex_weights@l
lwz r6, num_vertices(r0) # Number of vertices
# Each vertex: [x, y, z] (signed 16-bit coordinates)
# Each deformation: [dx, dy, dz] (signed 16-bit displacements)
mesh_deformation_loop:
li r7, 0 # Vertex index
vertex_deformation_loop:
# Calculate vertex data addresses
slwi r8, r7, 3 # Vertex index * 8 (4 coordinates * 2 bytes)
slwi r9, r7, 3 # Same for deformation vectors
slwi r10, r7, 1 # Weight index * 2 bytes
# Load vertex weight for this deformation
lhax r11, r5, r10 # Load signed weight (0-32767)
# Load and deform X coordinate
lhaux r12, r3, r8 # Load vertex X and advance to next coordinate
li r13, 2 # Standard advance for next coordinate
lhaux r14, r4, r9 # Load deformation dX and advance
# Apply weighted deformation: new_x = old_x + (dx * weight / 32768)
mullw r15, r14, r11 # dx * weight
srawi r16, r15, 15 # Scale by 32768 (15-bit shift)
add r17, r12, r16 # new_x = old_x + scaled_dx
# Clamp to valid coordinate range
cmpwi r17, -32768 # Check lower bound
bge check_x_upper
li r17, -32768 # Clamp to minimum
check_x_upper:
cmpwi r17, 32767 # Check upper bound
ble store_new_x
li r17, 32767 # Clamp to maximum
store_new_x:
sth r17, -2(r3) # Store new X coordinate
# Load and deform Y coordinate
lhaux r18, r3, r13 # Load vertex Y and advance
lhaux r19, r4, r13 # Load deformation dY and advance
# Apply weighted deformation for Y
mullw r20, r19, r11 # dy * weight
srawi r21, r20, 15 # Scale by 32768
add r22, r18, r21 # new_y = old_y + scaled_dy
# Clamp Y coordinate
cmpwi r22, -32768 # Check lower bound
bge check_y_upper
li r22, -32768 # Clamp to minimum
check_y_upper:
cmpwi r22, 32767 # Check upper bound
ble store_new_y
li r22, 32767 # Clamp to maximum
store_new_y:
sth r22, -2(r3) # Store new Y coordinate
# Load and deform Z coordinate
lhaux r23, r3, r13 # Load vertex Z and advance
lhaux r24, r4, r13 # Load deformation dZ and advance
# Apply weighted deformation for Z
mullw r25, r24, r11 # dz * weight
srawi r26, r25, 15 # Scale by 32768
add r27, r23, r26 # new_z = old_z + scaled_dz
# Clamp Z coordinate
cmpwi r27, -32768 # Check lower bound
bge check_z_upper
li r27, -32768 # Clamp to minimum
check_z_upper:
cmpwi r27, 32767 # Check upper bound
ble store_new_z
li r27, 32767 # Clamp to maximum
store_new_z:
sth r27, -2(r3) # Store new Z coordinate
# Calculate surface normal update (simplified)
# For deformed mesh, recalculate normals based on adjacent vertices
bl recalculate_vertex_normal # Update normal for this vertex
addi r7, r7, 1 # Next vertex
cmpw r7, r6 # Check if done with all vertices
blt vertex_deformation_loop # Continue vertex processing
Signal Processing - Adaptive Filter
# Implement adaptive LMS (Least Mean Squares) filter
lis r3, input_signal@ha
addi r3, r3, input_signal@l
lis r4, adaptive_coeffs@ha
addi r4, r4, adaptive_coeffs@l
lis r5, step_sizes@ha
addi r5, r5, step_sizes@l
lwz r6, signal_length(r0) # Number of signal samples
lwz r7, filter_order(r0) # Number of adaptive filter taps
adaptive_filter_loop:
# Calculate filter output using current coefficients
li r8, 0 # Filter output accumulator
li r9, 0 # Tap index
mr r10, r3 # Current signal position
mr r11, r4 # Current coefficient position
filter_convolution:
cmpw r9, r7 # Check if done with all taps
bge calculate_error
# Load signal sample and coefficient with adaptive step
slwi r12, r9, 1 # Convert tap index to byte offset
lhaux r13, r10, r12 # Load signal sample and advance
lhaux r14, r11, r12 # Load coefficient and advance
# Multiply and accumulate
mullw r15, r13, r14 # sample * coefficient
add r8, r8, r15 # Add to filter output
addi r9, r9, 1 # Next tap
b filter_convolution # Continue convolution
calculate_error:
# Load desired response and calculate error
lis r16, desired_signal@ha
addi r16, r16, desired_signal@l
sub r17, r6, 1 # Calculate current sample index
slwi r18, r17, 1 # Convert to byte offset
lhax r19, r16, r18 # Load desired response
# Calculate error: e = desired - actual
srawi r20, r8, 8 # Scale filter output
sub r21, r19, r20 # error = desired - actual
# Update adaptive coefficients using LMS algorithm
# coeff_new = coeff_old + step_size * error * input
li r22, 0 # Tap index for coefficient update
mr r23, r3 # Reset signal position
mr r24, r4 # Reset coefficient position
mr r25, r5 # Reset step size position
coefficient_update_loop:
cmpw r22, r7 # Check if updated all coefficients
bge adaptation_complete
# Load current input sample, coefficient, and step size
slwi r26, r22, 1 # Convert index to byte offset
lhaux r27, r23, r26 # Load input sample and advance
lhaux r28, r24, r26 # Load current coefficient and advance
lhaux r29, r25, r26 # Load step size and advance
# Calculate coefficient update: delta = step_size * error * input
mullw r30, r29, r21 # step_size * error
mullw r31, r30, r27 # * input
srawi r0, r31, 12 # Scale down (12-bit fractional)
# Update coefficient: new_coeff = old_coeff + delta
add r1, r28, r0 # new_coefficient
# Clip coefficient to prevent overflow
cmpwi r1, 32767 # Check upper bound
ble check_coeff_lower
li r1, 32767 # Clip to maximum
check_coeff_lower:
cmpwi r1, -32768 # Check lower bound
bge store_new_coeff
li r1, -32768 # Clip to minimum
store_new_coeff:
sth r1, -2(r24) # Store updated coefficient
addi r22, r22, 1 # Next coefficient
b coefficient_update_loop # Continue coefficient updates
adaptation_complete:
# Calculate convergence metric: |error|
abs r2, r21 # |error|
lwz r3, convergence_threshold(r0)
cmpw r2, r3 # Check if converged
blt filter_converged # Branch if converged
# Continue adaptation
b next_sample
filter_converged:
# Filter has converged - could switch to faster non-adaptive mode
bl switch_to_fixed_mode
next_sample:
# Advance to next signal sample
addi r3, r3, 2 # Next input sample
subi r6, r6, 1 # Decrement sample counter
cmpwi r6, 0
bne adaptive_filter_loop # Continue adaptive filtering
Bioinformatics - DNA Sequence Analysis
# Analyze DNA sequences with variable scoring matrices
lis r3, dna_sequence1@ha
addi r3, r3, dna_sequence1@l
lis r4, dna_sequence2@ha
addi r4, r4, dna_sequence2@l
lis r5, scoring_matrix@ha
addi r5, r5, scoring_matrix@l
lwz r6, sequence1_length(r0) # Length of first sequence
lwz r7, sequence2_length(r0) # Length of second sequence
# DNA bases encoded as: A=0, T=1, G=2, C=3 (signed values for gap penalties)
# Scoring matrix: 4x4 matrix of alignment scores
sequence_alignment_loop:
li r8, 0 # Position in sequence 1
seq1_loop:
li r9, 0 # Position in sequence 2
seq2_loop:
# Load nucleotides from both sequences
slwi r10, r8, 1 # Convert position to byte offset
lhaux r11, r3, r10 # Load nucleotide from sequence 1 and advance
slwi r12, r9, 1 # Convert position to byte offset
lhaux r13, r4, r12 # Load nucleotide from sequence 2 and advance
# Handle gap penalties (negative values indicate gaps)
cmpwi r11, 0 # Check for gap in sequence 1
blt gap_penalty_seq1
cmpwi r13, 0 # Check for gap in sequence 2
blt gap_penalty_seq2
# Normal nucleotide comparison
# Calculate scoring matrix index: score = matrix[base1][base2]
slwi r14, r11, 3 # base1 * 8 (4 bases * 2 bytes per entry)
slwi r15, r13, 1 # base2 * 2 bytes
add r16, r14, r15 # Matrix offset
lhaux r17, r5, r16 # Load alignment score and advance
b score_loaded
gap_penalty_seq1:
# Gap in sequence 1
abs r18, r11 # |gap_length|
lwz r19, gap_penalty(r0) # Gap penalty per position
mullw r17, r18, r19 # gap_length * penalty
neg r17, r17 # Negative penalty
b score_loaded
gap_penalty_seq2:
# Gap in sequence 2
abs r18, r13 # |gap_length|
lwz r19, gap_penalty(r0) # Gap penalty per position
mullw r17, r18, r19 # gap_length * penalty
neg r17, r17 # Negative penalty
score_loaded:
# Store alignment score
lis r20, alignment_scores@ha
addi r20, r20, alignment_scores@l
mullw r21, r8, r7 # seq1_pos * seq2_length
add r22, r21, r9 # + seq2_pos
slwi r23, r22, 1 # Convert to byte offset
sthx r17, r20, r23 # Store alignment score
# Update optimal alignment path (dynamic programming)
bl update_alignment_path # Update traceback matrix
addi r9, r9, 1 # Next position in sequence 2
cmpw r9, r7 # Check if done with sequence 2
blt seq2_loop # Continue sequence 2
addi r8, r8, 1 # Next position in sequence 1
cmpw r8, r6 # Check if done with sequence 1
blt seq1_loop # Continue sequence 1
# Find optimal alignment score
bl find_optimal_alignment # Traceback to find best alignment