LHAU - Load Halfword Algebraic with Update | PowerPC Instruction Set Reference

Instruction Syntax

Mnemonic	Format	Flags
lhau	rD,d(rA)	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	101011 (0x2B)
rD	6-10	Destination register
rA	11-15	Source register A
d	16-31	16-bit signed displacement

Operation

EA ← (rA) + EXTS(d)
rD ← EXTS(MEM(EA, 2))
rA ← EA

A halfword (16 bits) is loaded from memory, sign-extended to 32 bits, and placed in register rD. The effective address is computed by adding the sign-extended displacement to the contents of register rA. After the load, the effective address is stored back into register rA.

Note: The instruction form is invalid if rA=0 or if rA=rD. The update form requires a valid base register, and because rA is overwritten with the effective address, the destination register rD must be a different register from rA. This instruction is useful for sequential processing of signed 16-bit data arrays with automatic pointer advancement.

Affected Registers

rA - Updated with the effective address after the load operation.

For more information on memory addressing see Section 2.1.6, "Effective Address Calculation," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

Sequential Audio Sample Processing

# Process signed 16-bit audio samples in place, adding an echo:
#     output = input + ((delayed_sample * echo_strength) >> 8)
# clipped to the signed 16-bit range. Samples are overwritten as they are
# processed, so the delayed sample read back is an already-processed one
# (feedback echo).
#
# Register roles:
#   r3  = sample pointer (kept one halfword behind the next sample for lhau)
#   r4  = num_samples (moved to CTR)
#   r5  = current sample / processed result
#   r6  = echo delay in samples
#   r7  = echo delay in bytes
#   r8  = scratch: byte offset of current sample, then delayed sample address
#   r9  = start of audio_stream
#   r10 = delayed sample
#   r11 = echo strength (8-bit fractional, per the >> 8 scaling below)
#   r12 = scaled echo contribution
lis r9, audio_stream@ha
addi r9, r9, audio_stream@l # r9 = buffer start
lwz r4, num_samples(r0)     # Number of audio samples
cmpwi r4, 0
ble audio_processing_done   # Nothing to do; also avoids a wrapped loop count
mtctr r4                    # CTR = samples remaining

# Loop-invariant echo parameters are loaded once, outside the loop
lwz r6, echo_delay_samples(r0) # Echo delay in samples
slwi r7, r6, 1              # Convert to byte offset (2 bytes per sample)
lwz r11, echo_strength(r0)  # Load echo strength
subi r3, r9, 2              # Pre-adjust for first lhau

audio_processing_loop:
    lhau r5, 2(r3)          # Load signed audio sample and advance pointer

    # Echo is only applied once at least 'delay' samples precede this one.
    # Comparing byte offsets as unsigned values also rejects a negative
    # delay, which would otherwise read ahead of the current sample.
    sub r8, r3, r9          # Byte offset of current sample within buffer
    cmplw r8, r7
    blt store_processed     # Not enough history: store sample unchanged

    sub r8, r3, r7          # Address of delayed sample
    lha r10, 0(r8)          # Load delayed sample
    mullw r12, r10, r11     # delayed_sample * echo_strength
    srawi r12, r12, 8       # Scale down (8-bit fractional)
    add r5, r5, r12         # Add echo to current sample

    # Clipping to prevent overflow when narrowing to 16 bits
    cmpwi r5, 32767         # Check upper bound
    ble check_lower_clip
    li r5, 32767            # Clip to maximum

check_lower_clip:
    cmpwi r5, -32768        # Check lower bound
    bge store_processed
    li r5, -32768           # Clip to minimum

store_processed:
    sth r5, 0(r3)           # Store processed sample in place
    bdnz audio_processing_loop # Decrement CTR, continue while non-zero

audio_processing_done:

Digital Image Filter - Sobel Edge Detection

# Apply Sobel edge detection to signed 16-bit image data.
#
# For every interior pixel (row 1..height-2, column 1..width-2) this stores
#     edge_output[row*width + col] = |Gx| + |Gy|
# where Gx and Gy are the 3x3 Sobel responses. Border pixels of edge_output
# are not written. sth keeps only the low 16 bits of the magnitude.
#
# Register roles (r1 and r2 are never touched):
#   r3  = image_data base            r4  = width in pixels
#   r5  = height, then row stride in bytes
#   r6  = current row                r7  = height - 1 (row loop bound)
#   r8  = edge_output base           r9  = byte offset of (row, 0)
#   r12 = interior columns per row (width - 2), reloaded into CTR each row
#   r14 / r15 / r16 = top / middle / bottom row pointers
#   r17 = output pointer
#   r19-r26 = neighborhood pixels    r27-r30 = arithmetic scratch
# NOTE(review): r14-r30 are assumed free here; if this snippet is placed in
# a function that follows a calling convention treating them as
# callee-saved, they must be saved and restored.
lis r3, image_data@ha
addi r3, r3, image_data@l
lis r8, edge_output@ha
addi r8, r8, edge_output@l
lwz r4, image_width(r0)     # Image width in pixels
lwz r5, image_height(r0)    # Image height in pixels

# An image narrower or shorter than 3 pixels has no interior pixels
cmpwi r4, 3
blt sobel_done
cmpwi r5, 3
blt sobel_done

subi r7, r5, 1              # End at height-1
subi r12, r4, 2             # Interior columns per row
slwi r5, r4, 1              # Row stride in bytes (height no longer needed)
li r6, 1                    # Start from row 1

sobel_row_loop:
    # Set up one pointer per neighborhood row. Each is pre-adjusted by one
    # halfword so the first lhau/sthu lands on column 0 (inputs, i.e. the
    # left neighbor of column 1) or column 1 (output).
    mullw r9, r6, r4        # row * width
    slwi r9, r9, 1          # * 2 (bytes per pixel)
    add r15, r3, r9         # Address of (row, 0)
    subi r15, r15, 2        # Pre-adjust middle row pointer for lhau
    sub r14, r15, r5        # Top row pointer    = middle - stride
    add r16, r15, r5        # Bottom row pointer = middle + stride
    add r17, r8, r9         # Output (row, 0); sthu stores at +2
    mtctr r12               # CTR = columns to process in this row

sobel_col_loop:
    # Load the 3x3 neighborhood. The lhau on each left pixel advances that
    # row pointer by one pixel, so the next iteration needs no address math.
    lhau r19, 2(r14)        # Load top-left pixel and advance top pointer
    lha r20, 2(r14)         # Load top-center pixel
    lha r21, 4(r14)         # Load top-right pixel

    lhau r22, 2(r15)        # Load middle-left pixel and advance middle pointer
    lha r23, 4(r15)         # Load middle-right pixel
                            # (middle-center has weight 0 in both kernels)

    lhau r24, 2(r16)        # Load bottom-left pixel and advance bottom pointer
    lha r25, 2(r16)         # Load bottom-center pixel
    lha r26, 4(r16)         # Load bottom-right pixel

    # Apply Sobel X kernel: [-1 0 +1; -2 0 +2; -1 0 +1]
    sub r27, r21, r19       # top_right - top_left
    sub r28, r23, r22       # middle_right - middle_left
    slwi r28, r28, 1        # * 2
    add r27, r27, r28
    sub r28, r26, r24       # bottom_right - bottom_left
    add r27, r27, r28       # r27 = Sobel X

    # Apply Sobel Y kernel: [-1 -2 -1; 0 0 0; +1 +2 +1]
    slwi r28, r25, 1        # 2 * bottom_center
    add r28, r28, r24       # + bottom_left
    add r28, r28, r26       # + bottom_right
    slwi r29, r20, 1        # 2 * top_center
    add r29, r29, r19       # + top_left
    add r29, r29, r21       # + top_right
    sub r28, r28, r29       # r28 = Sobel Y (bottom sum - top sum)

    # Gradient magnitude: |Gx| + |Gy| (approximation).
    # Absolute value is computed as (x XOR mask) - mask, where mask is the
    # sign bit replicated across the word (0 or -1).
    srawi r29, r27, 31      # mask for Sobel X
    xor r27, r27, r29
    sub r27, r27, r29       # |Sobel X|
    srawi r29, r28, 31      # mask for Sobel Y
    xor r28, r28, r29
    sub r28, r28, r29       # |Sobel Y|
    add r30, r27, r28       # Gradient magnitude

    sthu r30, 2(r17)        # Store edge magnitude and advance output pointer
    bdnz sobel_col_loop     # Continue row

    addi r6, r6, 1          # Next row
    cmpw r6, r7             # Check if done with image
    blt sobel_row_loop      # Continue image processing

sobel_done:

Time Series Analysis - Moving Average

# Calculate moving average of signed time series data.
#
# For each start position i with a full window available, stores
#     moving_avg[i] = sum(time_series[i .. i+window-1]) / window
# (divw truncates toward zero). A window longer than the series is clamped
# to the series length, which yields a single average over all points.
# Nothing is written when the series length or window size is <= 0.
#
# Register roles:
#   r3  = window start pointer (kept one halfword behind, for lhau)
#   r4  = data points remaining from the current start position
#   r5  = effective window size
#   r6  = sum accumulator
#   r8  = pointer walking the current window
#   r9  = computed average
#   r10 = current sample
#   r11 = output pointer (kept one halfword behind, for sthu)
#   CTR = samples left in the current window
lis r3, time_series@ha
addi r3, r3, time_series@l
lis r11, moving_avg@ha
addi r11, r11, moving_avg@l
lwz r4, series_length(r0)   # Number of data points
lwz r5, window_size(r0)     # Moving average window size

cmpwi r4, 0
ble moving_average_done     # Empty series: nothing to average
cmpwi r5, 0
ble moving_average_done     # No window: also avoids division by zero
cmpw r5, r4
ble window_size_ok
mr r5, r4                   # Clamp window to the available data

window_size_ok:
subi r3, r3, 2              # Pre-adjust for first lhau
subi r11, r11, 2            # Pre-adjust for first sthu

moving_average_loop:
    li r6, 0                # Sum accumulator
    mr r8, r3               # Current position for window
    mtctr r5                # CTR = window size

window_sum_loop:
    lhau r10, 2(r8)         # Load signed sample and advance
    add r6, r6, r10         # Add to sum
    bdnz window_sum_loop    # Continue until window complete

    divw r9, r6, r5         # Calculate average (r5 > 0 guaranteed above)
    sthu r9, 2(r11)         # Store average at the window's start index
                            # and advance the output pointer

    addi r3, r3, 2          # Slide window start by one sample
    subi r4, r4, 1          # Decrement remaining points
    cmpw r4, r5             # Check if enough points for full window
    bge moving_average_loop # Continue if enough points

moving_average_done:

Sensor Data Calibration Pipeline

# Process sensor calibration data with automatic advancement.
#
# For sensor i (in input order) this computes
#     value = (((raw - offset) * gain) >> 10) + linearity
#             + ((temperature - reference) * temp_coefficient >> 8)
# clips it to [min, max], stores it to calibrated_data[i] (halfword) and
# stores quality_flags[i] (byte): 1 if |raw| < noise_threshold, else 0.
#
# Each calibration_params entry is 6 halfwords (12 bytes):
#   +0 offset, +2 gain, +4 linearity, +6 temp coefficient, +8 min, +10 max
#
# Register roles:
#   r3  = raw data pointer (kept one halfword behind, for lhau)
#   r4  = calibration parameter pointer
#   r5  = sensor count, then calibrated_data pointer (one halfword behind)
#   r6  = quality_flags pointer (kept one byte behind, for stbu)
#   r7  = noise threshold
#   r8  = temperature delta (current - reference)
#   r9  = raw sensor reading
#   r10 = value being calibrated
#   r11, r12 = scratch
#   CTR = sensors remaining
lis r3, raw_sensor_data@ha
addi r3, r3, raw_sensor_data@l
lis r4, calibration_params@ha
addi r4, r4, calibration_params@l
lwz r5, num_sensors(r0)     # Number of sensors
cmpwi r5, 0
ble sensor_calibration_done # Nothing to do; also avoids a wrapped loop count
mtctr r5                    # CTR = sensors remaining

# Loop-invariant values are loaded once, outside the loop
lwz r7, noise_threshold(r0) # Noise threshold
lha r11, temperature_reading(r0) # Current temperature
lha r12, reference_temp(r0) # Reference temperature
sub r8, r11, r12            # Temperature delta

lis r5, calibrated_data@ha
addi r5, r5, calibrated_data@l
lis r6, quality_flags@ha
addi r6, r6, quality_flags@l
subi r3, r3, 2              # Pre-adjust for first lhau
subi r5, r5, 2              # Pre-adjust for first sthu
subi r6, r6, 1              # Pre-adjust for first stbu

sensor_calibration_loop:
    lhau r9, 2(r3)          # Load raw sensor reading and advance

    # Multi-point calibration: output = (input - offset) * gain + correction
    lha r10, 0(r4)          # Load offset calibration
    lha r11, 2(r4)          # Load gain calibration
    sub r10, r9, r10        # raw_value - offset
    mullw r10, r10, r11     # (raw_value - offset) * gain
    srawi r10, r10, 10      # Scale down (assume 10-bit fractional gain)

    lha r11, 4(r4)          # Load linearity correction
    add r10, r10, r11       # Add linearity correction

    # Temperature compensation
    lha r11, 6(r4)          # Load temperature coefficient
    mullw r11, r8, r11      # temp_delta * temp_coefficient
    srawi r11, r11, 8       # Scale temperature correction
    add r10, r10, r11       # Apply temperature compensation

    # Range validation and clipping
    lha r11, 8(r4)          # Load minimum valid range
    lha r12, 10(r4)         # Load maximum valid range

    cmpw r10, r11           # Check minimum
    bge check_max_range
    mr r10, r11             # Clip to minimum

check_max_range:
    cmpw r10, r12           # Check maximum
    ble range_ok
    mr r10, r12             # Clip to maximum

range_ok:
    sthu r10, 2(r5)         # Store calibrated reading and advance

    # Signal quality assessment.
    # |raw| is computed as (x XOR mask) - mask, where mask is the sign bit
    # replicated across the word (0 or -1).
    srawi r11, r9, 31       # mask
    xor r12, r9, r11
    sub r12, r12, r11       # |raw_value|

    li r11, 0               # Assume good quality
    cmpw r12, r7            # Compare with threshold
    bge store_quality
    li r11, 1               # Below threshold: low quality flag

store_quality:
    stbu r11, 1(r6)         # Store quality flag and advance

    addi r4, r4, 12         # Next calibration parameter set (6 halfwords)
    bdnz sensor_calibration_loop # Continue calibration

sensor_calibration_done:

Related Instructions

lha, lhax, lhaux, sthu, lhzu, lwzu

Back to Index