ICBT - Instruction Cache Block Touch | PowerPC Instruction Set Reference

Instruction Syntax

Mnemonic	Format	Flags
icbt	CT,rA,rB	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	011111 (0x1F)
CT	6-10	Cache Touch hint
rA	11-15	Source register A
rB	16-20	Source register B
Reserved	21	0
XO	22-30	000010110 (22)
Reserved	31	0

Operation

if rA = 0 then EA ← 0
else EA ← (rA)
EA ← EA + (rB)
Touch instruction cache block containing EA with hint CT

The instruction cache block containing the effective address is touched, indicating to the processor that the program will likely execute instructions at this address in the near future. If the rA field is 0 (the register number, not the contents of r0), the effective address is the contents of rB. Otherwise, the effective address is the sum of the contents of rA and rB.

Note: This is a hint instruction for performance optimization. The CT field provides additional cache touch hints. The instruction may be treated as a no-op by implementations that do not support instruction cache touch operations. This instruction is optional in the PowerPC architecture.

Affected Registers

None - This instruction does not affect any registers.

For more information on cache management see Section 2.1.1, "Cache Model," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

Basic Instruction Cache Touch

# Warm the instruction cache for a routine that is about to be called.
        lis     r3, hot_function@ha
        addi    r3, r3, hot_function@l  # r3 = address of hot_function
        icbt    0, 0, r3                # CT = 0, rA field = 0: EA = (r3)
        # ... some other work ...
        bl      hot_function            # Call the routine that was touched

Branch Target Prefetching

# Touch both possible branch targets ahead of the conditional branch.
        lis     r3, likely_path@ha
        addi    r3, r3, likely_path@l   # r3 = address of likely_path
        icbt    0, 0, r3                # Hint: likely_path

        lis     r4, unlikely_path@ha
        addi    r4, r4, unlikely_path@l # r4 = address of unlikely_path
        icbt    0, 0, r4                # Hint: unlikely_path

# Decide which path to take.
        cmpwi   cr0, r5, 0
        beq     likely_path             # Taken when r5 == 0
        b       unlikely_path           # Taken when r5 != 0

Loop Unrolling Optimization

# Touch every cache block of a large unrolled loop before running it.
# icbt forms its effective address as (rA) + (rB), so the base and the
# running offset are passed directly; no separate add is needed.
lis r3, unrolled_loop@ha
addi r3, r3, unrolled_loop@l   # r3 = base address of the loop body
li r4, 0                # Byte offset into the loop body
li r5, 1024             # Loop size in bytes
li r6, 32               # Cache line size assumed by this example

prefetch_loop:
    icbt 0, r3, r4      # Touch the block at r3 + r4
    add r4, r4, r6      # Advance to the next cache line
    cmpw r4, r5         # Offset still inside the loop body?
    blt prefetch_loop

# Now execute the unrolled loop with better cache performance
b unrolled_loop

Function Call Optimization

# Prefetch function entry points for a call sequence.
lis r3, func_table@ha
addi r3, r3, func_table@l      # r3 = table of function pointers
# A D-form load with r0 as base uses the literal value 0, so
# "num_functions(r0)" would only reach a 16-bit absolute address.
# Build the full address with @ha/@l instead.
lis r4, num_functions@ha
lwz r4, num_functions@l(r4)    # r4 = number of table entries
li r5, 0                       # r5 = byte offset into the table
cmpwi r4, 0
ble prefetch_functions_done    # Empty table: skip the countdown loop

prefetch_functions:
    lwzx r6, r3, r5     # Load function address
    icbt 0, 0, r6       # Touch function entry point
    addi r5, r5, 4      # Next function pointer
    subi r4, r4, 1      # Decrement counter
    cmpwi r4, 0
    bne prefetch_functions

prefetch_functions_done:
# Now call functions with better cache performance
# NOTE: r3 and r5 are volatile registers under the standard PowerPC ABIs;
# a complete call loop must keep the table base and offset in non-volatile
# registers (or reload them) because each callee may overwrite them.
li r5, 0
call_functions:
    lwzx r6, r3, r5     # Load function address
    mtctr r6            # Set up call
    bctrl               # Call function (likely in cache)
    addi r5, r5, 4      # Next function
    # ... continue calling ...

Interpreter Dispatch Table Optimization

# Prefetch interpreter dispatch targets
lis r3, dispatch_table@ha
addi r3, r3, dispatch_table@l  # r3 = handler table base
# r0 as a load base means literal 0, so the variable is addressed with
# an explicit @ha/@l pair rather than "bytecode_addr(r0)".
lis r4, bytecode_addr@ha
lwz r4, bytecode_addr@l(r4)    # r4 = pointer to the current bytecode
lbz r5, 0(r4)          # Load opcode
slwi r5, r5, 2         # Convert to word offset
lwzx r6, r3, r5        # Load handler address
icbt 0, 0, r6          # Prefetch handler code

# Optionally prefetch next few opcodes
lbz r7, 1(r4)          # Next opcode
slwi r7, r7, 2         # Convert to word offset
lwzx r8, r3, r7        # Load next handler address
icbt 0, 0, r8          # Prefetch next handler

# Execute current opcode
mtctr r6
bctrl                  # Call handler through CTR (LR is updated)

JIT Compiler Code Cache Warming

# Warm instruction cache for JIT compiled code
lis r3, jit_cache@ha
addi r3, r3, jit_cache@l       # r3 = start of the JIT code buffer
# r0 as a load base means literal 0; address the variable with @ha/@l.
lis r4, compiled_size@ha
lwz r4, compiled_size@l(r4)    # r4 = size of the compiled code in bytes
li r5, 0               # Current offset
li r6, 32              # Cache line size assumed by this example

warm_jit_cache:
    icbt 0, r3, r5     # Touch the block at r3 + r5 (EA = (rA) + (rB))
    add r5, r5, r6     # Next cache line
    cmpw r5, r4        # Check if done
    blt warm_jit_cache

# Execute JIT compiled code with warmed cache
mtctr r3
bcctr 20, 0            # Branch always to CTR (simplified mnemonic: bctr)

Game Engine Entity Update Loop

# Prefetch entity update functions, then call them.
# Loop state is kept in r29-r31, which are non-volatile under the standard
# PowerPC ABIs, so it survives the bctrl calls below. The enclosing function
# is assumed to save and restore these registers. r3 is used only for the
# argument, so the array base is never overwritten.
.set ENTITY_SIZE, 32           # Entity struct size in bytes

lis r31, entity_array@ha
addi r31, r31, entity_array@l  # r31 = entity array base
# r0 as a load base means literal 0; address the variable with @ha/@l.
lis r30, entity_count@ha
lwz r30, entity_count@l(r30)   # r30 = number of entities
cmpwi r30, 0
ble entity_update_done         # Empty array: nothing to prefetch or call
li r29, 0                      # r29 = entity index

entity_prefetch_loop:
    # Calculate entity address
    mulli r7, r29, ENTITY_SIZE
    add r8, r31, r7    # Entity pointer

    # Load function pointer and prefetch
    lwz r9, update_func_offset(r8)
    icbt 0, 0, r9      # Prefetch update function

    addi r29, r29, 1   # Next entity
    cmpw r29, r30      # Check if done
    blt entity_prefetch_loop

# Now update entities with better cache performance
li r29, 0
entity_update_loop:
    mulli r7, r29, ENTITY_SIZE
    add r3, r31, r7    # r3 = entity pointer, passed as parameter
    lwz r9, update_func_offset(r3)
    mtctr r9
    bctrl              # Call update function
    addi r29, r29, 1
    cmpw r29, r30
    blt entity_update_loop

entity_update_done:

Database Query Execution Plan

# Prefetch query operator code for an execution plan, then run the plan.
# Loop state is kept in r29-r31, which are non-volatile under the standard
# PowerPC ABIs, so it survives the bctrl calls below. The enclosing function
# is assumed to save and restore these registers. r3 is used only for the
# argument, so the plan base is never overwritten.
lis r31, query_plan@ha
addi r31, r31, query_plan@l    # r31 = operator array base
# r0 as a load base means literal 0; address the variable with @ha/@l.
lis r30, num_operators@ha
lwz r30, num_operators@l(r30)  # r30 = number of operators
cmpwi r30, 0
ble execute_operators_done     # Empty plan: nothing to prefetch or run
li r29, 0                      # r29 = operator index

prefetch_operators:
    # Each operator has function pointer at offset 0
    slwi r6, r29, 3    # 8 bytes per operator struct
    add r7, r31, r6    # Operator struct address
    lwz r8, 0(r7)      # Load operator function
    icbt 0, 0, r8      # Prefetch operator code

    addi r29, r29, 1   # Next operator
    cmpw r29, r30      # Check if done
    blt prefetch_operators     # blt (not bne) so the loop cannot overrun

# Execute query plan with prefetched operators
li r29, 0
execute_operators:
    slwi r6, r29, 3
    add r7, r31, r6
    lwz r8, 0(r7)      # Load operator function
    lwz r3, 4(r7)      # Load operator data, passed as parameter
    mtctr r8
    bctrl              # Execute operator
    addi r29, r29, 1
    cmpw r29, r30
    blt execute_operators

execute_operators_done:

State Machine Transition Prefetching

# Prefetch state machine transition handlers
lis r3, state_machine@ha
addi r3, r3, state_machine@l   # r3 = state machine base
lwz r4, current_state(r3)      # r4 = current state index
# r0 as a load base means literal 0; address the variable with @ha/@l.
lis r5, event_code@ha
lwz r5, event_code@l(r5)       # r5 = event code

# Calculate transition table entry
slwi r6, r4, 8         # 256 events per state
add r6, r6, r5         # Add event code
slwi r6, r6, 2         # 4 bytes per pointer
add r7, r3, r6         # Transition table entry
lwz r8, transition_table_offset(r7)
icbt 0, 0, r8          # Prefetch transition handler

# Also prefetch the handler for the next event code in the same state
addi r9, r5, 1         # Next event
cmpwi r9, 256
bge skip_next_prefetch # Next event would fall outside this state's row
slwi r10, r4, 8
add r10, r10, r9
slwi r10, r10, 2
add r11, r3, r10
lwz r12, transition_table_offset(r11)
icbt 0, 0, r12         # Prefetch next likely transition

skip_next_prefetch:
# Execute current transition
mtctr r8
mr r3, r5              # Pass event as parameter
bctrl                  # Execute transition handler

Packet Processing Pipeline

# Network packet processing with function prefetching
lis r3, packet_buffer@ha
addi r3, r3, packet_buffer@l   # r3 = packet buffer
lbz r4, packet_type(r3) # Load packet type
lis r5, handler_table@ha
addi r5, r5, handler_table@l   # r5 = handler table base
slwi r6, r4, 2         # 4 bytes per handler pointer
lwzx r7, r5, r6        # Load handler address
icbt 0, 0, r7          # Prefetch packet handler

# Prefetch common follow-up handlers
li r8, PKT_TYPE_ACK    # Common follow-up packet type
slwi r9, r8, 2         # 4 bytes per handler pointer
lwzx r10, r5, r9       # Load ACK handler address
icbt 0, 0, r10         # Prefetch ACK handler

# Process current packet
# r3 still holds the packet buffer address loaded above, so it is already
# in place as the parameter; no register move is needed.
mtctr r7
bctrl                  # Process packet

Graphics Shader Pipeline Optimization

# Prefetch shader programs for rendering pipeline
lis r3, shader_cache@ha
addi r3, r3, shader_cache@l    # r3 = shader cache base
# r0 as a load base means literal 0; address the variable with @ha/@l.
lis r4, active_material@ha
lwz r4, active_material@l(r4)  # r4 = pointer to the active material
lwz r5, vertex_shader_offset(r4)
add r6, r3, r5         # Vertex shader address
icbt 0, 0, r6          # Prefetch vertex shader

# The fragment shader address must survive the vertex shader call below,
# so it is kept in r31 (non-volatile under the standard PowerPC ABIs; the
# enclosing function is assumed to save and restore it).
lwz r7, fragment_shader_offset(r4)
add r31, r3, r7        # Fragment shader address
icbt 0, 0, r31         # Prefetch fragment shader

# Prefetch geometry shader if present
lwz r9, geometry_shader_offset(r4)
cmpwi r9, 0
beq skip_geometry_prefetch     # Offset 0 is treated as "no geometry shader"
add r10, r3, r9
icbt 0, 0, r10         # Prefetch geometry shader

skip_geometry_prefetch:
# Execute rendering pipeline with prefetched shaders
mtctr r6
bctrl                  # Execute vertex shader
mtctr r31
bctrl                  # Execute fragment shader

Related Instructions

icbi, dcbt, dcbtst, isync, sync

Back to Index