ICBI - Instruction Cache Block Invalidate | PowerPC Instruction Set Reference

Instruction Syntax

Mnemonic	Format	Flags
icbi	rA,rB	-

Instruction Encoding

Field	Bits	Description
Primary Opcode	0-5	011111 (0x1F)
Reserved	6-10	00000
rA	11-15	Source register A
rB	16-20	Source register B
XO	21-30	1111010110 (982)
Reserved	31	0

Operation

if rA = 0 then EA ← 0
else EA ← (rA)
EA ← EA + (rB)
Invalidate instruction cache block containing EA

The instruction cache block containing the effective address is invalidated. If rA is 0, the effective address is the contents of rB. Otherwise, the effective address is the sum of the contents of rA and rB.

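Note: This instruction is essential for self-modifying code and dynamic code generation. The invalidation affects only the instruction cache, not the data cache, so newly written code must first be written back to memory (dcbst followed by sync) before the block is invalidated with icbi. After the icbi, an isync instruction should be executed so that instructions already fetched from the stale copy are discarded.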

Affected Registers

None - This instruction does not affect any registers.

For more information on cache management see Section 2.1.1, "Cache Model," in the PowerPC Microprocessor Family: The Programming Environments manual.

Examples

Basic Instruction Cache Invalidation

# Drop any instruction-cache copy of the block that contains code_addr
lis   r3, code_addr@h         # r3 = upper 16 bits of the address
ori   r3, r3, code_addr@l     # r3 = full 32-bit address of code_addr
icbi  0, r3                   # rA field is 0, so EA = (r3)
isync                         # Refetch: later instructions must not come from the old copy

Self-Modifying Code Pattern

# Self-modifying code: overwrite one instruction, then make the change fetchable
lis   r4, new_instruction@h
ori   r4, r4, new_instruction@l   # r4 -> word holding the replacement instruction
lis   r3, dynamic_code@h
ori   r3, r3, dynamic_code@l      # r3 -> instruction that is being replaced

# Copy the replacement instruction over the old one
lwz   r5, 0(r4)                   # r5 = replacement instruction word
stw   r5, 0(r3)                   # Overwrite the instruction at dynamic_code

# Publish the change: data side first, then instruction side
dcbst 0, r3                       # Write the modified data cache block back to memory
sync                              # Write-back must finish before the invalidate
icbi  0, r3                       # Invalidate the instruction cache block at (r3)
isync                             # Synchronize instruction fetch with the new contents

Dynamic Code Generation

# Generate code at runtime and ensure cache coherency
lis r3, code_buffer@ha
addi r3, r3, code_buffer@l

# Generate a two-instruction routine: "li r3, 42" followed by "blr"
lis r4, 0x3860          # Upper half of "li r3, value" (addi r3, 0, value)
ori r4, r4, 42          # r4 = 0x3860002A: li r3, 42
stw r4, 0(r3)           # Store first generated instruction
lis r4, 0x4E80          # Upper half of "blr"
ori r4, r4, 0x0020      # r4 = 0x4E800020: blr
stw r4, 4(r3)           # Terminate the routine so execution cannot run past it

# Flush data cache and invalidate instruction cache.
# Both words are covered explicitly in case they fall in different cache blocks.
li r5, 4                # Offset of the second generated word
dcbst 0, r3             # Write back block containing word 0
dcbst r5, r3            # Write back block containing word 1 (EA = (r5) + (r3))
sync                    # Wait for the write-backs to complete
icbi 0, r3              # Invalidate instruction cache block for word 0
icbi r5, r3             # Invalidate instruction cache block for word 1
isync                   # Synchronize instruction prefetch

# Now safe to execute generated code
mtctr r3                # Move code address to count register
bcctrl 20, 0            # Call generated code; sets LR so its blr returns here
                        # (LR is overwritten - save it first if this code must return)

JIT Compiler Cache Management

# JIT compiler code cache management
# Makes 1024 bytes of freshly written code at jit_cache visible to instruction
# fetch. All data cache write-backs are completed (sync) before any instruction
# cache block is invalidated, matching the dcbst / sync / icbi / isync order
# required for a single block.
# NOTE(review): assumes jit_cache is aligned to the 32-byte line size used here.
lis r3, jit_cache@ha
addi r3, r3, jit_cache@l
li r5, 1024             # Region size in bytes
li r6, 32               # Cache line size in bytes

# Pass 1: write every modified data cache block back to memory
li r4, 0                # Byte offset into the region
jit_flush_loop:
    dcbst r4, r3        # Write back block at jit_cache + offset
    add r4, r4, r6      # Advance one cache line
    cmplw r4, r5        # Unsigned compare: offsets and sizes are never negative
    blt jit_flush_loop

sync                    # All write-backs complete before invalidating

# Pass 2: invalidate the corresponding instruction cache blocks
li r4, 0                # Restart at the beginning of the region
jit_invalidate_loop:
    icbi r4, r3         # Invalidate block at jit_cache + offset
    add r4, r4, r6      # Advance one cache line
    cmplw r4, r5
    blt jit_invalidate_loop

sync                    # Wait for the invalidations to complete
isync                   # Synchronize instruction fetch

Kernel Module Loading

# Kernel module cache invalidation
# Writes back and then invalidates every cache line of the module image so the
# freshly loaded code can be fetched. Nothing is done when module_size is 0.
# NOTE(review): assumes module_base is aligned to the 32-byte line size used here.
lis r3, module_base@ha
addi r3, r3, module_base@l
lis r4, module_size@ha
lwz r4, module_size@l(r4)   # Load module size via its full 32-bit address
                            # (a base of r0 in lwz means literal 0, not a register)
li r6, 32                   # Cache line size in bytes
cmplwi r4, 0
beq module_cache_done       # Empty module: nothing to flush

# Pass 1: write modified data cache lines back to memory
li r5, 0                    # Current offset
module_cache_flush:
    dcbst r5, r3            # Write back line at module_base + offset
    add r5, r5, r6          # Next cache line
    cmplw r5, r4            # Unsigned compare against module size
    blt module_cache_flush

sync                        # Write-backs complete before any invalidate

# Pass 2: invalidate the matching instruction cache lines
li r5, 0                    # Restart at the beginning of the module
module_icache_invalidate:
    icbi r5, r3             # Invalidate line at module_base + offset
    add r5, r5, r6          # Next cache line
    cmplw r5, r4
    blt module_icache_invalidate

sync                        # Memory barrier: invalidations complete
isync                       # Instruction synchronization
module_cache_done:

Code Patching for Debugging

# Patch instruction for debugging/tracing
lis r3, patch_point@ha
addi r3, r3, patch_point@l

# Save original instruction
lwz r4, 0(r3)
lis r6, saved_instruction@ha        # r6 is scratch for the high half of the address
stw r4, saved_instruction@l(r6)     # Full 32-bit address; a base of r0 would mean
                                    # literal 0 plus a 16-bit displacement

# Install debug trap
lis r5, 0x7FE0          # Start of trap instruction
ori r5, r5, 0x0008      # Complete: trap (0x7FE00008 = tw 31, 0, 0)
stw r5, 0(r3)           # Install trap instruction

# Ensure cache coherency
dcbst 0, r3             # Write modified instruction back to memory
sync                    # Wait for the write-back
icbi 0, r3              # Invalidate instruction cache block
isync                   # Synchronize prefetch

Function Prologue/Epilogue Patching

# Patch function entry points for profiling
#   function_list      - array of num_functions function addresses (one word each)
#   saved_instructions - parallel array that receives each function's original
#                        first instruction (must not alias function_list)
#   profile_call       - address of the profiling routine to branch to
# Each entry's first instruction is replaced with "bl profile_call".
# NOTE(review): bl overwrites LR, which at function entry still holds the
# caller's return address - confirm profile_call accounts for this.
# NOTE(review): each target must lie within the +/-32 MB reach of a relative
# branch from the patched function; this is not checked here.
lis r3, function_list@ha
addi r3, r3, function_list@l
lis r9, saved_instructions@ha
addi r9, r9, saved_instructions@l
lis r8, profile_call@ha
addi r8, r8, profile_call@l     # Loop-invariant: address of the profiling routine
lis r4, num_functions@ha
lwz r4, num_functions@l(r4)     # Full 32-bit address (base r0 would mean literal 0)
li r5, 0                        # Byte offset into both arrays
cmpwi r4, 0
ble patch_functions_done        # Nothing to patch
mtctr r4                        # CTR = number of functions left to patch

patch_functions_loop:
    lwzx r6, r3, r5             # r6 = function_list[i]

    # Save original first instruction
    lwz r7, 0(r6)
    stwx r7, r9, r5             # saved_instructions[i] = original instruction

    # Build "bl profile_call" relative to this function's entry
    sub r10, r8, r6             # Byte displacement from entry to profile_call
    rlwinm r10, r10, 0, 6, 29   # Keep the LI field (low two bits cleared)
    oris r10, r10, 0x4800       # Primary opcode 18: branch
    ori r10, r10, 0x0001        # LK = 1: branch and link
    stw r10, 0(r6)              # Patch function entry

    # Make the new instruction visible to instruction fetch
    dcbst 0, r6                 # Write modified block back to memory
    sync                        # Write-back completes before the invalidate
    icbi 0, r6                  # Invalidate instruction cache block

    addi r5, r5, 4              # Next function
    bdnz patch_functions_loop   # Decrement CTR, loop while functions remain

sync                            # Ensure all modifications complete
isync                           # Synchronize instruction stream
patch_functions_done:

Bootloader Code Relocation

# Relocate code and ensure cache coherency
# The copy is finished first; only then is every destination cache line written
# back and invalidated. Flushing a line as soon as its first word is stored
# would leave the remaining words of that line in the data cache only.
# NOTE(review): assumes copy_size is a non-zero multiple of 4 - confirm.
lis r3, source_addr@ha
addi r3, r3, source_addr@l
lis r4, dest_addr@ha
addi r4, r4, dest_addr@l
lis r5, copy_size@ha
lwz r5, copy_size@l(r5)     # Full 32-bit address (base r0 would mean literal 0)

# Pass 1: copy word by word
li r6, 0                    # Byte offset
relocate_loop:
    lwzx r7, r3, r6         # Load from source
    stwx r7, r4, r6         # Store to destination
    addi r6, r6, 4          # Next word
    cmplw r6, r5            # Unsigned compare against copy size
    blt relocate_loop

# Cache maintenance covers [dest rounded down to a 32-byte line, dest + size)
rlwinm r8, r4, 0, 0, 26     # r8 = dest_addr with the low 5 bits cleared
add r9, r4, r5              # r9 = first byte past the copied region

# Pass 2: write the copied data back to memory
mr r6, r8
relocate_flush_loop:
    dcbst 0, r6             # Write back data cache line
    addi r6, r6, 32         # Next cache line
    cmplw r6, r9
    blt relocate_flush_loop

sync                        # All write-backs complete before invalidating

# Pass 3: invalidate the matching instruction cache lines
mr r6, r8
relocate_invalidate_loop:
    icbi 0, r6              # Invalidate instruction cache line
    addi r6, r6, 32         # Next cache line
    cmplw r6, r9
    blt relocate_invalidate_loop

sync                        # Ensure the invalidations complete
isync                       # Synchronize instruction fetch

# Now safe to execute relocated code
mtctr r4                    # Set up to jump to relocated code
bcctr 20, 0                 # Execute relocated code

Hot Code Swapping

# Redirect target_function to new_implementation with a single-word patch
lis   r4, new_implementation@h
ori   r4, r4, new_implementation@l  # r4 = address of the replacement code
lis   r3, target_function@h
ori   r3, r3, target_function@l     # r3 = address of the function being replaced

# Build "b new_implementation" relative to target_function
subf  r5, r3, r4                    # r5 = r4 - r3: byte displacement to the new code
rlwinm r5, r5, 0, 6, 29             # Keep only the LI field (low two bits cleared)
oris  r5, r5, 0x4800                # Primary opcode 18: unconditional branch

# One word store replaces the first instruction
stw   r5, 0(r3)                     # Install branch to new code
dcbst 0, r3                         # Write the modified block back to memory
sync                                # Write-back completes before the invalidate
icbi  0, r3                         # Invalidate the stale instruction cache block
isync                               # Synchronize instruction fetch

Performance Monitoring Code Injection

# Inject performance monitoring into hot functions
# Replaces the first instruction of hot_function with "bl perf_hook" after
# saving the original instruction word to original_instr.
lis r3, hot_function@ha
addi r3, r3, hot_function@l

# Save original instruction
lwz r4, 0(r3)
lis r7, original_instr@ha       # r7 is scratch for the high half of the address
stw r4, original_instr@l(r7)    # Full 32-bit address; a base of r0 would mean
                                # literal 0 plus a 16-bit displacement

# Create branch to instrumentation
lis r5, perf_hook@ha
addi r5, r5, perf_hook@l
sub r6, r5, r3          # Calculate byte displacement from hot_function to perf_hook
srwi r6, r6, 2          # Word displacement
rlwinm r6, r6, 2, 6, 29 # Position in the LI field of the instruction
oris r6, r6, 0x4800     # Create branch instruction (primary opcode 18)
ori r6, r6, 0x0001      # Set link bit for call (bl)

# Install instrumentation
stw r6, 0(r3)           # Install performance hook
dcbst 0, r3             # Write modified block back to memory
sync                    # Ensure the write-back completes
icbi 0, r3              # Invalidate instruction cache block
isync                   # Synchronize instruction stream

Related Instructions

isync, dcbf, dcbst, sync, icbt

Back to Index