www.xbdev.net
xbdev - software development
Sunday April 27, 2025
Home | Contact | Support | WebGPU Graphics and Compute ... | WebGPU 'Compute'.. Compute, Algorithms, and Code.....
     
 

WebGPU 'Compute'..

Compute, Algorithms, and Code.....

 

Sorting Numbers (Bitonic Sort Algorithm)


Sorting an array of random numbers using the parallel bitonic sort algorithm.

The reason we're using the bitonic sort algorithm is it's well-suited for parallel execution on GPUs (split the work across multiple threads).


Sorting arrays of numbers on the GPU
Sorting arrays of numbers on the GPU


Functions Used: requestAdapter(), getPreferredCanvasFormat(), createCommandEncoder(), beginRenderPass(), setPipeline(), draw(), end(), submit(), getCurrentTexture(), createView(), createShaderModule()

The core idea of the bitonic sort algorithm is to recursively build bitonic sequences from smaller bitonic sequences, and then merge them into a fully sorted sequence. This recursive process involves sorting subsequences and then merging them in a way that preserves the bitonic property.

The process starts by dividing the sequence into pairs and sorting each pair in ascending and descending order, effectively creating multiple bitonic sequences of length two.

These sequences are then merged into longer bitonic sequences by comparing and swapping elements to ensure that the merged sequences maintain the bitonic property. This merging step is performed iteratively, doubling the length of the bitonic sequences at each stage until the entire sequence is sorted.

The merge operation uses a compare-and-swap mechanism that is parallelizable, making the algorithm highly efficient on parallel processing units.

One of the key features of the bitonic sort is its regular structure, which makes it particularly amenable to implementation on hardware with a fixed, predictable pattern of data movement and comparison.

This regularity allows for efficient utilization of memory bandwidth and computational resources in parallel computing environments. As a result, bitonic sort is widely used in applications that benefit from parallel processing capabilities, such as GPU-based computing and hardware sorting networks, despite its O(n log^2 n) time complexity, which is higher than the optimal O(n log n) complexity of other comparison-based sorting algorithms like quicksort and mergesort.







Complete Code


const wgslcompute = `
struct Data {
    numbers: array<f32>
};

@group(0) @binding(0) var<storage, read_write> data : Data;

fn compareAndSwap(i: u32, j: u32, dir: bool) {
    if ((data.numbers[i] > data.numbers[j]) == dir) {
        let temp = data.numbers[i];
        data.numbers[i] = data.numbers[j];
        data.numbers[j] = temp;
    }
}

@compute @workgroup_size(64)
fn main( @builtin(global_invocation_id) id: vec3<u32>) 
{
    var k: u32 = 2;
    let datalength = arrayLength( &data.numbers );
    
    while ( k <= datalength ) 
    {
        var j: u32 = k >> 1;
        while (j > 0) {
            let idx = id.x;
            let ixj = idx ^ j;
            if (ixj > idx) {
                let direction = ((idx & k) == 0);
                compareAndSwap(idx, ixj, direction);
            }
            j = j >> 1;
            workgroupBarrier();
        }
        k = k << 1;
    }
}
`;

let div document.createElement('div');
document.body.appendChilddiv );
div.style['font-size'] = '20pt';
function 
log)
{
  
console.log);
  
let args = [...arguments].join(' ');
  
div.innerHTML += args '<br>';
}

log('WebGPU Compute Example');

if (!
navigator.gpu) { log("WebGPU is not supported (or is it disabled? flags/settings)"); return; }

const 
adapter await navigator.gpu.requestAdapter();
const 
device  await adapter.requestDevice();


const 
arraySize 128;
const 
data0 = new Float32Array(arraySize);
for (
let i 0arraySizei++) {
  
data0[i] = Math.roundMath.random() * 100.0 );
}

const 
gbuffer0 device.createBuffer({ size:  data0.byteLengthusageGPUBufferUsage.STORAGE GPUBufferUsage.COPY_DST GPUBufferUsage.COPY_SRC });
device.queue.writeBuffer(gbuffer00data0);

const 
resultBuffer device.createBuffer({ sizedata0.byteLengthusageGPUBufferUsage.STORAGE GPUBufferUsage.COPY_SRC GPUBufferUsage.COPY_DST });

const 
readbackBuffer device.createBuffer({ sizedata0.byteLengthusageGPUBufferUsage.COPY_DST GPUBufferUsage.MAP_READ });


const 
bindGroupLayout device.createBindGroupLayout({
  
entries: [ {binding0visibilityGPUShaderStage.COMPUTEbuffer: {type"storage"} } ]
});

const 
bindGroup device.createBindGroup({
  
layoutbindGroupLayout,
  
entries: [{ binding0,  resource: {  buffergbuffer0  }  }]
});

const 
shaderModule device.createShaderModule( {  codewgslcompute } );
  
const 
computePipeline device.createComputePipeline({
  
layoutdevice.createPipelineLayout({bindGroupLayouts: [bindGroupLayout]}),
  
compute: { moduleshaderModule,
             
entryPoint'main'  }
});

const 
commandEncoder device.createCommandEncoder();
const 
passEncoder commandEncoder.beginComputePass();
passEncoder.setPipeline(computePipeline);
passEncoder.setBindGroup(0bindGroup);
passEncoder.dispatchWorkgroups(arraySize 64);
passEncoder.end();

commandEncoder.copyBufferToBuffer(gbuffer00resultBuffer0data0.byteLength);
commandEncoder.copyBufferToBuffer(resultBuffer0readbackBuffer0data0.byteLength);

const 
commands commandEncoder.finish();
device.queue.submit([commands]);

await readbackBuffer.mapAsync(GPUMapMode.READ);
const 
sortedArray = new Float32Array(readbackBuffer.getMappedRange());
console.log(sortedArray);

log('Sorted array:')
const 
arr = Array.fromsortedArray );
for (
let i=0i<arr.lengthi++)
{
      
// display over multiple lines
    
logarr.splice010 ) );  
}


















Things t Try


• Try a larger array size
• Try different workgroup sizes (log the time it takes to sort - plot the results)
• Research other sort algorithms that can be implemented on the GPU




Resources and Links


• WebGPU Lab Sort Demo [LINK]




WebGPU by Example: Fractals, Image Effects, Ray-Tracing, Procedural Geometry, 2D/3D, Particles, Simulations WebGPU Compute graphics and animations using the webgpu api 12 week course kenwright learn webgpu api kenwright programming compute and graphics applications with html5 and webgpu api kenwright real-time 3d graphics with webgpu kenwright webgpu api develompent a quick start guide kenwright webgpu by example 2022 kenwright webgpu gems kenwright webgpu interactive compute and graphics visualization cookbook kenwright wgsl webgpu shading language cookbook kenwright wgsl webgpugems shading language cookbook kenwright



 
Advert (Support Website)

 
 Visitor:
Copyright (c) 2002-2025 xbdev.net - All rights reserved.
Designated articles, tutorials and software are the property of their respective owners.