1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#[cfg(target_arch = "spirv")]
use core::arch::asm;

/// Wait for other invocations of this module to reach the current point
/// of execution.
///
/// All invocations of this module within Execution scope reach this point of
/// execution before any invocation proceeds beyond it.
///
/// When Execution is [`crate::memory::Scope::Workgroup`] or larger, behavior is
/// undefined unless all invocations within Execution execute the same dynamic
/// instance of this instruction. When Execution is Subgroup or Invocation, the
/// behavior of this instruction in non-uniform control flow is defined by the
/// client API.
///
/// If [`crate::memory::Semantics`] is not [`crate::memory::Semantics::NONE`],
/// this instruction also serves as an [`memory_barrier`] function call, and
/// also performs and adheres to the description and semantics of an
/// [`memory_barrier`] function with the same `MEMORY` and `SEMANTICS` operands.
/// This allows atomically specifying both a control barrier and a memory
/// barrier (that is, without needing two instructions). If
/// [`crate::memory::Semantics`] is [`crate::memory::Semantics::NONE`], `MEMORY`
/// is ignored.
///
/// Before SPIRV-V version 1.3, it is only valid to use this instruction with
/// `TessellationControl`, `GLCompute`, or `Kernel` execution models. There is
/// no such restriction starting with version 1.3.
///
/// If used with the `TessellationControl` execution model, it also implicitly
/// synchronizes the `output` storage class: Writes to `output` variables
/// performed by any invocation executed prior to a [`control_barrier`] are
/// visible to any other invocation proceeding beyond that [`control_barrier`].
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpControlBarrier")]
#[inline]
pub unsafe fn control_barrier<
    const EXECUTION: u32, // Scope
    const MEMORY: u32,    // Scope
    const SEMANTICS: u32, // Semantics
>() {
    asm! {
        "%u32 = OpTypeInt 32 0",
        "%execution = OpConstant %u32 {execution}",
        "%memory = OpConstant %u32 {memory}",
        "%semantics = OpConstant %u32 {semantics}",
        "OpControlBarrier %execution %memory %semantics",
        execution = const EXECUTION,
        memory = const MEMORY,
        semantics = const SEMANTICS,
    }
}

/// Control the order that memory accesses are observed.
///
/// Ensures that memory accesses issued before this instruction are observed
/// before memory accesses issued after this instruction. This control is
/// ensured only for memory accesses issued by this invocation and observed by
/// another invocation executing within `MEMORY` scope. If the `vulkan` memory
/// model is declared, this ordering only applies to memory accesses that
/// use the `NonPrivatePointer` memory operand or `NonPrivateTexel`
/// image operand.
///
/// `SEMANTICS` declares what kind of memory is being controlled and what kind
/// of control to apply.
///
/// To execute both a memory barrier and a control barrier,
/// see [`control_barrier`].
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpMemoryBarrier")]
#[inline]
pub unsafe fn memory_barrier<
    const MEMORY: u32,    // Scope
    const SEMANTICS: u32, // Semantics
>() {
    asm! {
        "%u32 = OpTypeInt 32 0",
        "%memory = OpConstant %u32 {memory}",
        "%semantics = OpConstant %u32 {semantics}",
        "OpMemoryBarrier %memory %semantics",
        memory = const MEMORY,
        semantics = const SEMANTICS,
    }
}

/// Blocks execution of all threads in a group until all group shared accesses have been completed.
///
/// This is an exact implementation of `GroupMemoryBarrier()`.
///
/// From <https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/groupmemorybarrier>
#[spirv_std_macros::gpu_only]
#[inline]
pub unsafe fn workgroup_memory_barrier() {
    memory_barrier::<
        { crate::memory::Scope::Workgroup as u32 },
        {
            crate::memory::Semantics::WORKGROUP_MEMORY.bits()
                | crate::memory::Semantics::ACQUIRE_RELEASE.bits()
        },
    >();
}

/// Blocks execution of all threads in a group until all group shared accesses have been completed and all threads in the group have reached this call.
///
/// This is an exact implementation of `GroupMemoryBarrierWithGroupSync()`.
///
/// From <https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/groupmemorybarrierwithgroupsync>
#[spirv_std_macros::gpu_only]
#[inline]
pub unsafe fn workgroup_memory_barrier_with_group_sync() {
    control_barrier::<
        { crate::memory::Scope::Workgroup as u32 },
        { crate::memory::Scope::Workgroup as u32 },
        {
            crate::memory::Semantics::WORKGROUP_MEMORY.bits()
                | crate::memory::Semantics::ACQUIRE_RELEASE.bits()
        },
    >();
}

/// Blocks execution of all threads in a group until all device memory accesses have been completed.
///
/// This is an exact implementation of `DeviceMemoryBarrier()`.
///
/// From <https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/devicememorybarrier>
#[spirv_std_macros::gpu_only]
#[inline]
pub unsafe fn device_memory_barrier() {
    memory_barrier::<
        { crate::memory::Scope::Device as u32 },
        {
            crate::memory::Semantics::IMAGE_MEMORY.bits()
                | crate::memory::Semantics::UNIFORM_MEMORY.bits()
                | crate::memory::Semantics::ACQUIRE_RELEASE.bits()
        },
    >();
}

/// Blocks execution of all threads in a group until all device memory accesses have been completed and all threads in the group have reached this call.
///
/// This is an exact implementation of `DeviceMemoryBarrierWithGroupSync()`.
///
/// From <https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/devicememorybarrierwithgroupsync>
#[spirv_std_macros::gpu_only]
#[inline]
pub unsafe fn device_memory_barrier_with_group_sync() {
    control_barrier::<
        { crate::memory::Scope::Workgroup as u32 },
        { crate::memory::Scope::Device as u32 },
        {
            crate::memory::Semantics::IMAGE_MEMORY.bits()
                | crate::memory::Semantics::UNIFORM_MEMORY.bits()
                | crate::memory::Semantics::ACQUIRE_RELEASE.bits()
        },
    >();
}

/// Blocks execution of all threads in a group until all memory accesses have been completed.
///
/// This is an exact implementation of `AllMemoryBarrier()`.
///
/// From <https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/allmemorybarrier>
#[spirv_std_macros::gpu_only]
#[inline]
pub unsafe fn all_memory_barrier() {
    memory_barrier::<
        { crate::memory::Scope::Device as u32 },
        {
            crate::memory::Semantics::WORKGROUP_MEMORY.bits()
                | crate::memory::Semantics::IMAGE_MEMORY.bits()
                | crate::memory::Semantics::UNIFORM_MEMORY.bits()
                | crate::memory::Semantics::ACQUIRE_RELEASE.bits()
        },
    >();
}

/// Blocks execution of all threads in a group until all memory accesses have been completed and all threads in the group have reached this call.
///
/// This is an exact implementation of `AllMemoryBarrierWithGroupSync()`.
///
/// From <https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/allmemorybarrierwithgroupsync>
#[spirv_std_macros::gpu_only]
#[inline]
pub unsafe fn all_memory_barrier_with_group_sync() {
    control_barrier::<
        { crate::memory::Scope::Workgroup as u32 },
        { crate::memory::Scope::Device as u32 },
        {
            crate::memory::Semantics::WORKGROUP_MEMORY.bits()
                | crate::memory::Semantics::IMAGE_MEMORY.bits()
                | crate::memory::Semantics::UNIFORM_MEMORY.bits()
                | crate::memory::Semantics::ACQUIRE_RELEASE.bits()
        },
    >();
}