Przeglądam kod silnika Filament https://github.com/google/filament żeby mieć obeznanie z trochę trudniejszymi aspektami jak zarządzanie pamięcią czy wątkami.
Jestem akurat na etapie renderowania na osobnym wątku i na tym jak zaprojektowana jest kolejka komend. Od samego czytania niezbyt się czegoś uczę dlatego też trochę się bawię i modyfikuję kod. Coś tam skopiowałem, coś tam zmieniłem, coś tam sobie rozrysowałem itd..
Nie przedłużając gdzieś podczas alokowania pamięci dla obiektu w klasie Driver
w "moim" kodzie był błąd, bo się wykrzaczało co jakiś czas. Po paru godzinach próby znalezienia gdzie jest błąd stwierdziłem, że zastosuje słynną metodę Kopiego Pejsta. Problem nadal występuje. Po całym dniu, zmęczony, zwracam się do was o pomoc w nakierowaniu mnie na dobre tory. Co prawda nie wszystko copy-paste'owałem, bo byłoby rzeczywiście za dużo roboty, ale zostało to zmienione w sposób bardzo podobny do oryginalnego.
Kod jest rzeczywiście dość długi ale nie ma w nim raczej żadnej niewykorzystanej funkcji czy metody. Testowałem też go z flagą -fsanitize=thread
Cały kodzik jest tutaj https://onlinegdb.com/Sy-izi-M_. Żeby odpalić go w trybie debugowania trzeba najpierw kliknąć Fork this
i można już normalnie wszystko robić.
Poniżej jest cały problem opisany i gdzie się wykrzacza. Oraz co zmieniłem żeby trochę zmniejszyć ilość kodu.
Alokator jest synchronizowany przy użyciu spin locka, bo tak było w oryginalnym kodzie ale na mutexie zachowuje się tak samo.
Niestety nie widzę tutaj opcji formatowania kodu żeby został zwinięty lekko, ale mam nadzieję, że podczas publikacji postu zostanie on jakoś ogarnięty.
Czy mógłby mi ktoś pomóc znaleźć i zrozumieć dlaczego powstaje ten błąd? Zgaduje, że to data race, ale bez pomocy wątpie, że sobie poradzę z wyeliminowaniem go.
Alokator - w pełni skopiowany
#include <cstdlib>
#include <utility>
#include <cassert>
#include <algorithm>
#include <atomic>
#include <mutex>
# define assert_invariant(e) \
(e ? ((void)0) : printf("%s, %s, %i, %s", __func__, __FILE__, __LINE__, #e))
namespace pointermath {
template <typename P, typename T>
static inline P* add(P* a, T b) noexcept
{
return (P*)(uintptr_t(a) + uintptr_t(b));
}
template <typename P>
static inline P* align(P* p, size_t alignment) noexcept
{
// alignment must be a power-of-two
assert(alignment && !(alignment & alignment - 1));
return (P*)((uintptr_t(p) + alignment - 1) & ~(alignment - 1));
}
template <typename P>
static inline P* align(P* p, size_t alignment, size_t offset) noexcept
{
P* const r = align(add(p, offset), alignment);
assert(pointermath::add(r, -offset) >= p);
return r;
}
}
class HeapArea
{
public:
HeapArea() noexcept = default;
explicit HeapArea(size_t size)
{
if(size) {
// TODO: policy committing memory
mBegin = malloc(size);
mEnd = pointermath::add(mBegin, size);
}
}
~HeapArea() noexcept
{
// TODO: policy for returning memory to system
free(mBegin);
}
HeapArea(const HeapArea& rhs) = delete;
HeapArea& operator=(const HeapArea& rhs) = delete;
HeapArea(HeapArea&& rhs) noexcept = delete;
HeapArea& operator=(HeapArea&& rhs) noexcept = delete;
void* data() const noexcept { return mBegin; }
void* begin() const noexcept { return mBegin; }
void* end() const noexcept { return mEnd; }
size_t getSize() const noexcept { return uintptr_t(mEnd) - uintptr_t(mBegin); }
friend void swap(HeapArea& lhs, HeapArea& rhs) noexcept
{
using std::swap;
swap(lhs.mBegin, rhs.mBegin);
swap(lhs.mEnd, rhs.mEnd);
}
private:
void* mBegin = nullptr;
void* mEnd = nullptr;
};
namespace LockingPolicy {
struct NoLock
{
void lock() noexcept {}
void unlock() noexcept {}
};
}
namespace TrackingPolicy {
struct Untracked
{
Untracked() noexcept = default;
Untracked(const char* name, void* base, size_t size) noexcept {}
void onAlloc(void* p, size_t size, size_t alignment, size_t extra) noexcept {}
void onFree(void* p, size_t = 0) noexcept {}
void onReset() noexcept {}
void onRewind(void* addr) noexcept {}
};
struct Debug
{
Debug() noexcept = default;
Debug(const char* name, void* base, size_t size) noexcept
: mName(name), mBase(base), mSize(uint32_t(size))
{
}
void onAlloc(void* p, size_t size, size_t alignment, size_t extra) noexcept
{
if(p) {
memset(p, 0xeb, size);
}
}
void onFree(void* p, size_t size) noexcept
{
if(p) {
memset(p, 0xef, size);
}
}
void onReset() noexcept
{
// we should never be here if mBase is nullptr because compilation would have failed when
// Arena::onReset() tries to call the underlying allocator's onReset()
assert(mBase);
memset(mBase, 0xec, mSize);
}
void onRewind(void* addr) noexcept
{
// we should never be here if mBase is nullptr because compilation would have failed when
// Arena::onRewind() tries to call the underlying allocator's onReset()
assert(mBase);
assert(addr >= mBase);
memset(addr, 0x55, uintptr_t(mBase) + mSize - uintptr_t(addr));
}
protected:
const char* mName = nullptr;
void* mBase = nullptr;
uint32_t mSize = 0;
};
}
template<typename AllocatorPolicy, typename LockingPolicy,
typename TrackingPolicy = TrackingPolicy::Untracked>
class Arena
{
public:
std::atomic_uint32_t allocCount = 0;
Arena() = default;
// construct an arena with a name and forward argument to its allocator
template<typename ... ARGS>
Arena(const char* name, size_t size, ARGS&& ... args)
: mArea(size),
mAllocator(mArea, std::forward<ARGS>(args) ...),
mListener(name, mArea.data(), size),
mArenaName(name)
{
}
// allocate memory from arena with given size and alignment
// (acceptable size/alignment may depend on the allocator provided)
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t), size_t extra = 0) noexcept
{
std::lock_guard<LockingPolicy> guard(mLock);
void* p = mAllocator.alloc(size, alignment, extra);
mListener.onAlloc(p, size, alignment, extra);
allocCount.fetch_add(1, std::memory_order_relaxed);
return p;
}
// Allocate an array of trivially destructible objects
// for safety, we disable the object-based alloc method if the object type is not
// trivially destructible, since free() won't call the destructor and this is allocating
// an array.
template <typename T,
typename = typename std::enable_if<std::is_trivially_destructible<T>::value>::type>
T* alloc(size_t count, size_t alignment = alignof(T), size_t extra = 0) noexcept
{
return (T*)alloc(count * sizeof(T), alignment, extra);
}
// return memory pointed by p to the arena
// (actual behaviour may depend on allocator provided)
void free(void* p) noexcept
{
if(p) {
std::lock_guard<LockingPolicy> guard(mLock);
mListener.onFree(p);
mAllocator.free(p);
printf("Free\n");
allocCount.fetch_sub(1, std::memory_order_relaxed);
}
}
// some allocators require the size of the allocation for free
void free(void* p, size_t size) noexcept
{
if(p) {
std::lock_guard<LockingPolicy> guard(mLock);
mListener.onFree(p, size);
mAllocator.free(p, size);
printf("Free\n");
allocCount.fetch_sub(1, std::memory_order_relaxed);
}
}
// some allocators don't have a free() call, but a single reset() or rewind() instead
void reset() noexcept
{
std::lock_guard<LockingPolicy> guard(mLock);
mListener.onReset();
mAllocator.reset();
allocCount.fetch_sub(1, std::memory_order_relaxed);
}
void* getCurrent() noexcept { return mAllocator.getCurrent(); }
void rewind(void* addr) noexcept
{
std::lock_guard<LockingPolicy> guard(mLock);
mListener.onRewind(addr);
mAllocator.rewind(addr);
allocCount.fetch_sub(1, std::memory_order_relaxed);
}
// Allocate and construct an object
template<typename T, size_t ALIGN = alignof(T), typename... ARGS>
T* make(ARGS&& ... args) noexcept
{
void* const p = this->alloc(sizeof(T), ALIGN);
return p ? new(p) T(std::forward<ARGS>(args)...) : nullptr;
}
// destroys an object created with make<T>() above, and frees associated memory
template<typename T>
void destroy(T* p) noexcept
{
if(p) {
p->~T();
this->free((void*)p, sizeof(T));
}
}
char const* getName() const noexcept { return mArenaName; }
AllocatorPolicy& getAllocator() noexcept { return mAllocator; }
AllocatorPolicy const& getAllocator() const noexcept { return mAllocator; }
TrackingPolicy& getListener() noexcept { return mListener; }
TrackingPolicy const& getListener() const noexcept { return mListener; }
HeapArea& getArea() noexcept { return mArea; }
HeapArea const& getArea() const noexcept { return mArea; }
void setListener(TrackingPolicy listener) noexcept
{
std::swap(mListener, listener);
}
template <typename ... ARGS>
void emplaceListener(ARGS&& ... args) noexcept
{
mListener.~TrackingPolicy();
new (&mListener) TrackingPolicy(std::forward<ARGS>(args)...);
}
// An arena can't be copied
Arena(Arena const& rhs) noexcept = delete;
Arena& operator=(Arena const& rhs) noexcept = delete;
friend void swap(Arena& lhs, Arena& rhs) noexcept
{
using std::swap;
swap(lhs.mArea, rhs.mArea);
swap(lhs.mAllocator, rhs.mAllocator);
swap(lhs.mLock, rhs.mLock);
swap(lhs.mListener, rhs.mListener);
swap(lhs.mArenaName, rhs.mArenaName);
}
private:
HeapArea mArea; // We might want to make that a template parameter too eventually.
AllocatorPolicy mAllocator;
LockingPolicy mLock;
TrackingPolicy mListener;
char const* mArenaName = nullptr;
};
class FreeList
{
public:
FreeList() noexcept = default;
FreeList(void* begin, void* end, size_t elementSize, size_t alignment, size_t extra) noexcept
: mHead(init(begin, end, elementSize, alignment, extra))
#ifndef NDEBUG
, mBegin(begin), mEnd(end)
#endif
{
}
FreeList(const FreeList& rhs) = delete;
FreeList& operator=(const FreeList& rhs) = delete;
FreeList(FreeList&& rhs) noexcept = default;
FreeList& operator=(FreeList&& rhs) noexcept = default;
void* pop() noexcept
{
Node* const head = mHead;
mHead = head ? head->next : nullptr;
// this could indicate a use after free
assert(!mHead || mHead >= mBegin && mHead < mEnd);
return head;
}
void push(void* p) noexcept
{
assert(p);
assert(p >= mBegin && p < mEnd);
// TODO: assert this is one of our pointer (i.e.: it's address match one of ours)
Node* const head = static_cast<Node*>(p);
head->next = mHead;
mHead = head;
}
void* getFirst() noexcept
{
return mHead;
}
private:
struct Node
{
Node* next;
};
static Node* init(void* begin, void* end,
size_t elementSize, size_t alignment, size_t extra) noexcept
{
void* const p = pointermath::align(begin, alignment, extra);
void* const n = pointermath::align(pointermath::add(p, elementSize), alignment, extra);
assert(p >= begin && p < end);
assert(n >= begin && n < end&& n > p);
const size_t d = uintptr_t(n) - uintptr_t(p);
const size_t num = (uintptr_t(end) - uintptr_t(p)) / d;
// set first entry
Node* head = static_cast<Node*>(p);
// next entry
Node* cur = head;
for(size_t i = 1; i < num; ++i) {
Node* next = pointermath::add(cur, d);
cur->next = next;
cur = next;
}
assert(cur < end);
assert(pointermath::add(cur, d) <= end);
cur->next = nullptr;
return head;
}
Node* mHead = nullptr;
#ifndef NDEBUG
// These are needed only for debugging...
void* mBegin = nullptr;
void* mEnd = nullptr;
#endif
};
template <
size_t ELEMENT_SIZE,
size_t ALIGNMENT = alignof(std::max_align_t),
size_t OFFSET = 0,
typename FREELIST = FreeList>
class PoolAllocator
{
static_assert(ELEMENT_SIZE >= sizeof(void*), "ELEMENT_SIZE must accommodate at least a pointer");
public:
// our allocator concept
void* alloc(size_t size = ELEMENT_SIZE,
size_t alignment = ALIGNMENT, size_t offset = OFFSET) noexcept
{
assert(size <= ELEMENT_SIZE);
assert(alignment <= ALIGNMENT);
assert(offset == OFFSET);
return mFreeList.pop();
}
void free(void* p, size_t = ELEMENT_SIZE) noexcept
{
mFreeList.push(p);
}
size_t getSize() const noexcept { return ELEMENT_SIZE; }
PoolAllocator(void* begin, void* end) noexcept
: mFreeList(begin, end, ELEMENT_SIZE, ALIGNMENT, OFFSET)
{
}
template <typename AREA>
explicit PoolAllocator(const AREA& area) noexcept
: PoolAllocator(area.begin(), area.end())
{
}
// Allocators can't be copied
PoolAllocator(const PoolAllocator& rhs) = delete;
PoolAllocator& operator=(const PoolAllocator& rhs) = delete;
PoolAllocator() noexcept = default;
~PoolAllocator() noexcept = default;
// API specific to this allocator
void* getCurrent() noexcept
{
return mFreeList.getFirst();
}
private:
FREELIST mFreeList;
};
Dodałem tylko w Arena printf
na free
żeby sprawdzić czy jest callowana ta metoda oraz licznik do debugowania. W momencie gdy wyskakuje błąd, który zaraz pokaże allocCount
wynosi 5852
więc ewidentnie się coś jest z synchronizacją.
Klasa Driver
też praktycznie jest copy-paste'owana
class SpinLock
{
std::atomic_flag mLock = ATOMIC_FLAG_INIT;
public:
void lock() noexcept
{
goto start;
do {
yield();
start:;
} while(mLock.test_and_set(std::memory_order_acquire));
}
void unlock() noexcept
{
mLock.clear(std::memory_order_release);
}
private:
inline void yield() noexcept
{
}
};
class HandleBase
{
public:
using HandleId = uint32_t;
static constexpr const HandleId nullid = HandleId{ std::numeric_limits<HandleId>::max() };
constexpr HandleBase() noexcept : object(nullid) {}
explicit HandleBase(HandleId id) noexcept : object(id)
{
//assert_invariant(object != nullid); // usually means an uninitialized handle is used
}
HandleBase(HandleBase const& rhs) noexcept = default;
HandleBase(HandleBase&& rhs) noexcept : object(rhs.object)
{
rhs.object = nullid;
}
HandleBase& operator=(HandleBase const& rhs) noexcept = default;
HandleBase& operator=(HandleBase&& rhs) noexcept
{
std::swap(object, rhs.object);
return *this;
}
explicit operator bool() const noexcept { return object != nullid; }
void clear() noexcept { object = nullid; }
bool operator==(const HandleBase& rhs) const noexcept { return object == rhs.object; }
bool operator!=(const HandleBase& rhs) const noexcept { return object != rhs.object; }
// get this handle's handleId
HandleId getId() const noexcept { return object; }
protected:
HandleId object;
};
template <typename T>
struct Handle : public HandleBase
{
using HandleBase::HandleBase;
template<typename B, typename = std::enable_if_t<std::is_base_of<T, B>::value> >
Handle(Handle<B> const& base) noexcept : HandleBase(base) {} // NOLINT(hicpp-explicit-conversions)
};
struct HwBase
{
};
struct HwVertexBuffer : public HwBase
{
HwVertexBuffer() noexcept = default;
HwVertexBuffer(uint8_t bufferCount, uint8_t attributeCount, uint32_t elementCount) noexcept
{
}
~HwVertexBuffer()
{
printf("hw dtor\n");
}
char b[136] = {};
};
struct Driver
{
Driver()
: mHandleArena("handles", 2 * 1024 * 1024)
{
}
struct GLVertexBuffer : public HwVertexBuffer
{
~GLVertexBuffer()
{
printf("gl dtor\n");
}
using HwVertexBuffer::HwVertexBuffer;
struct
{
// 4 * MAX_VERTEX_ATTRIBUTE_COUNT bytes
std::array<uint32_t, 16> buffers{};
} gl;
};
Handle<HwVertexBuffer> createVertexBufferS() noexcept
{
return initHandle<GLVertexBuffer>();
}
void destroyVertexBuffer(Handle<HwVertexBuffer> vbh)
{
if(vbh) {
GLVertexBuffer const* eb = handle_cast<const GLVertexBuffer*>(vbh);
//int n = int(eb->bufferCount);
//const auto& buffers = eb->gl.buffers;
//gl.deleteBuffers(n, buffers.data(), GL_ARRAY_BUFFER);
destruct(vbh, eb);
}
}
class HandleAllocator
{
PoolAllocator< 16, 16> mPool0;
PoolAllocator< 64, 32> mPool1;
PoolAllocator<208, 32> mPool2;
public:
static constexpr size_t MIN_ALIGNMENT_SHIFT = 4;
explicit HandleAllocator(const HeapArea& area)
: mPool0(area.begin(),
pointermath::add(area.begin(), (1 * area.getSize()) / 16)),
mPool1(pointermath::add(area.begin(), (1 * area.getSize()) / 16),
pointermath::add(area.begin(), (6 * area.getSize()) / 16)),
mPool2(pointermath::add(area.begin(), (6 * area.getSize()) / 16),
area.end())
{
}
void* alloc(size_t size, size_t alignment, size_t extra = 0) noexcept
{
if(size > mPool2.getSize()) {
__debugbreak();
}
if(size <= mPool0.getSize()) return mPool0.alloc(size, 16, extra);
if(size <= mPool1.getSize()) return mPool1.alloc(size, 32, extra);
if(size <= mPool2.getSize()) return mPool2.alloc(size, 32, extra);
return nullptr;
}
void free(void* p, size_t size) noexcept
{
if(size <= mPool0.getSize()) { mPool0.free(p); return; }
if(size <= mPool1.getSize()) { mPool1.free(p); return; }
if(size <= mPool2.getSize()) { mPool2.free(p); return; }
}
};
using HandleArena = Arena<HandleAllocator,
SpinLock,
TrackingPolicy::Debug>;
HandleArena mHandleArena;
HandleBase::HandleId allocateHandle(size_t size) noexcept
{
void* addr = mHandleArena.alloc(size);
if(!addr) {
__debugbreak();
}
char* const base = (char*)mHandleArena.getArea().begin();
size_t offset = (char*)addr - base;
return HandleBase::HandleId(offset >> HandleAllocator::MIN_ALIGNMENT_SHIFT);
}
template<typename D, typename ... ARGS>
Handle<D> initHandle(ARGS&& ... args) noexcept
{
static_assert(sizeof(D) <= 208, "Handle<> too large");
Handle<D> h{ allocateHandle(sizeof(D)) };
D* addr = handle_cast<D*>(h);
new(addr) D(std::forward<ARGS>(args)...);
return h;
}
template<typename D, typename B, typename ... ARGS>
typename std::enable_if<std::is_base_of<B, D>::value, D>::type*
construct(Handle<B> const& handle, ARGS&& ... args) noexcept
{
assert_invariant(handle);
D* addr = handle_cast<D*>(const_cast<Handle<B>&>(handle));
// currently we implement construct<> with dtor+ctor, we could use operator= also
// but all our dtors are trivial, ~D() is actually a noop.
addr->~D();
new(addr) D(std::forward<ARGS>(args)...);
return addr;
}
template<typename B, typename D,
typename = typename std::enable_if<std::is_base_of<B, D>::value, D>::type>
void destruct(Handle<B>& handle, D const* p) noexcept
{
// allow to destroy the nullptr, similarly to operator delete
if(p) {
p->~D();
mHandleArena.free(const_cast<D*>(p), sizeof(D));
}
}
template<typename Dp, typename B>
inline
typename std::enable_if<
std::is_pointer<Dp>::value&&
std::is_base_of<B, typename std::remove_pointer<Dp>::type>::value, Dp>::type
handle_cast(Handle<B>& handle) noexcept
{
//assert_invariant(handle);
if(!handle) return nullptr; // better to get a NPE than random behavior/corruption
char* const base = (char*)mHandleArena.getArea().begin();
size_t offset = handle.getId() << HandleAllocator::MIN_ALIGNMENT_SHIFT;
// assert that this handle is even a valid one
//assert_invariant(base + offset + sizeof(typename std::remove_pointer<Dp>::type) <= (char*)mHandleArena.getArea().end());
return static_cast<Dp>(static_cast<void*>(base + offset));
}
template<typename Dp, typename B>
inline
typename std::enable_if<
std::is_pointer<Dp>::value&&
std::is_base_of<B, typename std::remove_pointer<Dp>::type>::value, Dp>::type
handle_cast(Handle<B> const& handle) noexcept
{
return handle_cast<Dp>(const_cast<Handle<B>&>(handle));
}
};
Jedyne co zostało tutaj zmienione, to macra
#define DECL_DRIVER_API(methodName, paramsDecl, params) \
UTILS_ALWAYS_INLINE inline void methodName(paramsDecl);
#define DECL_DRIVER_API_SYNCHRONOUS(RetType, methodName, paramsDecl, params) \
RetType methodName(paramsDecl) override;
#define DECL_DRIVER_API_RETURN(RetType, methodName, paramsDecl, params) \
RetType methodName##S() noexcept override; \
UTILS_ALWAYS_INLINE inline void methodName##R(RetType, paramsDecl);
#include "private/backend/DriverAPI.inc"
Zamieniłem je na po prostu 2 funkcje
Handle<HwVertexBuffer> createVertexBufferS() noexcept
{
return initHandle<GLVertexBuffer>();
}
void destroyVertexBuffer(Handle<HwVertexBuffer> vbh)
{
if(vbh) {
GLVertexBuffer const* eb = handle_cast<const GLVertexBuffer*>(vbh);
//int n = int(eb->bufferCount);
//const auto& buffers = eb->gl.buffers;
//gl.deleteBuffers(n, buffers.data(), GL_ARRAY_BUFFER);
destruct(vbh, eb);
}
}
No i teraz tak, po kilku alokacjach w initHandle()
-> allocateHandle()
odpala się _debugbreak()
, bo addr
jest nullptr
. To może oznaczać tyle, że pamięć nie jest odpowienio czyszczona, co już zauważyłem po tym, że allocCount
przyjmuje jakąś dziwnie dużą wartość. Tylko teraz pytanie dlaczego tak się dzieje? Co jest tutaj nie tak skoro praktycznie wszystko zostało skopiowane?
Może mam coś źle z command queue?
Oto kod:
static constexpr size_t OBJECT_ALIGNMENT = alignof(std::max_align_t);
static constexpr size_t Align(size_t v)
{
return (v + (OBJECT_ALIGNMENT - 1)) & -OBJECT_ALIGNMENT;
}
struct CircularBuffer
{
static constexpr size_t BLOCK_BITS = 12;
static constexpr size_t BLOCK_SIZE = 1 << BLOCK_BITS;
static constexpr size_t BLOCK_MASK = BLOCK_SIZE - 1;
inline CircularBuffer(size_t bufferSize)
{
data = malloc(2 * bufferSize);
mSize = bufferSize;
tail = data;
head = data;
}
CircularBuffer(CircularBuffer const& rhs) = delete;
CircularBuffer(CircularBuffer&& rhs) = delete;
CircularBuffer& operator=(CircularBuffer const& rhs) = delete;
CircularBuffer& operator=(CircularBuffer&& rhs) = delete;
inline ~CircularBuffer()
{
free(data);
data = nullptr;
}
[[nodiscard]] inline void* Allocate(size_t size)
{
char* const cur = static_cast<char*>(head);
head = cur + size;
return cur;
}
template<typename T>
[[nodiscard]] inline T* Allocate(size_t count = 1, bool aligned = true)
{
return aligned ? (T*)Allocate(Align(sizeof(T)) * count) : (T*)Allocate(sizeof(T) * count);
}
template<typename U, typename ... Args>
inline void Construct(U* mem, Args&& ... args)
{
new(mem) U(std::forward<Args>(args)...);
}
[[nodiscard]] inline size_t max_size() const { return mSize; }
[[nodiscard]] inline size_t size() const { return uintptr_t(head) - uintptr_t(tail); }
[[nodiscard]] inline bool empty() const { return tail == head; }
[[nodiscard]] inline void* getHead() const { return head; }
[[nodiscard]] inline void* getTail() const { return tail; }
struct Range
{
void* const begin = nullptr;
void* const end = nullptr;
[[nodiscard]] inline size_t size() const { return uintptr_t(end) - uintptr_t(begin); }
};
[[nodiscard]] inline Range getRange() const
{
return { tail, head };
}
inline void Circularize()
{
if(intptr_t(head) - intptr_t(data) > intptr_t(mSize)) {
head = data;
}
tail = head;
}
private:
size_t mSize = 0;
void* data = nullptr;
void* tail = nullptr;
void* head = nullptr;
};
struct CommandBase
{
typedef void(*ExecuteFn)(CommandBase* self, uintptr_t& next);
inline CommandBase(ExecuteFn fn)
: fn(fn)
{
}
virtual ~CommandBase() = default;
[[nodiscard]] inline CommandBase* Execute()
{
uintptr_t next = 0;
fn(this, next);
return reinterpret_cast<CommandBase*>(reinterpret_cast<uintptr_t>(this) + next);
}
ExecuteFn fn;
};
template<typename FuncT, typename ... Args>
struct CustomCommand : CommandBase
{
inline CustomCommand(FuncT&& func, Args&& ... args)
: CommandBase(Execute), func(std::forward<FuncT>(func)), args(std::forward<Args>(args)...)
{
}
~CustomCommand() override = default;
static inline void Execute(CommandBase* base, uintptr_t& next)
{
next = Align(sizeof(CustomCommand));
CustomCommand* b = static_cast<CustomCommand*>(base);
std::apply(std::forward<FuncT>(b->func), std::move(b->args));
b->~CustomCommand();
}
FuncT func;
std::tuple<std::remove_reference_t<Args>...> args;
};
struct NoopCommand : CommandBase
{
inline explicit NoopCommand(void* next)
: CommandBase(Execute), next(size_t((char*)next - (char*)this))
{
}
~NoopCommand() override = default;
static inline void Execute(CommandBase* self, uintptr_t& next)
{
next = static_cast<NoopCommand*>(self)->next;
}
uintptr_t next = 0;
};
struct CommandQueue
{
Handle<HwVertexBuffer> CreateVertexBuffer()
{
Handle<HwVertexBuffer> handle = driver.createVertexBufferS();
Submit([](Driver& driver) {
//tutaj powinnien być jakiś kodzik, ale tylko dla testu nie trzeba niczego wpisywać
}, driver);
return handle;
}
void DestroyVertexBuffer(Handle<HwVertexBuffer> vbh)
{
Submit([](Driver& driver, Handle<HwVertexBuffer> v) {
driver->destroyVertexBuffer(v);
}, driver, std::move(vbh));
}
inline CommandQueue(Driver& driver, size_t requiredSize, size_t bufferSize)
: requiredSize((requiredSize + CircularBuffer::BLOCK_MASK) & ~CircularBuffer::BLOCK_MASK),
buffer(bufferSize),
freeSpace(bufferSize),
driver(driver)
{
}
Driver& driver;
template<typename FuncT, typename ... Args>
inline void Submit(FuncT&& funcT, Args&& ... args)
{
AllocateCommand<CustomCommand<FuncT, Args...>>(std::forward<FuncT>(funcT), std::forward<Args>(args)...);
}
template<typename Class, typename ... Args>
inline void AllocateCommand(Args&& ... args)
{
auto buff = buffer.Allocate<Class>();
buffer.Construct(buff, std::forward<Args>(args)...);
}
std::vector<CircularBuffer::Range> WaitForCommands() const
{
std::unique_lock<std::mutex> lock(mutex);
cv.wait(lock, [this]() { return !commandsToExecute.empty() || quit.load(std::memory_order_relaxed); });
return std::move(commandsToExecute);
}
void Flush()
{
if(buffer.empty()) {
return;
}
//Terminating command - last command needs to be nullptr to stop a while loop in the Execute() function.
new(buffer.Allocate(sizeof(NoopCommand))) NoopCommand(nullptr);
auto range = buffer.getRange();
size_t usedSpace = buffer.size();
buffer.Circularize();
std::unique_lock<std::mutex> lock(mutex);
commandsToExecute.push_back(range);
freeSpace -= usedSpace;
const size_t requiredSize = this->requiredSize;
if(freeSpace >= requiredSize) {
lock.unlock();
cv.notify_one();
} else {
cv.notify_one();
cv.wait(lock, [this, requiredSize]() { return freeSpace >= requiredSize; });
}
}
[[nodiscard]] bool Execute()
{
auto ranges = WaitForCommands();
if(ranges.empty()) {
return false;
}
for(auto& item : ranges) {
if(item.begin) {
CommandBase* base = static_cast<CommandBase*>(item.begin);
while(base) {
base = base->Execute();
}
ReleaseRange(item);
}
}
return true;
}
void ReleaseRange(const CircularBuffer::Range& range)
{
{
std::lock_guard<std::mutex> guard(mutex);
freeSpace += range.size();
}
cv.notify_one();
}
void Quit()
{
quit.store(true, std::memory_order_relaxed);
cv.notify_one();
}
std::atomic_bool quit{ false };
mutable std::mutex mutex;
mutable std::condition_variable cv;
size_t requiredSize = 0;
CircularBuffer buffer;
size_t freeSpace = 0;
mutable std::vector<CircularBuffer::Range> commandsToExecute;
};
Został on już tutaj trochę mocniej zmieniony (ale nie za bardzo). Zamiast 2 klas CommandStream
oraz CommandBufferQueue
mam jedną mniejszą klasę CommandQueue
, która łączy w sobie te 2 oryginalne klasy. A CustomCommand
jest implementacją w pewnym stopniu identycznej klasy Command : public ComandBase
w oryginalnym kodzie.
No i przykładowa funkcja main z zastosowaniem drivera i command queue
int main()
{
Driver driver;
CommandQueue commandQueue{driver, 1 * 1024 * 1024, 3 * 1024 * 1024 };
std::thread renderThread([&commandQueue]() {
while(true) {
if(!commandQueue.Execute()) {
break;
}
}
});
commandQueue.Flush();
bool quit = false;
while(!quit) {
commandQueue.Flush();
//tutaj w testowałem sobie jak będzie się zachowywać aplikacja przy różnych ilościach buferów.
static constexpr size_t s = 1;
std::array<Handle<HwVertexBuffer>, s> vbs;
for(auto& vb : vbs) {
vb = commandQueue.CreateVertexBuffer();
}
for(auto&& vb : vbs) {
commandQueue.DestroyVertexBuffer(std::move(vb));
}
commandQueue.Flush();
}
commandQueue.Flush();
commandQueue.Quit();
renderThread.join();
return 0;
}
Dodałem w niej trochę więcej commandQueue.Flush();
żeby również zasymulować oryginalny kod, który wywołuje flush
parę razy.
Przypomnę, że problem jest w przydzelaniu i zwalniaiu pamięci w funckach
HandleBase::HandleId allocateHandle(size_t size) noexcept
{
void* addr = mHandleArena.alloc(size);
if(!addr) {
__debugbreak();
}
char* const base = (char*)mHandleArena.getArea().begin();
size_t offset = (char*)addr - base;
return HandleBase::HandleId(offset >> HandleAllocator::MIN_ALIGNMENT_SHIFT);
}
template<typename B, typename D,
typename = typename std::enable_if<std::is_base_of<B, D>::value, D>::type>
void destruct(Handle<B>& handle, D const* p) noexcept
{
// allow to destroy the nullptr, similarly to operator delete
if(p) {
p->~D();
mHandleArena.free(const_cast<D*>(p), sizeof(D));
}
}
Testowałem kod na kompliatorze z flagą -fsanitize=thread
i jak zakomentujemy __debugbreak()
to pokazywany jest taki komunikat
Program received signal SIGSEGV, Segmentation fault.
0x000000000040335f in Driver::initHandle<Driver::GLVertexBuffer> (
this=0x7fffffffea70) at main.cpp:719
719 new(addr) D(std::forward<ARGS>(args)...);
tzn. w tym miejscu
template<typename D, typename ... ARGS>
Handle<D> initHandle(ARGS&& ... args) noexcept
{
static_assert(sizeof(D) <= 208, "Handle<> too large");
Handle<D> h{ allocateHandle(sizeof(D)) };
D* addr = handle_cast<D*>(h);
//O tutaj
new(addr) D(std::forward<ARGS>(args)...);
return h;
}
czyli tak naprawdę -fsanitize=thread
nie powiedział mi nic czego bym już nie wiedział, bo addr
daje nam zły adres, ponieważ allocateHandle
sfailowało podczas alokacji.