太牛了!!
感谢支持…
继续分享些邪修的
当场景中大量使用BmFont时候, 经常会发现 fillMeshVertices3D 方法占比很高
正常
修改引擎代码, 展开颜色, 分正交和透视, 大部分情况下不需要运算 透视
/**
 * Transforms every vertex of `renderData` by the node's world matrix, writes the
 * resulting position plus one uniform color into the chunk's vertex buffer, and
 * appends two triangles per group of four vertices to the shared index buffer.
 *
 * Per-vertex layout written here (offsets are relative to the vertex start):
 * `+0..+2` position, `+5..+8` color as r, g, b, a in the 0..1 range. Offsets
 * `+3` and `+4` are left untouched.
 *
 * @param node Node whose `worldMatrix` is applied to the local vertices.
 * @param renderer Unused here; kept so the signature stays compatible with callers.
 * @param renderData Source vertices and target chunk. `null` makes the call a no-op.
 * @param color Vertex color with 0..255 channels, applied to every vertex.
 */
export function fillMeshVertices3D (node: Node, renderer: IBatcher, renderData: RenderData | null, color: Color): void {
    if (!renderData) return;

    const chunk = renderData.chunk;
    const dataList = renderData.data;
    const vData = chunk.vb;
    const vertexCount = renderData.vertexCount;
    // Loop-invariant: read the stride once instead of once per vertex.
    const floatStride = renderData.floatStride;

    const m = node.worldMatrix;
    const m00 = m.m00; const m01 = m.m01; const m02 = m.m02; const m03 = m.m03;
    const m04 = m.m04; const m05 = m.m05; const m06 = m.m06; const m07 = m.m07;
    const m12 = m.m12; const m13 = m.m13; const m14 = m.m14; const m15 = m.m15;

    // Normalize the color once; it is identical for every vertex.
    const colR = color.r / 255;
    const colG = color.g / 255;
    const colB = color.b / 255;
    const colA = color.a / 255;

    // The bottom matrix row is (0, 0, *, 1) for plain affine transforms, in which
    // case w is always 1 and the perspective divide can be skipped entirely.
    const needsPerspective = m03 !== 0 || m07 !== 0 || m15 !== 1;

    let vertexOffset = 0;
    if (needsPerspective) {
        // Perspective path: divide the transformed position by w.
        for (let i = 0; i < vertexCount; ++i) {
            const vert = dataList[i];
            const x = vert.x;
            const y = vert.y;
            const w = m03 * x + m07 * y + m15;
            // A zero (or NaN) w cannot be divided by. Leave the position unscaled
            // in that case rather than multiplying it by a huge reciprocal, which
            // would fling the vertex ~1e10 units away.
            // NOTE(review): this mirrors upstream `rhw = rhw ? 1 / rhw : 1` —
            // confirm against the engine version in use.
            const rhw = w ? 1 / w : 1;

            vData[vertexOffset] = (m00 * x + m04 * y + m12) * rhw;
            vData[vertexOffset + 1] = (m01 * x + m05 * y + m13) * rhw;
            vData[vertexOffset + 2] = (m02 * x + m06 * y + m14) * rhw;
            vData[vertexOffset + 5] = colR;
            vData[vertexOffset + 6] = colG;
            vData[vertexOffset + 7] = colB;
            vData[vertexOffset + 8] = colA;
            vertexOffset += floatStride;
        }
    } else {
        // Affine fast path (the common 2D case): no divide needed.
        for (let i = 0; i < vertexCount; ++i) {
            const vert = dataList[i];
            const x = vert.x;
            const y = vert.y;

            vData[vertexOffset] = m00 * x + m04 * y + m12;
            vData[vertexOffset + 1] = m01 * x + m05 * y + m13;
            vData[vertexOffset + 2] = m02 * x + m06 * y + m14;
            vData[vertexOffset + 5] = colR;
            vData[vertexOffset + 6] = colG;
            vData[vertexOffset + 7] = colB;
            vData[vertexOffset + 8] = colA;
            vertexOffset += floatStride;
        }
    }

    // Fill index data: each run of four vertices becomes the two triangles
    // (0, 1, 2) and (1, 3, 2), offset by the chunk's first vertex index.
    const vid = chunk.vertexOffset;
    const meshBuffer = chunk.meshBuffer;
    const ib = meshBuffer.iData;
    let indexOffset = meshBuffer.indexOffset;
    const quadCount = vertexCount >> 2;
    for (let i = 0; i < quadCount; i++) {
        const start = vid + (i << 2);
        ib[indexOffset] = start;
        ib[indexOffset + 1] = start + 1;
        ib[indexOffset + 2] = start + 2;
        ib[indexOffset + 3] = start + 1;
        ib[indexOffset + 4] = start + 3;
        ib[indexOffset + 5] = start + 2;
        indexOffset += 6;
    }

    // The shared cursor advances by the render data's declared index count,
    // not by the number of indices written above.
    meshBuffer.indexOffset += renderData.indexCount;
    meshBuffer.setDirty();
}
邪修
合并函数, 透视, Z轴统统不考虑! 不对Color A 做Float32转换, 直接用node._uiProps.opacity, 更偏激的点 我们可以把rgb Color, 直接用ArrayBuffer 缓存 Float32的RGBA
/**
 * Stripped-down buffer fill for a Label: applies only the 2D affine part of the
 * node's world matrix (no perspective divide, z is not written), stamps one
 * color on every vertex, then appends two triangles per four vertices.
 *
 * Alpha is taken straight from `node._uiProps.opacity`, not from `color.a / 255`.
 * Per-vertex slots `+2`, `+3` and `+4` are never written by this method.
 *
 * @param comp Label whose `renderData` is flushed. Nothing happens when it has none.
 * @param renderer Not used by this implementation.
 */
fillBuffers(comp: Label, renderer: IBatcher): void {
    const owner = comp.node;
    const rd = comp.renderData;
    if (!rd) return;

    const chunk = rd.chunk;
    const verts = rd.data;
    const vb = chunk.vb;
    const vertCount = rd.vertexCount;
    const step = rd.floatStride; // floats per vertex

    // Only the six matrix entries that affect x'/y' of a point (x, y, 0, 1).
    // NOTE(review): nodes carrying a real 3D or perspective transform will render
    // incorrectly through this path — confirm callers are 2D-only.
    const wm = owner.worldMatrix;
    const ax = wm.m00; const ay = wm.m01;
    const bx = wm.m04; const by = wm.m05;
    const tx = wm.m12; const ty = wm.m13;

    // Color channels are normalized once; alpha is the node's UI opacity.
    const tint = comp.color;
    const red = tint.r / 255;
    const green = tint.g / 255;
    const blue = tint.b / 255;
    const alpha = owner._uiProps.opacity;

    for (let i = 0, base = 0; i < vertCount; ++i, base += step) {
        const { x, y } = verts[i];
        vb[base] = ax * x + bx * y + tx;
        vb[base + 1] = ay * x + by * y + ty;
        // base + 2 (z) and base + 3 / base + 4 are intentionally left as they are.
        vb[base + 5] = red;
        vb[base + 6] = green;
        vb[base + 7] = blue;
        vb[base + 8] = alpha;
    }

    // Index fill: triangles (0, 1, 2) and (1, 3, 2) for each run of four vertices,
    // offset by the chunk's first vertex index.
    const firstVertex = chunk.vertexOffset;
    const meshBuffer = chunk.meshBuffer;
    const indices = meshBuffer.iData;
    const quads = vertCount >> 2;
    let cursor = meshBuffer.indexOffset;
    for (let q = 0; q < quads; ++q) {
        const v0 = firstVertex + (q << 2);
        indices[cursor++] = v0;
        indices[cursor++] = v0 + 1;
        indices[cursor++] = v0 + 2;
        indices[cursor++] = v0 + 1;
        indices[cursor++] = v0 + 3;
        indices[cursor++] = v0 + 2;
    }

    // Advance the shared cursor by the declared index count and flag the buffer.
    meshBuffer.indexOffset += rd.indexCount;
    meshBuffer.setDirty();
}
给大佬顶一个
我理解是包体吧, 通常情况下 PNG8(TinyPNG) 压缩后是比 ASTC小; 如果目标是减少包体 对于 UI、图标、带大量透明区域的素材,优先用 PNG/PNG8的 atlas;对场景贴图、法线贴图、位图照片类用 ASTC。
niuBi
大佬六百六十六
真的是应用AI去落地解决游戏问题了,而不是虚有其表的去讲AI
老板看到了会怎么想
鸿蒙的DevEco 可以直接ai测试ccc吗还是说要生成指定包来测试
用AI也是需要技术的
666


只是测试下发热和耗电
插眼 66666
niubi

666


666666 
邪修之为什么大量node.active 性能差, 比如数千个node 在update里 active = true/false
我们先看下node内部发生了什么
大量调用 node.active 会触发节点及其子孙的生命周期回调(preload/onLoad/onEnable/onDisable)、组件启用、变换和渲染数据重建,导致大量 CPU 开销和可能的内存分配/GC。一次性激活或频繁切换很多节点还会打乱渲染批次。
/**
 * Switches `node` on or off in the hierarchy and then emits
 * `NodeEventType.ACTIVE_IN_HIERARCHY_CHANGED` on it.
 *
 * Activation borrows a task from `activateTasksPool`, collects callbacks by
 * walking the node recursively, and invokes them in the order preload, onLoad,
 * onEnable. If the pool yields no task, nothing is activated, but the event is
 * still emitted.
 *
 * @param node Node to activate or deactivate.
 * @param active `true` to activate, `false` to deactivate.
 */
public activateNode (node: Node, active: boolean): void {
    if (!active) {
        this._deactivateNodeRecursively(node);
        // Debounce: purge now-inactive entries from every activation task that is
        // still in flight. The original authors note this is inefficient, but it
        // lets the general case stay efficient.
        for (const pending of this._activatingStack) {
            pending.preload.cancelInactive(IsPreloadStarted);
            pending.onLoad.cancelInactive(IsOnLoadStarted);
            pending.onEnable.cancelInactive(IsOnEnableCalled);
        }
    } else {
        const task = activateTasksPool.get();
        if (task) {
            // The task stays on the stack while callbacks run, so a nested
            // deactivation can cancel entries from it.
            this._activatingStack.push(task);
            const { preload, onLoad, onEnable } = task;
            this._activateNodeRecursively(node, preload, onLoad, onEnable);
            preload.invoke();
            onLoad.invoke();
            onEnable.invoke();
            this._activatingStack.pop();
            activateTasksPool.put(task);
        }
    }

    node.emit(NodeEventType.ACTIVE_IN_HIERARCHY_CHANGED, node);
}
怎么优化?
这里有2个思路
普通版
- 手动管理Node的Visible, 不渲染或者超出屏幕边界的时候 我们设置成null, 同时使用init 作为激活函数名, 不使用默认的onEnable, 这个方案对于3D是比较友好的
/**
 * Decides whether `model` is drawn by the current camera and, if so, appends a
 * render object for it to the matching lists.
 *
 * Checks run in this order: enabled flag, LOD culling, shadow-caster
 * registration, layer / visFlags visibility, and finally the frustum test
 * against `model.worldBounds`. Shadow casters are registered before the
 * visibility and frustum tests, so a model can cast a shadow without being
 * pushed to `renderObjects`.
 */
function enqueueRenderObject (model: Model): void {
    if (!model.enabled) return;
    if (scene.isCulledByLod(camera, model)) return;

    if (model.castShadow) {
        // Two separate getRenderObject calls, one per list.
        castShadowObjects.push(getRenderObject(model, camera));
        csmLayerObjects.push(getRenderObject(model, camera));
    }

    // Visible when the node's layer is fully contained in the camera visibility
    // mask, or when any of the model's visFlags bits are set in that mask.
    const layerMatches = model.node && ((visibility & model.node.layer) === model.node.layer);
    if (!(layerMatches || (visibility & model.visFlags))) return;

    // Frustum culling applies only to models that have world bounds.
    if (model.worldBounds && !geometry.intersect.aabbFrustum(model.worldBounds, camera.frustum)) return;

    renderObjects.push(getRenderObject(model, camera));
}

// Run every model in the list through the filter above.
for (let idx = 0; idx < models.length; idx++) {
    enqueueRenderObject(models[idx]);
}
邪修版
但对于2D 3D 大量伤害文字和特效, 设置成null, 会影响batcher2D 的排序;
2D文字/3D特效/拖尾混排

优化重构assembler, 上万文字,拖尾,特效, 2个assembler 搞定, 同时保证三角面的有序culling
我们在assembler里面也做了3个优化
- 预创建 预先创建好极限大小的buffer, 如果超过, 会把有限时间的移除
- 位置分配 针对光污染特效, 单独一个assembler不去做特定排序(不过先来后到 数量大的时候, 不会很明显) 通过slot 预先分配, 先不用删除, 把能用的slot 给后续节点, 比如有1000个在跑的特效, 有500个要删除, 先添加了300个, 这时候我们通过 vbF32.fill/iBuffer.fill 更新到指定长度就可以了, 前面的数据
/**
 * Allocates the CPU-side staging arrays and the matching GPU buffers for a
 * fixed maximum number of vertices and indices.
 *
 * @param maxVertCount Vertex capacity; the GPU vertex buffer is
 *   `maxVertCount * MultiEffect.VERTEX_SIZE` bytes.
 * @param maxIndexCount Index capacity; indices are 16-bit.
 * @returns The two GPU buffers plus the `Float32Array` / `Uint16Array` mirrors.
 */
private _createBuffers(maxVertCount: number, maxIndexCount: number): EffectBuffer {
    const { device } = director.root!;

    // 16-bit indices: two bytes each, which is also the index buffer stride.
    const indexStride = Uint16Array.BYTES_PER_ELEMENT;
    const vertexStride = MultiEffect.VERTEX_SIZE;

    // CPU mirrors are allocated first, before any GPU buffer is created.
    const vbF32 = new Float32Array(maxVertCount * MultiEffect.FLOATS_PER_VERT);
    const iBuffer = new Uint16Array(maxIndexCount);

    // Both GPU buffers use the same memory flags and are transfer destinations.
    const memory = gfx.MemoryUsageBit.HOST | gfx.MemoryUsageBit.DEVICE;

    const vertexInfo = new gfx.BufferInfo(
        gfx.BufferUsageBit.VERTEX | gfx.BufferUsageBit.TRANSFER_DST,
        memory,
        maxVertCount * vertexStride,
        vertexStride,
    );
    const vertexBuffer = device.createBuffer(vertexInfo);

    const indexInfo = new gfx.BufferInfo(
        gfx.BufferUsageBit.INDEX | gfx.BufferUsageBit.TRANSFER_DST,
        memory,
        maxIndexCount * indexStride,
        indexStride,
    );
    const indexBuffer = device.createBuffer(indexInfo);

    return { vertexBuffer, indexBuffer, vbF32, iBuffer };
}
/**
 * Removes every effect flagged with `remove` from `config.effList`, recycling
 * its slot index into `config.freeSlots` and clearing its buffer ranges.
 *
 * The list is compacted in place in one pass, so the surviving effects keep
 * their relative order and the array stays contiguous. Calling `splice` per
 * removed element would shift the tail each time and turn a bulk removal into
 * quadratic work.
 *
 * Does nothing unless `config.hasRemovals` is set; the flag is cleared afterwards.
 *
 * @param config Material config whose effect list is pruned.
 */
private _processRemovals(config: MatConfig) {
    if (!config.hasRemovals) return;

    const effs = config.effList!;
    // `kept` is both the number of survivors so far and the next write position.
    let kept = 0;
    for (let i = 0; i < effs.length; i++) {
        const eff = effs[i];
        if (eff.remove) {
            // Hand the slot back before the effect is cleared.
            config.freeSlots!.push(eff.slotIndex);
            this._clearEffectBuffer(eff);
        } else {
            // Shift the survivor down over the gap left by removed entries.
            if (kept !== i) effs[kept] = eff;
            kept++;
        }
    }
    // Drop the stale tail left behind by the compaction.
    effs.length = kept;

    config.hasRemovals = false;
}
/**
 * Zeroes the vertex and index ranges owned by `eff` in its material's CPU-side
 * buffers, then returns the effect to `EffectPool`.
 *
 * The ranges start at `eff.offset` (vertex floats) and `eff.indexOffset`
 * (indices) and are sized from the material's `maxLength`.
 *
 * @param eff Effect whose buffer ranges are cleared and which is then pooled.
 */
private _clearEffectBuffer(eff: EffectConfig) {
    const matConfig = eff.matConfig!;
    const buffers = matConfig.buffers!;
    const segmentCount = matConfig.maxLength!;

    // NOTE(review): the vertex span assumes exactly 2 shapes per segment, while
    // the index span below uses eff.shapeCount. They agree only when
    // eff.shapeCount is 2 — confirm that is always the case.
    const shapesPerSegment = 2;
    const floatsToClear = segmentCount * shapesPerSegment * MultiEffect.FLOATS_PER_VERT;
    // Two triangles (2 * 3 indices) per cell between neighbouring segments and shapes.
    const indicesToClear = (segmentCount - 1) * (eff.shapeCount - 1) * 2 * 3;

    const vbFrom = eff.offset;
    const ibFrom = eff.indexOffset;
    buffers.vbF32.fill(0, vbFrom, vbFrom + floatsToClear);
    buffers.iBuffer.fill(0, ibFrom, ibFrom + indicesToClear);

    EffectPool.put(eff);
}
牛逼呀大佬