前置工作
在开始部署之前,需要准备以下环境:
- WS63 (Hi3863) 开发环境
- TensorFlow Lite Micro 源码
- CMake 构建工具
CMakeLists 配置
# --- TFLM source-tree locations -------------------------------------------
# Root of the TensorFlow Lite Micro checkout. Declared as a cache variable
# so it can be overridden on the command line (-DTFLM_SRC_PATH=...) without
# editing this file; the default preserves the original hardcoded location.
set(TFLM_SRC_PATH "G:/HiSpark_SDK/fbb_ws63/tflite-micro" CACHE PATH
    "Path to the TensorFlow Lite Micro source tree")
set(TFLM_CORE_PATH "${TFLM_SRC_PATH}/tensorflow/lite")
# Derived from TFLM_SRC_PATH (the original repeated the absolute path here,
# so overriding the root would silently leave flatbuffers pointing at the
# old tree). One override now relocates every dependency consistently.
set(FLATBUFFERS_DIR "${TFLM_SRC_PATH}/tensorflow/lite/micro/tools/make/downloads/flatbuffers")
set(TFLM_COMPILER_PATH "${TFLM_SRC_PATH}/tensorflow/compiler")
# ruy (matrix-multiplication library) include path(s).
set(RUY_PATHS
    "${TFLM_SRC_PATH}/tensorflow/lite/micro/tools/make/downloads/ruy"
)
# Candidate gemmlowp locations, probed in order below; first hit wins.
set(GEMMLOWP_FIXEDPOINT_PATHS
    "${TFLM_SRC_PATH}/tensorflow/lite/micro/tools/make/downloads/gemmlowp"
    "${TFLM_SRC_PATH}/tmp/tflm-ws63/third_party/gemmlowp"
    "${TFLM_SRC_PATH}/third_party/gemmlowp"
)
# Probe the candidate directories for gemmlowp, identified by the presence
# of fixedpoint/fixedpoint.h; the first match is used.
set(FOUND_GEMMLOWP FALSE)
foreach(path IN LISTS GEMMLOWP_FIXEDPOINT_PATHS)
    if(EXISTS "${path}/fixedpoint/fixedpoint.h")
        set(GEMMLOWP_PATH "${path}")
        set(FOUND_GEMMLOWP TRUE)
        message(STATUS "✓ Found gemmlowp at: ${path}")
        break()
    endif()
endforeach()
# Start PUBLIC_HEADER from a clean slate so stale values from an earlier
# configure run cannot leak into this one.
unset(PUBLIC_HEADER)
if(FOUND_GEMMLOWP)
    list(APPEND PUBLIC_HEADER "${GEMMLOWP_PATH}")
else()
    # Previously this failed silently and the build later died with a
    # confusing missing-header error; surface it at configure time instead.
    message(WARNING "gemmlowp not found; searched: ${GEMMLOWP_FIXEDPOINT_PATHS}")
endif()
# Populate PUBLIC_HEADER with every include path TFLM consumers need.
# BUG FIX: the original passed `CACHE INTERNAL ""` to list(APPEND); unlike
# set(), list(APPEND) has no CACHE signature, so those three tokens were
# appended as bogus include-path entries ("CACHE", "INTERNAL", ""). They
# are removed here — the value is exported via PARENT_SCOPE further below,
# which is what callers actually rely on.
list(APPEND PUBLIC_HEADER
    ${TFLM_SRC_PATH}
    ${TFLM_SRC_PATH}/tensorflow
    ${TFLM_SRC_PATH}/tensorflow/lite/kernels
    ${TFLM_CORE_PATH}
    ${TFLM_CORE_PATH}/micro
    ${TFLM_CORE_PATH}/micro/kernels
    ${TFLM_CORE_PATH}/micro/memory_planner
    ${TFLM_CORE_PATH}/micro/testing
    ${TFLM_CORE_PATH}/schema
    ${TFLM_CORE_PATH}/c
    ${TFLM_CORE_PATH}/core/api
    ${FLATBUFFERS_DIR}/include
    ${TFLM_CORE_PATH}/micro/kernels/cmsis_nn
)
list(APPEND PUBLIC_HEADER ${RUY_PATHS})
# Source files, grouped by TFLM subsystem. Paths are relative to the TFLM
# checkout configured above; groups are merged into ALL_TFLM_SOURCES below.
# Core API: flatbuffer<->TfLite conversion and tensor helpers.
set(CORE_API_SOURCES
"${TFLM_CORE_PATH}/core/api/flatbuffer_conversions.cc"
"${TFLM_CORE_PATH}/micro/tflite_bridge/flatbuffer_conversions_bridge.cc"
"${TFLM_CORE_PATH}/core/api/tensor_utils.cc"
)
# Model schema utilities (schema_utils.cc lives under the MLIR tree in
# recent TFLM checkouts).
set(SCHEMA_SOURCES
"${TFLM_COMPILER_PATH}/mlir/lite/schema/schema_utils.cc"
"${TFLM_CORE_PATH}/micro/flatbuffer_utils.cc"
)
# C API common definitions (TfLiteTensor, TfLiteStatus, ...).
set(C_API_SOURCES
"${TFLM_CORE_PATH}/core/c/common.cc"
)
# Interpreter core: allocator, interpreter, graph, op resolver, context.
set(CORE_INTERPRETER_SOURCES
"${TFLM_CORE_PATH}/micro/micro_allocator.cc"
"${TFLM_CORE_PATH}/micro/micro_interpreter.cc"
"${TFLM_CORE_PATH}/micro/micro_utils.cc"
"${TFLM_CORE_PATH}/micro/micro_op_resolver.cc"
"${TFLM_CORE_PATH}/micro/micro_context.cc"
"${TFLM_CORE_PATH}/micro/micro_interpreter_context.cc"
"${TFLM_CORE_PATH}/micro/micro_interpreter_graph.cc"
)
# Memory planning and arena allocators (static-arena based; no heap).
set(MEMORY_SOURCES
"${TFLM_CORE_PATH}/micro/memory_planner/greedy_memory_planner.cc"
"${TFLM_CORE_PATH}/micro/memory_planner/linear_memory_planner.cc"
"${TFLM_CORE_PATH}/micro/arena_allocator/single_arena_buffer_allocator.cc"
"${TFLM_CORE_PATH}/micro/arena_allocator/persistent_arena_buffer_allocator.cc"
"${TFLM_CORE_PATH}/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc"
"${TFLM_CORE_PATH}/micro/arena_allocator/recording_single_arena_buffer_allocator.cc"
"${TFLM_CORE_PATH}/micro/recording_micro_allocator.cc"
"${TFLM_CORE_PATH}/micro/memory_helpers.cc"
"${TFLM_CORE_PATH}/micro/micro_allocation_info.cc"
)
# Kernel registration/runner plumbing.
set(KERNEL_REGISTRY_SOURCES
"${TFLM_CORE_PATH}/micro/kernels/kernel_runner.cc"
"${TFLM_CORE_PATH}/micro/kernels/kernel_util.cc"
)
# Shared *_common.cc helpers used by the kernel implementations below.
set(KERNEL_COMMON_SOURCES
"${TFLM_CORE_PATH}/micro/kernels/fully_connected_common.cc"
"${TFLM_CORE_PATH}/micro/kernels/softmax_common.cc"
"${TFLM_CORE_PATH}/micro/kernels/conv_common.cc"
"${TFLM_CORE_PATH}/micro/kernels/depthwise_conv_common.cc"
"${TFLM_CORE_PATH}/micro/kernels/pooling_common.cc"
"${TFLM_CORE_PATH}/micro/kernels/add_common.cc"
"${TFLM_CORE_PATH}/micro/kernels/mul_common.cc"
)
# Only the ops this sample's model actually uses are compiled in.
set(KERNEL_IMPLEMENTATION_SOURCES
"${TFLM_CORE_PATH}/micro/kernels/fully_connected.cc"
"${TFLM_CORE_PATH}/micro/kernels/softmax.cc"
"${TFLM_CORE_PATH}/micro/kernels/reshape.cc"
"${TFLM_CORE_PATH}/micro/kernels/quantize.cc"
"${TFLM_CORE_PATH}/micro/kernels/dequantize.cc"
"${TFLM_CORE_PATH}/micro/kernels/dequantize_common.cc"
"${TFLM_CORE_PATH}/micro/kernels/logistic.cc"
"${TFLM_CORE_PATH}/micro/kernels/add.cc"
"${TFLM_CORE_PATH}/micro/kernels/mul.cc"
"${TFLM_CORE_PATH}/micro/kernels/concatenation.cc"
"${TFLM_CORE_PATH}/micro/kernels/pooling.cc"
)
# Logging / error reporting; needs the platform DebugLog implementation
# provided by tflm_debug_log.cc in this sample.
set(LOG_SOURCES
"${TFLM_CORE_PATH}/micro/micro_log.cc"
"${TFLM_CORE_PATH}/micro/tflite_bridge/micro_error_reporter.cc"
"${TFLM_COMPILER_PATH}/mlir/lite/core/api/error_reporter.cc"
"${TFLM_CORE_PATH}/micro/micro_time.cc"
)
# Reference tensor/quantization math used by the portable kernels.
set(TENSOR_UTILS_SOURCES
"${TFLM_CORE_PATH}/kernels/internal/tensor_utils.cc"
"${TFLM_CORE_PATH}/kernels/internal/quantization_util.cc"
"${TFLM_CORE_PATH}/kernels/internal/common.cc"
"${TFLM_CORE_PATH}/kernels/internal/portable_tensor_utils.cc"
"${TFLM_CORE_PATH}/kernels/kernel_util.cc"
)
# Miscellaneous: profiler, resource variables, test helpers.
set(OTHER_SOURCES
"${TFLM_CORE_PATH}/micro/micro_profiler.cc"
"${TFLM_CORE_PATH}/micro/micro_resource_variable.cc"
"${TFLM_CORE_PATH}/micro/test_helpers.cc"
)
# Merge every TFLM source group into a single list.
set(ALL_TFLM_SOURCES
    ${CORE_API_SOURCES}
    ${SCHEMA_SOURCES}
    ${C_API_SOURCES}
    ${CORE_INTERPRETER_SOURCES}
    ${MEMORY_SOURCES}
    ${KERNEL_REGISTRY_SOURCES}
    ${KERNEL_COMMON_SOURCES}
    ${KERNEL_IMPLEMENTATION_SOURCES}
    ${LOG_SOURCES}
    ${TENSOR_UTILS_SOURCES}
    ${OTHER_SOURCES}
)
# Export the include paths to the parent scope; the parent CMakeLists
# wires PUBLIC_HEADER into the target's include directories.
set(PUBLIC_HEADER "${PUBLIC_HEADER}" PARENT_SCOPE)
# Append the TFLM sources plus this sample's own files, then export.
# list(APPEND) is used instead of set(SOURCES "${SOURCES}" ...) because the
# latter inserts an empty leading list element when SOURCES is unset.
list(APPEND SOURCES
    ${ALL_TFLM_SOURCES}
    "${CMAKE_CURRENT_SOURCE_DIR}/tflm_hello_world.cc"
    "${CMAKE_CURRENT_SOURCE_DIR}/hello_world_model.cc"
    "${CMAKE_CURRENT_SOURCE_DIR}/tflm_debug_log.cc"
)
set(SOURCES "${SOURCES}" PARENT_SCOPE)
一、项目背景
在 WS63 嵌入式平台上验证 TensorFlow Lite Micro (TFLM) 的功能,确保 TFLM 库能够正确编译和运行。
二、文件结构
src/application/samples/tflm_hello_world/
├── CMakeLists.txt # 构建配置,引用 TFLM 源码
├── tflm_hello_world.cc # 主验证程序 ⭐
├── tflm_debug_log.cc # 平台日志实现 ⭐
├── hello_world_model.cc # TFLite 模型数据
├── test_cpp.cc # C++ 编译测试
└── models/
└── hello_world_model.h # 模型头文件
三、核心设计决策
1. 日志系统设计(关键难点)
问题:TFLM 的 micro_log.cc 需要平台相关的 DebugLog 函数。
初次尝试(错误)
// ❌ 错误:使用可变参数
extern "C" void DebugLog(const char* format, ...) {
va_list args;
va_start(args, format);
vsnprintf(buffer, sizeof(buffer), format, args);
osal_printk("%s", buffer);
va_end(args);
}
问题表现:输出乱码
- Tensor arena size: 10534308 bytes(应该是 6000)
- Status: 恖5(应该是 PASS/FAIL)
根因分析
// TFLM 的 micro_log.cc 调用方式
void VMicroPrintf(const char* format, va_list args) {
DebugLog(format, args); // ❌ 传递的是 va_list,不是 ...
}
最终方案(正确)
// ✅ 正确:接受 va_list 参数
extern "C" void DebugLog(const char* format, va_list args) {
char buffer[512];
vsnprintf(buffer, sizeof(buffer), format, args);
osal_printk("%s", buffer);
}
// TFLM 还需要这个函数
extern "C" int DebugVsnprintf(char* buffer, size_t buf_size,
const char* format, va_list vlist) {
return vsnprintf(buffer, buf_size, format, vlist);
}
2. 主程序设计
核心流程:
- 加载模型 (GetModel)
- 创建 OpResolver (注册 FullyConnected)
- 创建 Interpreter (分配 TensorArena)
- 分配张量 (AllocateTensors)
- 获取输入/输出张量
- 执行推理测试 (Invoke)
- 验证结果
关键代码结构:
// 使用 int8 量化模型
input->data.int8[0] = golden_inputs_int8[i];
interpreter.Invoke();
float y_pred = (output->data.int8[0] - zero_point) * scale;
3. 内存管理
静态内存分配(嵌入式环境要求):
constexpr int kTensorArenaSize = 6000;
static uint8_t tensor_arena[kTensorArenaSize];
避免使用 new/delete,防止动态内存分配问题。
四、遇到的问题与解决
| 问题 | 原因 | 解决方案 |
|---|---|---|
| undefined reference to ‘DebugLog’ | TFLM 需要平台日志函数 | 实现 DebugLog(const char*, va_list) |
| 输出乱码、数值错误 | 函数签名不匹配(... vs va_list) | 修正为接受 va_list 参数 |
| 编译时未发现源文件 | CMakeLists.txt 缺少文件 | 添加 tflm_debug_log.cc 到 SOURCES |
五、验证结果
测试 4 个输入值的 sin(x) 预测:
| x (输入) | sin(x) 期望值 | 预测值 | 误差 | 状态 |
|---|---|---|---|---|
| 0.77 | 0.6961 | 0.6964 | 0.00031 | ✅ PASS |
| 1.57 | 1.0000 | 0.9949 | 0.00508 | ✅ PASS |
| 2.30 | 0.7457 | 0.7296 | 0.01610 | ✅ PASS |
| 3.14 | 0.0016 | -0.0083 | 0.00988 | ✅ PASS |
所有测试误差 < 0.05(容差),验证成功!
六、关键经验
- 仔细阅读 TFLM 头文件:debug_log.h 明确定义了 DebugLog 的签名
- 理解调用链:MicroPrintf → VMicroPrintf → DebugLog(va_list)
- 嵌入式适配要点:
  - 提供平台相关的 DebugLog 和 DebugVsnprintf
  - 使用静态内存分配
  - 使用 osal_printk 而非 printf
- 量化模型处理:理解 scale 和 zero_point 的反量化公式