LOADING

加载过慢请开启缓存 浏览器默认开启

毕业设计 (2) - 部署 TFLM hello_world demo 到 WS63 (Hi3863) 平台

前置工作

在开始部署之前,需要准备以下环境:

  • WS63 (Hi3863) 开发环境

  • TensorFlow Lite Micro 源码

  • CMake 构建工具

CMakeLists 配置

# Root of the TensorFlow Lite Micro checkout.
# NOTE(review): absolute Windows path — consider making this a CACHE PATH
# variable (or deriving it from the SDK root) so other machines can build.
set(TFLM_SRC_PATH "G:/HiSpark_SDK/fbb_ws63/tflite-micro")
set(TFLM_CORE_PATH "${TFLM_SRC_PATH}/tensorflow/lite")
# FIX: reuse TFLM_SRC_PATH instead of repeating the absolute path verbatim,
# so a relocated checkout only needs one edit.
set(FLATBUFFERS_DIR "${TFLM_SRC_PATH}/tensorflow/lite/micro/tools/make/downloads/flatbuffers")
set(TFLM_COMPILER_PATH "${TFLM_SRC_PATH}/tensorflow/compiler")

# ruy include path(s) — required by TFLM reference kernels.
set(RUY_PATHS
    "${TFLM_SRC_PATH}/tensorflow/lite/micro/tools/make/downloads/ruy"
)

# Candidate locations of gemmlowp (provides fixedpoint/fixedpoint.h);
# searched in order below, first existing one wins.
set(GEMMLOWP_FIXEDPOINT_PATHS
    "${TFLM_SRC_PATH}/tensorflow/lite/micro/tools/make/downloads/gemmlowp"
    "${TFLM_SRC_PATH}/tmp/tflm-ws63/third_party/gemmlowp"
    "${TFLM_SRC_PATH}/third_party/gemmlowp"
)

# Probe each candidate directory for gemmlowp's fixedpoint header and keep
# the first match in GEMMLOWP_PATH.
set(FOUND_GEMMLOWP FALSE)
foreach(path ${GEMMLOWP_FIXEDPOINT_PATHS})
    if(EXISTS "${path}/fixedpoint/fixedpoint.h")
        set(GEMMLOWP_PATH "${path}")
        set(FOUND_GEMMLOWP TRUE)
        message(STATUS "✓ Found gemmlowp at: ${path}")
        break()
    endif()
endforeach()

# Start the include-path list from a clean slate for this sample.
unset(PUBLIC_HEADER)

if(FOUND_GEMMLOWP)
    list(APPEND PUBLIC_HEADER
        "${GEMMLOWP_PATH}"
    )
else()
    # FIX: fail loudly at configure time instead of leaving a cryptic
    # "fixedpoint/fixedpoint.h: No such file" error for compile time.
    message(WARNING
        "gemmlowp (fixedpoint/fixedpoint.h) not found in any candidate path; "
        "TFLM kernels that depend on it will fail to compile.")
endif()

# Collect every TFLM include directory the sample build needs.
# BUG FIX: the original passed `CACHE INTERNAL ""` to list(APPEND).
# list(APPEND) has no CACHE signature, so those three tokens were appended
# verbatim as bogus list elements ("CACHE", "INTERNAL", "") and ended up
# being handed to the compiler as include paths. PUBLIC_HEADER is exported
# with PARENT_SCOPE further below, so no cache entry is needed here.
list(APPEND PUBLIC_HEADER
    ${TFLM_SRC_PATH}
    ${TFLM_SRC_PATH}/tensorflow
    ${TFLM_SRC_PATH}/tensorflow/lite/kernels
    ${TFLM_CORE_PATH}
    ${TFLM_CORE_PATH}/micro
    ${TFLM_CORE_PATH}/micro/kernels
    ${TFLM_CORE_PATH}/micro/memory_planner
    ${TFLM_CORE_PATH}/micro/testing
    ${TFLM_CORE_PATH}/schema
    ${TFLM_CORE_PATH}/c
    ${TFLM_CORE_PATH}/core/api
    ${FLATBUFFERS_DIR}/include
    ${TFLM_CORE_PATH}/micro/kernels/cmsis_nn
)

# ruy headers, needed by the reference kernel implementations.
list(APPEND PUBLIC_HEADER ${RUY_PATHS})

# Source file groups. Each set() below names one functional slice of the
# TFLM runtime; they are merged into ALL_TFLM_SOURCES at the end.

# flatbuffer <-> TfLite struct conversion and tensor utilities.
set(CORE_API_SOURCES
    "${TFLM_CORE_PATH}/core/api/flatbuffer_conversions.cc"
    "${TFLM_CORE_PATH}/micro/tflite_bridge/flatbuffer_conversions_bridge.cc"
    "${TFLM_CORE_PATH}/core/api/tensor_utils.cc"
)

# .tflite schema helpers (schema_utils lives under the MLIR tree in this
# TFLM revision) and flatbuffer traversal utilities.
set(SCHEMA_SOURCES
    "${TFLM_COMPILER_PATH}/mlir/lite/schema/schema_utils.cc"
    "${TFLM_CORE_PATH}/micro/flatbuffer_utils.cc"
)

# C API common definitions (TfLiteTensor, TfLiteStatus, ...).
set(C_API_SOURCES
    "${TFLM_CORE_PATH}/core/c/common.cc"
)

# Interpreter core: allocator, interpreter, op resolver, graph/context.
set(CORE_INTERPRETER_SOURCES
    "${TFLM_CORE_PATH}/micro/micro_allocator.cc"
    "${TFLM_CORE_PATH}/micro/micro_interpreter.cc"
    "${TFLM_CORE_PATH}/micro/micro_utils.cc"
    "${TFLM_CORE_PATH}/micro/micro_op_resolver.cc"
    "${TFLM_CORE_PATH}/micro/micro_context.cc"
    "${TFLM_CORE_PATH}/micro/micro_interpreter_context.cc"
    "${TFLM_CORE_PATH}/micro/micro_interpreter_graph.cc"
)

# Arena allocators and memory planners (static tensor-arena management).
set(MEMORY_SOURCES
    "${TFLM_CORE_PATH}/micro/memory_planner/greedy_memory_planner.cc"
    "${TFLM_CORE_PATH}/micro/memory_planner/linear_memory_planner.cc"
    "${TFLM_CORE_PATH}/micro/arena_allocator/single_arena_buffer_allocator.cc"
    "${TFLM_CORE_PATH}/micro/arena_allocator/persistent_arena_buffer_allocator.cc"
    "${TFLM_CORE_PATH}/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc"
    "${TFLM_CORE_PATH}/micro/arena_allocator/recording_single_arena_buffer_allocator.cc"
    "${TFLM_CORE_PATH}/micro/recording_micro_allocator.cc"
    "${TFLM_CORE_PATH}/micro/memory_helpers.cc"
    "${TFLM_CORE_PATH}/micro/micro_allocation_info.cc"
)

# Kernel registration/runner plumbing.
set(KERNEL_REGISTRY_SOURCES
    "${TFLM_CORE_PATH}/micro/kernels/kernel_runner.cc"
    "${TFLM_CORE_PATH}/micro/kernels/kernel_util.cc"
)

# Shared (reference) code for kernels that have per-target variants.
set(KERNEL_COMMON_SOURCES
    "${TFLM_CORE_PATH}/micro/kernels/fully_connected_common.cc"
    "${TFLM_CORE_PATH}/micro/kernels/softmax_common.cc"
    "${TFLM_CORE_PATH}/micro/kernels/conv_common.cc"
    "${TFLM_CORE_PATH}/micro/kernels/depthwise_conv_common.cc"
    "${TFLM_CORE_PATH}/micro/kernels/pooling_common.cc"
    "${TFLM_CORE_PATH}/micro/kernels/add_common.cc"
    "${TFLM_CORE_PATH}/micro/kernels/mul_common.cc"
)

# The kernel implementations actually registered by the hello_world demo
# (FullyConnected plus the quantize/dequantize support ops).
set(KERNEL_IMPLEMENTATION_SOURCES
    "${TFLM_CORE_PATH}/micro/kernels/fully_connected.cc"
    "${TFLM_CORE_PATH}/micro/kernels/softmax.cc"
    "${TFLM_CORE_PATH}/micro/kernels/reshape.cc"
    "${TFLM_CORE_PATH}/micro/kernels/quantize.cc"
    "${TFLM_CORE_PATH}/micro/kernels/dequantize.cc"
    "${TFLM_CORE_PATH}/micro/kernels/dequantize_common.cc"
    "${TFLM_CORE_PATH}/micro/kernels/logistic.cc"
    "${TFLM_CORE_PATH}/micro/kernels/add.cc"
    "${TFLM_CORE_PATH}/micro/kernels/mul.cc"
    "${TFLM_CORE_PATH}/micro/kernels/concatenation.cc"
    "${TFLM_CORE_PATH}/micro/kernels/pooling.cc"
)

# Logging: micro_log.cc is what requires the platform DebugLog() function
# implemented in tflm_debug_log.cc.
set(LOG_SOURCES
    "${TFLM_CORE_PATH}/micro/micro_log.cc"
    "${TFLM_CORE_PATH}/micro/tflite_bridge/micro_error_reporter.cc"
    "${TFLM_COMPILER_PATH}/mlir/lite/core/api/error_reporter.cc"
    "${TFLM_CORE_PATH}/micro/micro_time.cc"
)

# Quantization math and tensor helpers used by the kernels.
set(TENSOR_UTILS_SOURCES
    "${TFLM_CORE_PATH}/kernels/internal/tensor_utils.cc"
    "${TFLM_CORE_PATH}/kernels/internal/quantization_util.cc"
    "${TFLM_CORE_PATH}/kernels/internal/common.cc"
    "${TFLM_CORE_PATH}/kernels/internal/portable_tensor_utils.cc"
    "${TFLM_CORE_PATH}/kernels/kernel_util.cc"
)

# Profiler, resource variables and test helpers.
set(OTHER_SOURCES
    "${TFLM_CORE_PATH}/micro/micro_profiler.cc"
    "${TFLM_CORE_PATH}/micro/micro_resource_variable.cc"
    "${TFLM_CORE_PATH}/micro/test_helpers.cc"
)

# Merge all source groups into one list for the build.
set(ALL_TFLM_SOURCES
    ${CORE_API_SOURCES}
    ${SCHEMA_SOURCES}
    ${C_API_SOURCES}
    ${CORE_INTERPRETER_SOURCES}
    ${MEMORY_SOURCES}
    ${KERNEL_REGISTRY_SOURCES}
    ${KERNEL_COMMON_SOURCES}
    ${KERNEL_IMPLEMENTATION_SOURCES}
    ${LOG_SOURCES}
    ${TENSOR_UTILS_SOURCES}
    ${OTHER_SOURCES}
)

# Export the accumulated include paths to the parent CMakeLists
# (this file is presumably included via add_subdirectory — PARENT_SCOPE
# has no effect otherwise; TODO confirm against the SDK build layout).
set(PUBLIC_HEADER "${PUBLIC_HEADER}" PARENT_SCOPE)

# Append the TFLM library sources and the sample's own files to the
# parent's SOURCES list. tflm_debug_log.cc supplies the platform
# DebugLog/DebugVsnprintf implementations required by micro_log.cc.
set(SOURCES "${SOURCES}"
    ${ALL_TFLM_SOURCES}
    "${CMAKE_CURRENT_SOURCE_DIR}/tflm_hello_world.cc"
    "${CMAKE_CURRENT_SOURCE_DIR}/hello_world_model.cc"
    "${CMAKE_CURRENT_SOURCE_DIR}/tflm_debug_log.cc"
    PARENT_SCOPE)

一、项目背景

在 WS63 嵌入式平台上验证 TensorFlow Lite Micro (TFLM) 的功能,确保 TFLM 库能够正确编译和运行。

二、文件结构

src/application/samples/tflm_hello_world/
├── CMakeLists.txt           # 构建配置,引用 TFLM 源码
├── tflm_hello_world.cc      # 主验证程序 ⭐
├── tflm_debug_log.cc        # 平台日志实现 ⭐
├── hello_world_model.cc     # TFLite 模型数据
├── test_cpp.cc              # C++ 编译测试
└── models/
    └── hello_world_model.h  # 模型头文件

三、核心设计决策

1. 日志系统设计(关键难点)

问题:TFLM 的 micro_log.cc 需要平台相关的 DebugLog 函数。

初次尝试(错误)

// ❌ 错误:使用可变参数
extern "C" void DebugLog(const char* format, ...) {
    va_list args;
    va_start(args, format);
    vsnprintf(buffer, sizeof(buffer), format, args);
    osal_printk("%s", buffer);
    va_end(args);
}

问题表现:输出乱码

  • Tensor arena size: 10534308 bytes(应该是 6000)

  • Status: 恖5(应该是 PASS/FAIL)

根因分析

// TFLM 的 micro_log.cc 调用方式
void VMicroPrintf(const char* format, va_list args) {
  DebugLog(format, args);  // ❌ 传递的是 va_list,不是 ...
}

最终方案(正确)

// ✅ 正确:接受 va_list 参数
extern "C" void DebugLog(const char* format, va_list args) {
    char buffer[512];
    vsnprintf(buffer, sizeof(buffer), format, args);
    osal_printk("%s", buffer);
}

// TFLM 还需要这个函数
extern "C" int DebugVsnprintf(char* buffer, size_t buf_size,
                              const char* format, va_list vlist) {
    return vsnprintf(buffer, buf_size, format, vlist);
}

2. 主程序设计

核心流程

  1. 加载模型 (GetModel)

  2. 创建 OpResolver (注册 FullyConnected)

  3. 创建 Interpreter (分配 TensorArena)

  4. 分配张量 (AllocateTensors)

  5. 获取输入/输出张量

  6. 执行推理测试 (Invoke)

  7. 验证结果

关键代码结构

// 使用 int8 量化模型
input->data.int8[0] = golden_inputs_int8[i];
interpreter.Invoke();
float y_pred = (output->data.int8[0] - zero_point) * scale;

3. 内存管理

静态内存分配(嵌入式环境要求):

constexpr int kTensorArenaSize = 6000;
static uint8_t tensor_arena[kTensorArenaSize];

避免使用 new/delete,防止动态内存分配问题。

四、遇到的问题与解决

问题 原因 解决方案
undefined reference to ‘DebugLog’ TFLM 需要平台日志函数 实现 DebugLog(const char*, va_list)
输出乱码、数值错误 函数签名不匹配(... vs va_list) 修正为接受 va_list 参数
编译时未发现源文件 CMakeLists.txt 缺少文件 添加 tflm_debug_log.cc 到 SOURCES

五、验证结果

测试 4 个输入值的 sin(x) 预测:

x (输入) sin(x) 期望值 预测值 误差 状态
0.77 0.6961 0.6964 0.00031 ✅ PASS
1.57 1.0000 0.9949 0.00508 ✅ PASS
2.30 0.7457 0.7296 0.01610 ✅ PASS
3.14 0.0016 -0.0083 0.00988 ✅ PASS

所有测试误差 < 0.05(容差),验证成功!

六、关键经验

  1. 仔细阅读 TFLM 头文件:debug_log.h 明确定义了 DebugLog 的签名

  2. 理解调用链:MicroPrintf → VMicroPrintf → DebugLog(va_list)

  3. 嵌入式适配要点

    • 提供平台相关的 DebugLog 和 DebugVsnprintf
    • 使用静态内存分配
    • 使用 osal_printk 而非 printf
  4. 量化模型处理:理解 scale 和 zero_point 的反量化公式

参考资源

问答