Skip to content

Commit 3466984

Browse files
author
quic_calvnguy
committed
[QNN-EP] Disable DSPQ polling when needed
- If the performance mode is updated to anything other than burst, disable DSPQ polling
1 parent ec9b8fe commit 3466984

File tree

7 files changed

+384
-207
lines changed

7 files changed

+384
-207
lines changed

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 29 additions & 172 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
#include "DSP/QnnDspCommon.h"
1414
#include "HTP/QnnHtpCommon.h"
1515
#include "HTP/QnnHtpContext.h"
16-
#include "HTP/QnnHtpPerfInfrastructure.h"
1716
#include "HTP/QnnHtpSystemContext.h"
1817
#include "IR/QnnIrCommon.h"
1918
#include "IR/QnnIrGraph.h"
@@ -1354,192 +1353,50 @@ Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_
13541353
return Status::OK();
13551354
}
13561355

1357-
Status QnnBackendManager::SetHtpPowerConfig(uint32_t htp_power_config_client_id,
1358-
HtpPerformanceMode htp_performance_mode) {
1356+
Status QnnBackendManager::SetRpcPowerConfigs(uint32_t htp_power_config_client_id,
1357+
uint32_t rpc_polling_time,
1358+
uint32_t rpc_control_latency) {
13591359
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
13601360
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
13611361
// set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded.
13621362
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete.");
1363-
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
1364-
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
1365-
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");
1366-
1367-
auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
1368-
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
1369-
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
1370-
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;
13711363

1372-
constexpr const int kNumConfigs = 1;
1373-
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> power_configs(
1374-
kNumConfigs);
1375-
QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0];
1376-
dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
1377-
QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config;
1378-
dcvs_v3.contextId = htp_power_config_client_id;
1379-
dcvs_v3.setSleepDisable = 0;
1380-
dcvs_v3.sleepDisable = 0;
1381-
dcvs_v3.setDcvsEnable = 1;
1382-
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE;
1383-
// choose performance mode
1384-
switch (htp_performance_mode) {
1385-
case HtpPerformanceMode::kHtpBurst:
1386-
dcvs_v3.setSleepLatency = 1; // true
1387-
dcvs_v3.sleepLatency = kSleepMinLatency;
1388-
dcvs_v3.dcvsEnable = kDcvsDisable;
1389-
dcvs_v3.setBusParams = 1;
1390-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
1391-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
1392-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
1393-
dcvs_v3.setCoreParams = 1;
1394-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
1395-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
1396-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
1397-
break;
1398-
case HtpPerformanceMode::kHtpSustainedHighPerformance:
1399-
case HtpPerformanceMode::kHtpHighPerformance:
1400-
dcvs_v3.setSleepLatency = 1; // true
1401-
dcvs_v3.sleepLatency = kSleepLowLatency;
1402-
dcvs_v3.dcvsEnable = kDcvsDisable;
1403-
dcvs_v3.setBusParams = 1;
1404-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO;
1405-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO;
1406-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO;
1407-
dcvs_v3.setCoreParams = 1;
1408-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO;
1409-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO;
1410-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO;
1411-
break;
1412-
case HtpPerformanceMode::kHtpBalanced:
1413-
dcvs_v3.setSleepLatency = 1; // true
1414-
dcvs_v3.sleepLatency = kSleepMediumLatency;
1415-
dcvs_v3.dcvsEnable = kDcvsEnable;
1416-
dcvs_v3.setBusParams = 1;
1417-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
1418-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
1419-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
1420-
dcvs_v3.setCoreParams = 1;
1421-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
1422-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
1423-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
1424-
break;
1425-
case HtpPerformanceMode::kHtpLowBalanced:
1426-
dcvs_v3.setSleepLatency = 1; // true
1427-
dcvs_v3.sleepLatency = kSleepMediumLatency;
1428-
dcvs_v3.dcvsEnable = kDcvsEnable;
1429-
dcvs_v3.setBusParams = 1;
1430-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM;
1431-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM;
1432-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM;
1433-
dcvs_v3.setCoreParams = 1;
1434-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM;
1435-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM;
1436-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM;
1437-
break;
1438-
case HtpPerformanceMode::kHtpHighPowerSaver:
1439-
dcvs_v3.setSleepLatency = 1; // true
1440-
dcvs_v3.sleepLatency = kSleepMediumLatency;
1441-
dcvs_v3.dcvsEnable = kDcvsEnable;
1442-
dcvs_v3.setBusParams = 1;
1443-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
1444-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
1445-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
1446-
dcvs_v3.setCoreParams = 1;
1447-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
1448-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
1449-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
1450-
break;
1451-
case HtpPerformanceMode::kHtpPowerSaver:
1452-
dcvs_v3.setSleepLatency = 1; // true
1453-
dcvs_v3.sleepLatency = kSleepMediumLatency;
1454-
dcvs_v3.dcvsEnable = kDcvsEnable;
1455-
dcvs_v3.setBusParams = 1;
1456-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS;
1457-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS;
1458-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS;
1459-
dcvs_v3.setCoreParams = 1;
1460-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS;
1461-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS;
1462-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS;
1463-
break;
1464-
case HtpPerformanceMode::kHtpLowPowerSaver:
1465-
dcvs_v3.setSleepLatency = 1; // true
1466-
dcvs_v3.sleepLatency = kSleepMediumLatency;
1467-
dcvs_v3.dcvsEnable = kDcvsEnable;
1468-
dcvs_v3.setBusParams = 1;
1469-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2;
1470-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2;
1471-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2;
1472-
dcvs_v3.setCoreParams = 1;
1473-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2;
1474-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2;
1475-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2;
1476-
break;
1477-
case HtpPerformanceMode::kHtpExtremePowerSaver:
1478-
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE;
1479-
dcvs_v3.setSleepLatency = 1; // true
1480-
dcvs_v3.sleepLatency = kSleepMediumLatency;
1481-
dcvs_v3.dcvsEnable = kDcvsEnable;
1482-
dcvs_v3.setBusParams = 1;
1483-
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
1484-
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
1485-
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
1486-
dcvs_v3.setCoreParams = 1;
1487-
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
1488-
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
1489-
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
1490-
break;
1491-
default:
1492-
ORT_THROW("Invalid performance profile %d", static_cast<int>(htp_performance_mode));
1493-
break;
1494-
}
1495-
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs);
1496-
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
1497-
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");
1364+
std::lock_guard<std::mutex> lock(htp_power_config_mutex_);
1365+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(rpc_polling_time));
1366+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(rpc_control_latency));
1367+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_client_id, GetQnnInterface()));
14981368

14991369
return Status::OK();
15001370
}
15011371

1502-
Status QnnBackendManager::SetRpcPowerConfigs(uint32_t htp_power_config_client_id,
1503-
uint32_t rpc_control_latency,
1504-
uint32_t rpc_polling_time) {
1372+
Status QnnBackendManager::SetHtpPerformanceMode(uint32_t htp_power_config_client_id,
1373+
HtpPerformanceMode htp_performance_mode) {
15051374
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
15061375
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
1507-
// set RPC control latency. Otherwise, this causes a segfault because the QNN backend library is unloaded.
1508-
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP RPC control latency if backend setup is not complete.");
1509-
1510-
constexpr int kNumRpcPollingPowerConfigs = 2;
1511-
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs;
1512-
rpc_power_configs.reserve(kNumRpcPollingPowerConfigs);
1376+
// set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded.
1377+
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete.");
15131378

1514-
// Set rpc control latency here
1515-
if (rpc_control_latency != 0) {
1516-
auto& rpc_control_latency_cfg = rpc_power_configs.emplace_back();
1517-
rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;
1518-
rpc_control_latency_cfg.rpcControlLatencyConfig = rpc_control_latency;
1519-
}
1379+
std::lock_guard<std::mutex> lock(htp_power_config_mutex_);
1380+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(htp_performance_mode, htp_power_config_client_id));
1381+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_client_id, GetQnnInterface()));
15201382

1521-
// Note: v68 does not support rpc polling mode
1522-
if (rpc_polling_time != 0) {
1523-
auto& rpc_polling_time_cfg = rpc_power_configs.emplace_back();
1524-
rpc_polling_time_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
1525-
rpc_polling_time_cfg.rpcPollingTimeConfig = rpc_polling_time;
1526-
}
1527-
1528-
if (rpc_power_configs.size() > 0) {
1529-
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
1530-
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
1531-
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");
1383+
return Status::OK();
1384+
}
15321385

1533-
auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
1534-
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
1535-
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
1536-
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;
1386+
Status QnnBackendManager::SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
1387+
HtpPerformanceMode htp_performance_mode,
1388+
uint32_t rpc_polling_time,
1389+
uint32_t rpc_control_latency) {
1390+
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
1391+
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
1392+
// set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded.
1393+
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete.");
15371394

1538-
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr =
1539-
ObtainNullTermPtrVector(rpc_power_configs);
1540-
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
1541-
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency.");
1542-
}
1395+
std::lock_guard<std::mutex> lock(htp_power_config_mutex_);
1396+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(rpc_polling_time));
1397+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(rpc_control_latency));
1398+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(htp_performance_mode, htp_power_config_client_id));
1399+
ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_client_id, GetQnnInterface()));
15431400

15441401
return Status::OK();
15451402
}

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
#include "core/providers/qnn/builder/op_builder_factory.h"
2828
#include "core/providers/qnn/builder/qnn_context_mem_handle_manager.h"
2929
#include "core/providers/qnn/builder/qnn_def.h"
30+
#include "core/providers/qnn/builder/qnn_htp_power_config_manager.h"
3031
#include "core/providers/qnn/builder/qnn_profile_serializer.h"
3132
#include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h"
3233

@@ -163,12 +164,17 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
163164

164165
Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);
165166

166-
Status SetHtpPowerConfig(uint32_t htp_power_config_client_id,
167-
HtpPerformanceMode htp_performance_mode);
168-
169167
Status SetRpcPowerConfigs(uint32_t htp_power_config_client_id,
170-
uint32_t rpc_control_latency,
171-
uint32_t rpc_polling_time);
168+
uint32_t rpc_polling_time,
169+
uint32_t rpc_control_latency);
170+
171+
Status SetHtpPerformanceMode(uint32_t htp_power_config_client_id,
172+
HtpPerformanceMode htp_performance_mode);
173+
174+
Status SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
175+
HtpPerformanceMode htp_performance_mode,
176+
uint32_t rpc_polling_time,
177+
uint32_t rpc_control_latency);
172178

173179
const QNN_INTERFACE_VER_TYPE& GetQnnInterface() { return qnn_interface_; }
174180

@@ -300,16 +306,6 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
300306

301307
bool IsDevicePropertySupported();
302308

303-
template <typename T>
304-
std::vector<std::add_pointer_t<std::add_const_t<T>>> ObtainNullTermPtrVector(const std::vector<T>& vec) {
305-
std::vector<std::add_pointer_t<std::add_const_t<T>>> ret;
306-
for (auto& elem : vec) {
307-
ret.push_back(&elem);
308-
}
309-
ret.push_back(nullptr);
310-
return ret;
311-
}
312-
313309
std::string GetBackendBuildId() {
314310
char* backend_build_id{nullptr};
315311
if (QNN_SUCCESS != qnn_interface_.backendGetBuildId((const char**)&backend_build_id)) {
@@ -419,6 +415,8 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
419415
QnnBackend_Config_t** backend_config_ = nullptr;
420416
Qnn_LogHandle_t log_handle_ = nullptr;
421417
Qnn_DeviceHandle_t device_handle_ = nullptr;
418+
power::HtpPowerConfigManager htp_power_config_manager_;
419+
std::mutex htp_power_config_mutex_;
422420

423421
// Map of Qnn_ContextHandle_t to QnnContextHandleRecord.
424422
// The QnnContextHandleRecord has ownership of the Qnn_ContextHandle_t.

onnxruntime/core/providers/qnn/builder/qnn_def.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,9 @@ constexpr const int kSleepMediumLatency = 1000;
111111
constexpr const int kSleepHighLatency = 2000;
112112
constexpr const int kDcvsDisable = 0;
113113
constexpr const int kDcvsEnable = 1;
114+
constexpr const uint32_t kDisableRpcPolling = 0;
115+
constexpr const uint32_t kDisableRpcControlLatency = 0;
116+
constexpr const uint32_t kMaxRpcPolling = 9999;
114117

115118
struct OnnxTensorInfo {
116119
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTensorInfo);

0 commit comments

Comments
 (0)