summary refs log tree commit diff
path: root/drivers/misc
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2020-07-27 11:49:37 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2020-07-27 11:49:37 +0200
commit65a9bde6ed484880987a6d88de6e372eca52059f (patch)
treea192e1a8a54d334f457413f9c16041ffdf11533d /drivers/misc
parent860e73b49cd933c708e3e1e1e07cdea81b6acd1c (diff)
parent92ed301919932f777713b9172e525674157e983d (diff)
downloadlinux-65a9bde6ed484880987a6d88de6e372eca52059f.tar.gz
Merge 5.8-rc7 into char-misc-next
This should resolve the merge/build issues reported when trying to
create linux-next.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c23
-rw-r--r--drivers/misc/habanalabs/common/device.c2
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c10
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h19
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c2
-rw-r--r--drivers/misc/habanalabs/common/hwmon.c19
-rw-r--r--drivers/misc/habanalabs/common/sysfs.c11
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c124
-rw-r--r--drivers/misc/habanalabs/goya/goya.c20
9 files changed, 138 insertions, 92 deletions
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index fc4372c18ce2..0bc036e01ee8 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -36,7 +36,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 	pkt.i2c_reg = i2c_reg;
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					HL_DEVICE_TIMEOUT_USEC, (long *) val);
+						0, (long *) val);
 
 	if (rc)
 		dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
@@ -63,7 +63,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 	pkt.value = cpu_to_le64(val);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					HL_DEVICE_TIMEOUT_USEC, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
@@ -87,7 +87,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
 	pkt.value = cpu_to_le64(state);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						HL_DEVICE_TIMEOUT_USEC, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
@@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
 	if (*ppos)
 		return 0;
 
-	sprintf(tmp_buf, "%d\n", hdev->clock_gating);
+	sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
 	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
 			strlen(tmp_buf) + 1);
 
@@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
 {
 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 	struct hl_device *hdev = entry->hdev;
-	u32 value;
+	u64 value;
 	ssize_t rc;
 
 	if (atomic_read(&hdev->in_reset)) {
@@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
 		return 0;
 	}
 
-	rc = kstrtouint_from_user(buf, count, 10, &value);
+	rc = kstrtoull_from_user(buf, count, 16, &value);
 	if (rc)
 		return rc;
 
-	if (value) {
-		hdev->clock_gating = 1;
-		if (hdev->asic_funcs->enable_clock_gating)
-			hdev->asic_funcs->enable_clock_gating(hdev);
-	} else {
-		if (hdev->asic_funcs->disable_clock_gating)
-			hdev->asic_funcs->disable_clock_gating(hdev);
-		hdev->clock_gating = 0;
-	}
+	hdev->clock_gating_mask = value;
+	hdev->asic_funcs->set_clock_gating(hdev);
 
 	return count;
 }
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 9919ff121067..be16b75bdfdb 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -630,7 +630,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
 		hdev->in_debug = 0;
 
 		if (!hdev->hard_reset_pending)
-			hdev->asic_funcs->enable_clock_gating(hdev);
+			hdev->asic_funcs->set_clock_gating(hdev);
 
 		goto out;
 	}
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index b2b84510b932..5981dbd8c6df 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -64,7 +64,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
 	pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
 
 	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
-				sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
+						sizeof(pkt), 0, NULL);
 }
 
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
@@ -147,7 +147,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
 	pkt.value = cpu_to_le64(event_type);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			HL_DEVICE_TIMEOUT_USEC, &result);
+						0, &result);
 
 	if (rc)
 		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
@@ -186,7 +186,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
 						ARMCP_PKT_CTL_OPCODE_SHIFT);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
-			total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
+						total_pkt_size, 0, &result);
 
 	if (rc)
 		dev_err(hdev->dev, "failed to unmask IRQ array\n");
@@ -207,7 +207,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev)
 	test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
-			sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+						sizeof(test_pkt), 0, &result);
 
 	if (!rc) {
 		if (result != ARMCP_PACKET_FENCE_VAL)
@@ -251,7 +251,7 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
 	hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
-			sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+						sizeof(hb_pkt), 0, &result);
 
 	if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
 		rc = -EIO;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index bf9abfa47b7a..eb42aa5476a9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -627,8 +627,9 @@ enum div_select_defs {
  * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
  *                              ASID-VA-size mask.
  * @send_heartbeat: send is-alive packet to ArmCP and verify response.
- * @enable_clock_gating: enable clock gating for reducing power consumption.
- * @disable_clock_gating: disable clock for accessing registers on HBW.
+ * @set_clock_gating: enable/disable clock gating per engine according to
+ *                    clock gating mask in hdev
+ * @disable_clock_gating: disable clock gating completely
  * @debug_coresight: perform certain actions on Coresight for debugging.
  * @is_device_idle: return true if device is idle, false otherwise.
  * @soft_reset_late_init: perform certain actions needed after soft reset.
@@ -636,7 +637,11 @@ enum div_select_defs {
  * @hw_queues_unlock: release H/W queues lock.
  * @get_pci_id: retrieve PCI ID.
  * @get_eeprom_data: retrieve EEPROM data from F/W.
- * @send_cpu_message: send buffer to ArmCP.
+ * @send_cpu_message: send message to F/W. If the message is timedout, the
+ *                    driver will eventually reset the device. The timeout can
+ *                    be determined by the calling function or it can be 0 and
+ *                    then the timeout is the default timeout for the specific
+ *                    ASIC
  * @get_hw_state: retrieve the H/W state
  * @pci_bars_map: Map PCI BARs.
  * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
@@ -728,7 +733,7 @@ struct hl_asic_funcs {
 	int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
 			u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
-	void (*enable_clock_gating)(struct hl_device *hdev);
+	void (*set_clock_gating)(struct hl_device *hdev);
 	void (*disable_clock_gating)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, void *data);
 	bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
@@ -1452,6 +1457,9 @@ struct hl_device_idle_busy_ts {
  * @max_power: the max power of the device, as configured by the sysadmin. This
  *             value is saved so in case of hard-reset, the driver will restore
  *             this value and update the F/W after the re-initialization
+ * @clock_gating_mask: is clock gating enabled. bitmask that represents the
+ *                     different engines. See debugfs-driver-habanalabs for
+ *                     details.
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @cs_active_cnt: number of active command submissions on this device (active
@@ -1479,7 +1487,6 @@ struct hl_device_idle_busy_ts {
  * @init_done: is the initialization of the device done.
  * @mmu_enable: is MMU enabled.
  * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
- * @clock_gating: is clock gating enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
  * @dma_mask: the dma mask that was set for this device
  * @in_debug: is device under debug. This, together with fpriv_list, enforces
@@ -1556,6 +1563,7 @@ struct hl_device {
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
+	u64				clock_gating_mask;
 	atomic_t			in_reset;
 	enum hl_pll_frequency		curr_pll_profile;
 	int				cs_active_cnt;
@@ -1577,7 +1585,6 @@ struct hl_device {
 	u8				dram_default_page_mapping;
 	u8				pmmu_huge_range;
 	u8				init_done;
-	u8				clock_gating;
 	u8				device_cpu_disabled;
 	u8				dma_mask;
 	u8				in_debug;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index f38664b03865..c6b31e93fb5e 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -232,7 +232,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
 	hdev->fw_loading = 1;
 	hdev->cpu_queues_enable = 1;
 	hdev->heartbeat = 1;
-	hdev->clock_gating = 1;
+	hdev->clock_gating_mask = ULONG_MAX;
 
 	hdev->reset_pcilink = 0;
 	hdev->axi_drain = 0;
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index 8c6cd77e6af6..b997336fa75f 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -10,7 +10,6 @@
 #include <linux/pci.h>
 #include <linux/hwmon.h>
 
-#define SENSORS_PKT_TIMEOUT		1000000	/* 1s */
 #define HWMON_NR_SENSOR_TYPES		(hwmon_pwm + 1)
 
 int hl_build_hwmon_channel_info(struct hl_device *hdev,
@@ -323,7 +322,7 @@ int hl_get_temperature(struct hl_device *hdev,
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			SENSORS_PKT_TIMEOUT, value);
+						0, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -350,7 +349,7 @@ int hl_set_temperature(struct hl_device *hdev,
 	pkt.value = __cpu_to_le64(value);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						SENSORS_PKT_TIMEOUT, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev,
@@ -374,7 +373,7 @@ int hl_get_voltage(struct hl_device *hdev,
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, value);
+						0, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -400,7 +399,7 @@ int hl_get_current(struct hl_device *hdev,
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, value);
+						0, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -426,7 +425,7 @@ int hl_get_fan_speed(struct hl_device *hdev,
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, value);
+						0, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -452,7 +451,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, value);
+						0, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -479,7 +478,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
 	pkt.value = cpu_to_le64(value);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SENSORS_PKT_TIMEOUT, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev,
@@ -502,7 +501,7 @@ int hl_set_voltage(struct hl_device *hdev,
 	pkt.value = __cpu_to_le64(value);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						SENSORS_PKT_TIMEOUT, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev,
@@ -527,7 +526,7 @@ int hl_set_current(struct hl_device *hdev,
 	pkt.value = __cpu_to_le64(value);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						SENSORS_PKT_TIMEOUT, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index c4e7c682d584..b3cb0ac4721c 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -9,9 +9,6 @@
 
 #include <linux/pci.h>
 
-#define SET_CLK_PKT_TIMEOUT	1000000	/* 1s */
-#define SET_PWR_PKT_TIMEOUT	1000000	/* 1s */
-
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 {
 	struct armcp_packet pkt;
@@ -29,7 +26,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 	pkt.pll_index = cpu_to_le32(pll_index);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						SET_CLK_PKT_TIMEOUT, &result);
+						0, &result);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -54,7 +51,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 	pkt.value = cpu_to_le64(freq);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SET_CLK_PKT_TIMEOUT, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev,
@@ -74,7 +71,7 @@ u64 hl_get_max_power(struct hl_device *hdev)
 				ARMCP_PKT_CTL_OPCODE_SHIFT);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-						SET_PWR_PKT_TIMEOUT, &result);
+						0, &result);
 
 	if (rc) {
 		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
@@ -96,7 +93,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value)
 	pkt.value = cpu_to_le64(value);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					SET_PWR_PKT_TIMEOUT, NULL);
+						0, NULL);
 
 	if (rc)
 		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4a1a52608fc0..78fbff646f99 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -80,6 +80,7 @@
 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
+#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
 
 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
 
@@ -98,6 +99,11 @@
 
 #define GAUDI_ARB_WDT_TIMEOUT		0x1000000
 
+#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
+		BIT(GAUDI_ENGINE_ID_MME_0) |\
+		BIT(GAUDI_ENGINE_ID_MME_2) |\
+		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
+
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -106,14 +112,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 };
 
 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
-	[GAUDI_PCI_DMA_1] = 0,
-	[GAUDI_PCI_DMA_2] = 1,
-	[GAUDI_PCI_DMA_3] = 5,
-	[GAUDI_HBM_DMA_1] = 2,
-	[GAUDI_HBM_DMA_2] = 3,
-	[GAUDI_HBM_DMA_3] = 4,
-	[GAUDI_HBM_DMA_4] = 6,
-	[GAUDI_HBM_DMA_5] = 7
+	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
+	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
+	[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
+	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
+	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
+	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
+	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
+	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
 };
 
 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
@@ -1766,7 +1772,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
 
 	gaudi_init_hbm_cred(hdev);
 
-	gaudi_disable_clock_gating(hdev);
+	hdev->asic_funcs->disable_clock_gating(hdev);
 
 	for (tpc_id = 0, tpc_offset = 0;
 				tpc_id < TPC_NUMBER_OF_ENGINES;
@@ -2475,46 +2481,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
 }
 
-static void gaudi_enable_clock_gating(struct hl_device *hdev)
+static void gaudi_set_clock_gating(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 qman_offset;
 	int i;
 
-	if (!hdev->clock_gating)
-		return;
-
-	if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
-		return;
-
 	/* In case we are during debug session, don't enable the clock gate
 	 * as it may interfere
 	 */
 	if (hdev->in_debug)
 		return;
 
-	for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
+	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
+		if (!(hdev->clock_gating_mask &
+					(BIT_ULL(gaudi_dma_assignment[i]))))
+			continue;
+
 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
 				QMAN_UPPER_CP_CGM_PWR_GATE_EN);
 	}
 
-	for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
+	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
+		if (!(hdev->clock_gating_mask &
+					(BIT_ULL(gaudi_dma_assignment[i]))))
+			continue;
+
 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
 				QMAN_COMMON_CP_CGM_PWR_GATE_EN);
 	}
 
-	WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
-	WREG32(mmMME0_QM_CGM_CFG,
-			QMAN_COMMON_CP_CGM_PWR_GATE_EN);
-	WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
-	WREG32(mmMME2_QM_CGM_CFG,
-			QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+	if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) {
+		WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
+		WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+	}
+
+	if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) {
+		WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
+		WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+	}
 
 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
+		if (!(hdev->clock_gating_mask &
+					(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))))
+			continue;
+
 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
 				QMAN_CGM1_PWR_GATE_EN);
 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
@@ -2594,7 +2609,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
 	gaudi_stop_hbm_dma_qmans(hdev);
 	gaudi_stop_pci_dma_qmans(hdev);
 
-	gaudi_disable_clock_gating(hdev);
+	hdev->asic_funcs->disable_clock_gating(hdev);
 
 	msleep(wait_timeout_ms);
 
@@ -2920,7 +2935,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
 
 	gaudi_init_tpc_qmans(hdev);
 
-	gaudi_enable_clock_gating(hdev);
+	hdev->asic_funcs->set_clock_gating(hdev);
 
 	gaudi_enable_timestamp(hdev);
 
@@ -3025,7 +3040,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 					HW_CAP_HBM_DMA | HW_CAP_PLL |
 					HW_CAP_MMU |
 					HW_CAP_SRAM_SCRAMBLER |
-					HW_CAP_HBM_SCRAMBLER);
+					HW_CAP_HBM_SCRAMBLER |
+					HW_CAP_CLK_GATE);
+
 	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
 }
 
@@ -3376,6 +3393,9 @@ static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
 		return 0;
 	}
 
+	if (!timeout)
+		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
+
 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
 						timeout, result);
 }
@@ -3778,6 +3798,12 @@ static int gaudi_validate_cb(struct hl_device *hdev,
 			rc = -EPERM;
 			break;
 
+		case PACKET_WREG_BULK:
+			dev_err(hdev->dev,
+				"User not allowed to use WREG_BULK\n");
+			rc = -EPERM;
+			break;
+
 		case PACKET_LOAD_AND_EXE:
 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
 				(struct packet_load_and_exe *) user_pkt);
@@ -3793,7 +3819,6 @@ static int gaudi_validate_cb(struct hl_device *hdev,
 			break;
 
 		case PACKET_WREG_32:
-		case PACKET_WREG_BULK:
 		case PACKET_MSG_LONG:
 		case PACKET_MSG_SHORT:
 		case PACKET_REPEAT:
@@ -4453,13 +4478,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
 	int rc = 0;
 
 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
-		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+				(hdev->clock_gating_mask &
+						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
 			dev_err_ratelimited(hdev->dev,
 				"Can't read register - clock gating is enabled!\n");
 			rc = -EFAULT;
 		} else {
 			*val = RREG32(addr - CFG_BASE);
 		}
+
 	} else if ((addr >= SRAM_BASE_ADDR) &&
 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4495,13 +4525,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
 	int rc = 0;
 
 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
-		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+				(hdev->clock_gating_mask &
+						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
 			dev_err_ratelimited(hdev->dev,
 				"Can't write register - clock gating is enabled!\n");
 			rc = -EFAULT;
 		} else {
 			WREG32(addr - CFG_BASE, val);
 		}
+
 	} else if ((addr >= SRAM_BASE_ADDR) &&
 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4537,7 +4572,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
 	int rc = 0;
 
 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
-		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+				(hdev->clock_gating_mask &
+						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
 			dev_err_ratelimited(hdev->dev,
 				"Can't read register - clock gating is enabled!\n");
 			rc = -EFAULT;
@@ -4547,6 +4586,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
 
 			*val = (((u64) val_h) << 32) | val_l;
 		}
+
 	} else if ((addr >= SRAM_BASE_ADDR) &&
 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4583,7 +4623,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
 	int rc = 0;
 
 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
-		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+				(hdev->clock_gating_mask &
+						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
 			dev_err_ratelimited(hdev->dev,
 				"Can't write register - clock gating is enabled!\n");
 			rc = -EFAULT;
@@ -4592,6 +4636,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
 			WREG32(addr + sizeof(u32) - CFG_BASE,
 				upper_32_bits(val));
 		}
+
 	} else if ((addr >= SRAM_BASE_ADDR) &&
 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4813,7 +4858,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
 
-	hdev->asic_funcs->enable_clock_gating(hdev);
+	hdev->asic_funcs->set_clock_gating(hdev);
 
 	mutex_unlock(&gaudi->clk_gate_mutex);
 }
@@ -5205,7 +5250,8 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
 
 enable_clk_gate:
 	if (params->disable_clock_gating) {
-		hdev->asic_funcs->enable_clock_gating(hdev);
+		hdev->asic_funcs->set_clock_gating(hdev);
+
 		mutex_unlock(&gaudi->clk_gate_mutex);
 	}
 
@@ -5552,7 +5598,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
 	/* Clear interrupts */
 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
 
-	hdev->asic_funcs->enable_clock_gating(hdev);
+	hdev->asic_funcs->set_clock_gating(hdev);
 
 	mutex_unlock(&gaudi->clk_gate_mutex);
 
@@ -6078,7 +6124,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
 	if (s)
 		seq_puts(s, "\n");
 
-	hdev->asic_funcs->enable_clock_gating(hdev);
+	hdev->asic_funcs->set_clock_gating(hdev);
 
 	mutex_unlock(&gaudi->clk_gate_mutex);
 
@@ -6179,7 +6225,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 		dev_err(hdev->dev,
 			"Timeout while waiting for TPC%d icache prefetch\n",
 			tpc_id);
-		hdev->asic_funcs->enable_clock_gating(hdev);
+		hdev->asic_funcs->set_clock_gating(hdev);
 		mutex_unlock(&gaudi->clk_gate_mutex);
 		return -EIO;
 	}
@@ -6208,7 +6254,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 		1000,
 		kernel_timeout);
 
-	hdev->asic_funcs->enable_clock_gating(hdev);
+	hdev->asic_funcs->set_clock_gating(hdev);
 	mutex_unlock(&gaudi->clk_gate_mutex);
 
 	if (rc) {
@@ -6517,7 +6563,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
 	.send_heartbeat = gaudi_send_heartbeat,
-	.enable_clock_gating = gaudi_enable_clock_gating,
+	.set_clock_gating = gaudi_set_clock_gating,
 	.disable_clock_gating = gaudi_disable_clock_gating,
 	.debug_coresight = gaudi_debug_coresight,
 	.is_device_idle = gaudi_is_device_idle,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 4473ded313d6..a4dfea7556da 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -88,6 +88,7 @@
 #define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
 #define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
 #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
+#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
 
 #define GOYA_QMAN0_FENCE_VAL		0xD169B243
 
@@ -2904,6 +2905,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
 		return 0;
 	}
 
+	if (!timeout)
+		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
+
 	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
 					timeout, result);
 }
@@ -4499,8 +4503,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
 						ARMCP_PKT_CTL_OPCODE_SHIFT);
 
-	rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
-			HL_DEVICE_TIMEOUT_USEC, &result);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
+						total_pkt_size,	0, &result);
 
 	if (rc)
 		dev_err(hdev->dev, "failed to unmask IRQ array\n");
@@ -4532,8 +4536,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
 				ARMCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.value = cpu_to_le64(event_type);
 
-	rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			HL_DEVICE_TIMEOUT_USEC, &result);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						0, &result);
 
 	if (rc)
 		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
@@ -5096,14 +5100,14 @@ int goya_armcp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static void goya_enable_clock_gating(struct hl_device *hdev)
+static void goya_set_clock_gating(struct hl_device *hdev)
 {
-
+	/* clock gating not supported in Goya */
 }
 
 static void goya_disable_clock_gating(struct hl_device *hdev)
 {
-
+	/* clock gating not supported in Goya */
 }
 
 static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
@@ -5322,7 +5326,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
 	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
 	.send_heartbeat = goya_send_heartbeat,
-	.enable_clock_gating = goya_enable_clock_gating,
+	.set_clock_gating = goya_set_clock_gating,
 	.disable_clock_gating = goya_disable_clock_gating,
 	.debug_coresight = goya_debug_coresight,
 	.is_device_idle = goya_is_device_idle,