summary refs log tree commit diff
path: root/drivers/misc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-08 08:56:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-08 08:56:37 -0700
commita09476668e3016ea4a7b0a7ebd02f44e0546c12c (patch)
tree0a2e435d40b1785bc3df0c455fbf858adc28d36c /drivers/misc
parente8bc52cb8df80c31c73c726ab58ea9746e9ff734 (diff)
parent7cd04013fbf3e6dcb67ca6b59aa813269a2ad9ce (diff)
downloadlinux-a09476668e3016ea4a7b0a7ebd02f44e0546c12c.tar.gz
Merge tag 'char-misc-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc and other driver updates from Greg KH:
 "Here is the large set of char/misc and other small driver subsystem
  changes for 6.1-rc1. Loads of different things in here:

   - IIO driver updates, additions, and changes. Probably the largest
     part of the diffstat

   - habanalabs driver update with support for new hardware and
     features, the second largest part of the diff.

   - fpga subsystem driver updates and additions

   - mhi subsystem updates

   - Coresight driver updates

   - gnss subsystem updates

   - extcon driver updates

   - icc subsystem updates

   - fsi subsystem updates

   - nvmem subsystem and driver updates

   - misc driver updates

   - speakup driver additions for new features

   - lots of tiny driver updates and cleanups

  All of these have been in the linux-next tree for a while with no
  reported issues"

* tag 'char-misc-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (411 commits)
  w1: Split memcpy() of struct cn_msg flexible array
  spmi: pmic-arb: increase SPMI transaction timeout delay
  spmi: pmic-arb: block access for invalid PMIC arbiter v5 SPMI writes
  spmi: pmic-arb: correct duplicate APID to PPID mapping logic
  spmi: pmic-arb: add support to dispatch interrupt based on IRQ status
  spmi: pmic-arb: check apid against limits before calling irq handler
  spmi: pmic-arb: do not ack and clear peripheral interrupts in cleanup_irq
  spmi: pmic-arb: handle spurious interrupt
  spmi: pmic-arb: add a print in cleanup_irq
  drivers: spmi: Directly use ida_alloc()/free()
  MAINTAINERS: add TI ECAP driver info
  counter: ti-ecap-capture: capture driver support for ECAP
  Documentation: ABI: sysfs-bus-counter: add frequency & num_overflows items
  dt-bindings: counter: add ti,am62-ecap-capture.yaml
  counter: Introduce the COUNTER_COMP_ARRAY component type
  counter: Consolidate Counter extension sysfs attribute creation
  counter: Introduce the Count capture component
  counter: 104-quad-8: Add Signal polarity component
  counter: Introduce the Signal polarity component
  counter: interrupt-cnt: Implement watch_validate callback
  ...
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/Kconfig1
-rw-r--r--drivers/misc/Makefile3
-rw-r--r--drivers/misc/altera-stapl/altera.c8
-rw-r--r--drivers/misc/bcm-vk/bcm_vk_dev.c8
-rw-r--r--drivers/misc/eeprom/eeprom.c2
-rw-r--r--drivers/misc/eeprom/idt_89hpesx.c2
-rw-r--r--drivers/misc/fastrpc.c2
-rw-r--r--drivers/misc/habanalabs/Kconfig1
-rw-r--r--drivers/misc/habanalabs/Makefile8
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c127
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c75
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c35
-rw-r--r--drivers/misc/habanalabs/common/device.c147
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c184
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h171
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c44
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c123
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c4
-rw-r--r--drivers/misc/habanalabs/common/hwmon.c24
-rw-r--r--drivers/misc/habanalabs/common/memory.c57
-rw-r--r--drivers/misc/habanalabs/common/memory_mgr.c10
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu.c31
-rw-r--r--drivers/misc/habanalabs/common/sysfs.c10
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c185
-rw-r--r--drivers/misc/habanalabs/gaudi2/gaudi2.c671
-rw-r--r--drivers/misc/habanalabs/gaudi2/gaudi2P.h10
-rw-r--r--drivers/misc/habanalabs/gaudi2/gaudi2_masks.h21
-rw-r--r--drivers/misc/habanalabs/gaudi2/gaudi2_security.c26
-rw-r--r--drivers/misc/habanalabs/goya/goya.c62
-rw-r--r--drivers/misc/habanalabs/include/common/cpucp_if.h103
-rw-r--r--drivers/misc/habanalabs/include/common/hl_boot_if.h37
-rw-r--r--drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h2
-rw-r--r--drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h185
-rw-r--r--drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h57
-rw-r--r--drivers/misc/ics932s401.c2
-rw-r--r--drivers/misc/mchp_pci1xxxx/Kconfig13
-rw-r--r--drivers/misc/mchp_pci1xxxx/Makefile1
-rw-r--r--drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c165
-rw-r--r--drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h28
-rw-r--r--drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c427
-rw-r--r--drivers/misc/mei/bus-fixup.c2
-rw-r--r--drivers/misc/mei/gsc-me.c1
-rw-r--r--drivers/misc/mei/hw-txe.c2
-rw-r--r--drivers/misc/ocxl/file.c2
-rw-r--r--drivers/misc/pci_endpoint_test.c34
-rw-r--r--drivers/misc/sgi-xp/xp.h4
-rw-r--r--drivers/misc/vmw_vmci/vmci_queue_pair.c16
-rw-r--r--drivers/misc/xilinx_sdfec.c3
48 files changed, 2335 insertions, 801 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 94e9fb4cdd76..358ad56f6524 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -513,4 +513,5 @@ source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
 source "drivers/misc/uacce/Kconfig"
 source "drivers/misc/pvpanic/Kconfig"
+source "drivers/misc/mchp_pci1xxxx/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 2be8542616dd..ac9b3e757ba1 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -60,4 +60,5 @@ obj-$(CONFIG_XILINX_SDFEC)	+= xilinx_sdfec.o
 obj-$(CONFIG_HISI_HIKEY_USB)	+= hisi_hikey_usb.o
 obj-$(CONFIG_HI6421V600_IRQ)	+= hi6421v600-irq.o
 obj-$(CONFIG_OPEN_DICE)		+= open-dice.o
-obj-$(CONFIG_VCPU_STALL_DETECTOR)	+= vcpu_stall_detector.o
\ No newline at end of file
+obj-$(CONFIG_GP_PCI1XXXX)	+= mchp_pci1xxxx/
+obj-$(CONFIG_VCPU_STALL_DETECTOR)	+= vcpu_stall_detector.o
diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c
index 075f3a36d512..a58b7cb81d98 100644
--- a/drivers/misc/altera-stapl/altera.c
+++ b/drivers/misc/altera-stapl/altera.c
@@ -1014,7 +1014,7 @@ exit_done:
 			 * ...argument 0 is string ID
 			 */
 			count = strlen(msg_buff);
-			strlcpy(&msg_buff[count],
+			strscpy(&msg_buff[count],
 				&p[str_table + args[0]],
 				ALTERA_MESSAGE_LENGTH - count);
 			break;
@@ -2146,7 +2146,7 @@ static int altera_get_note(u8 *p, s32 program_size, s32 *offset,
 						&p[note_table + (8 * i) + 4])];
 
 				if (value != NULL)
-					strlcpy(value, value_ptr, vallen);
+					strscpy(value, value_ptr, vallen);
 
 			}
 		}
@@ -2162,13 +2162,13 @@ static int altera_get_note(u8 *p, s32 program_size, s32 *offset,
 			status = 0;
 
 			if (key != NULL)
-				strlcpy(key, &p[note_strings +
+				strscpy(key, &p[note_strings +
 						get_unaligned_be32(
 						&p[note_table + (8 * i)])],
 					keylen);
 
 			if (value != NULL)
-				strlcpy(value, &p[note_strings +
+				strscpy(value, &p[note_strings +
 						get_unaligned_be32(
 						&p[note_table + (8 * i) + 4])],
 					vallen);
diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c
index a16b99bdaa13..d4a96137728d 100644
--- a/drivers/misc/bcm-vk/bcm_vk_dev.c
+++ b/drivers/misc/bcm-vk/bcm_vk_dev.c
@@ -1339,7 +1339,7 @@ static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_drvdata(pdev, vk);
 
 	irq = pci_alloc_irq_vectors(pdev,
-				    1,
+				    VK_MSIX_IRQ_MIN_REQ,
 				    VK_MSIX_IRQ_MAX,
 				    PCI_IRQ_MSI | PCI_IRQ_MSIX);
 
@@ -1401,7 +1401,7 @@ static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		bcm_vk_tty_set_irq_enabled(vk, i);
 	}
 
-	id = ida_simple_get(&bcm_vk_ida, 0, 0, GFP_KERNEL);
+	id = ida_alloc(&bcm_vk_ida, GFP_KERNEL);
 	if (id < 0) {
 		err = id;
 		dev_err(dev, "unable to get id\n");
@@ -1500,7 +1500,7 @@ err_kfree_name:
 	misc_device->name = NULL;
 
 err_ida_remove:
-	ida_simple_remove(&bcm_vk_ida, id);
+	ida_free(&bcm_vk_ida, id);
 
 err_irq:
 	for (i = 0; i < vk->num_irqs; i++)
@@ -1573,7 +1573,7 @@ static void bcm_vk_remove(struct pci_dev *pdev)
 	if (misc_device->name) {
 		misc_deregister(misc_device);
 		kfree(misc_device->name);
-		ida_simple_remove(&bcm_vk_ida, vk->devid);
+		ida_free(&bcm_vk_ida, vk->devid);
 	}
 	for (i = 0; i < vk->num_irqs; i++)
 		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), vk);
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index 4a9445fea93d..8a841a75d893 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -136,7 +136,7 @@ static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info)
 	 && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
 		return -ENODEV;
 
-	strlcpy(info->type, "eeprom", I2C_NAME_SIZE);
+	strscpy(info->type, "eeprom", I2C_NAME_SIZE);
 
 	return 0;
 }
diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c
index ada2a3af36d7..bb3ed352b95f 100644
--- a/drivers/misc/eeprom/idt_89hpesx.c
+++ b/drivers/misc/eeprom/idt_89hpesx.c
@@ -1075,7 +1075,7 @@ static const struct i2c_device_id *idt_ee_match_id(struct fwnode_handle *fwnode)
 		return NULL;
 
 	p = strchr(compatible, ',');
-	strlcpy(devname, p ? p + 1 : compatible, sizeof(devname));
+	strscpy(devname, p ? p + 1 : compatible, sizeof(devname));
 	/* Search through the device name */
 	while (id->name[0]) {
 		if (strcmp(devname, id->name) == 0)
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 5d9e3483b89d..7ff0b63c25e3 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1515,7 +1515,7 @@ static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr
 	args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1];
 	args[1].length = dsp_attr_buf_len;
 	args[1].fd = -1;
-	fl->pd = 1;
+	fl->pd = USER_PD;
 
 	return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE,
 				       FASTRPC_SCALARS(0, 1, 1), args);
diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig
index 861c81006c6d..bd01d0d940c0 100644
--- a/drivers/misc/habanalabs/Kconfig
+++ b/drivers/misc/habanalabs/Kconfig
@@ -10,6 +10,7 @@ config HABANA_AI
 	select HWMON
 	select DMA_SHARED_BUFFER
 	select CRC32
+	select FW_LOADER
 	help
 	  Enables PCIe card driver for Habana's AI Processors (AIP) that are
 	  designed to accelerate Deep Learning inference and training workloads.
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index b35d7000c86b..a48a9e0969ed 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o
 include $(src)/common/Makefile
 habanalabs-y += $(HL_COMMON_FILES)
 
-include $(src)/goya/Makefile
-habanalabs-y += $(HL_GOYA_FILES)
+include $(src)/gaudi2/Makefile
+habanalabs-y += $(HL_GAUDI2_FILES)
 
 include $(src)/gaudi/Makefile
 habanalabs-y += $(HL_GAUDI_FILES)
 
-include $(src)/gaudi2/Makefile
-habanalabs-y += $(HL_GAUDI2_FILES)
+include $(src)/goya/Makefile
+habanalabs-y += $(HL_GOYA_FILES)
 
 habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index b027f66f8bd4..2b332991ac6a 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -12,20 +12,18 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+#define CB_VA_POOL_SIZE		(4UL * SZ_1G)
+
 static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct hl_vm_va_block *va_block, *tmp;
-	dma_addr_t bus_addr;
-	u64 virt_addr;
 	u32 page_size = prop->pmmu.page_size;
-	s32 offset;
 	int rc;
 
 	if (!hdev->supports_cb_mapping) {
 		dev_err_ratelimited(hdev->dev,
-				"Cannot map CB because no VA range is allocated for CB mapping\n");
+				"Mapping a CB to the device's MMU is not supported\n");
 		return -EINVAL;
 	}
 
@@ -35,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 		return -EINVAL;
 	}
 
-	INIT_LIST_HEAD(&cb->va_block_list);
-
-	for (bus_addr = cb->bus_address;
-			bus_addr < cb->bus_address + cb->size;
-			bus_addr += page_size) {
-
-		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
-		if (!virt_addr) {
-			dev_err(hdev->dev,
-				"Failed to allocate device virtual address for CB\n");
-			rc = -ENOMEM;
-			goto err_va_pool_free;
-		}
+	if (cb->is_mmu_mapped)
+		return 0;
 
-		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
-		if (!va_block) {
-			rc = -ENOMEM;
-			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
-			goto err_va_pool_free;
-		}
+	cb->roundup_size = roundup(cb->size, page_size);
 
-		va_block->start = virt_addr;
-		va_block->end = virt_addr + page_size - 1;
-		va_block->size = page_size;
-		list_add_tail(&va_block->node, &cb->va_block_list);
+	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
+	if (!cb->virtual_addr) {
+		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
+		return -ENOMEM;
 	}
 
-	mutex_lock(&ctx->mmu_lock);
-
-	bus_addr = cb->bus_address;
-	offset = 0;
-	list_for_each_entry(va_block, &cb->va_block_list, node) {
-		rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
-				va_block->size, list_is_last(&va_block->node,
-							&cb->va_block_list));
-		if (rc) {
-			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
-				va_block->start);
-			goto err_va_umap;
-		}
-
-		bus_addr += va_block->size;
-		offset += va_block->size;
+	mutex_lock(&hdev->mmu_lock);
+	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
+		goto err_va_umap;
 	}
-
 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
-
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	cb->is_mmu_mapped = true;
-
 	return rc;
 
 err_va_umap:
-	list_for_each_entry(va_block, &cb->va_block_list, node) {
-		if (offset <= 0)
-			break;
-		hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-				offset <= va_block->size);
-		offset -= va_block->size;
-	}
-
-	rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
-
-	mutex_unlock(&ctx->mmu_lock);
-
-err_va_pool_free:
-	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-		list_del(&va_block->node);
-		kfree(va_block);
-	}
-
+	mutex_unlock(&hdev->mmu_lock);
+	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
 	return rc;
 }
 
 static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 {
 	struct hl_device *hdev = ctx->hdev;
-	struct hl_vm_va_block *va_block, *tmp;
-
-	mutex_lock(&ctx->mmu_lock);
-
-	list_for_each_entry(va_block, &cb->va_block_list, node)
-		if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-				list_is_last(&va_block->node,
-						&cb->va_block_list)))
-			dev_warn_ratelimited(hdev->dev,
-					"Failed to unmap CB's va 0x%llx\n",
-					va_block->start);
 
+	mutex_lock(&hdev->mmu_lock);
+	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
+	mutex_unlock(&hdev->mmu_lock);
 
-	mutex_unlock(&ctx->mmu_lock);
-
-	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-		list_del(&va_block->node);
-		kfree(va_block);
-	}
+	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
 }
 
 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
@@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
 static int hl_cb_info(struct hl_mem_mgr *mmg,
 			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
 {
-	struct hl_vm_va_block *va_block;
 	struct hl_cb *cb;
 	int rc = 0;
 
@@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
 	}
 
 	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
-		va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
-		if (va_block) {
-			*device_va = va_block->start;
+		if (cb->is_mmu_mapped) {
+			*device_va = cb->virtual_addr;
 		} else {
 			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
 			rc = -EINVAL;
@@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx)
 		return -ENOMEM;
 	}
 
-	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
-			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
+	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
+					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
+	if (!ctx->cb_va_pool_base) {
+		rc = -ENOMEM;
+		goto err_pool_destroy;
+	}
+	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to add memory to VA gen pool for CB mapping\n");
-		goto err_pool_destroy;
+		goto err_unreserve_va_block;
 	}
 
 	return 0;
 
+err_unreserve_va_block:
+	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
 err_pool_destroy:
 	gen_pool_destroy(ctx->cb_va_pool);
 
@@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx)
 		return;
 
 	gen_pool_destroy(ctx->cb_va_pool);
+	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
 }
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 90a4574cbe2d..fa05770865c6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -12,7 +12,9 @@
 #include <linux/slab.h>
 
 #define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
-					HL_CS_FLAGS_COLLECTIVE_WAIT)
+			HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
+			HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+
 
 #define MAX_TS_ITER_NUM 10
 
@@ -824,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
 	}
 
 	/* Save only the first CS timeout parameters */
-	rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
+	rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
 	if (rc) {
-		hdev->last_error.cs_timeout.timestamp = ktime_get();
-		hdev->last_error.cs_timeout.seq = cs->sequence;
+		hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
+		hdev->captured_err_info.cs_timeout.seq = cs->sequence;
 
 		event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
 				HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
@@ -1242,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
 		return CS_RESERVE_SIGNALS;
 	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
 		return CS_UNRESERVE_SIGNALS;
+	else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+		return CS_TYPE_ENGINE_CORE;
 	else
 		return CS_TYPE_DEFAULT;
 }
@@ -1253,6 +1257,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 	u32 cs_type_flags, num_chunks;
 	enum hl_device_status status;
 	enum hl_cs_type cs_type;
+	bool is_sync_stream;
 
 	if (!hl_device_operational(hdev, &status)) {
 		return -EBUSY;
@@ -1276,9 +1281,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 	cs_type = hl_cs_get_cs_type(cs_type_flags);
 	num_chunks = args->in.num_chunks_execute;
 
-	if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
-			cs_type == CS_TYPE_COLLECTIVE_WAIT) &&
-			!hdev->supports_sync_stream)) {
+	is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
+			cs_type == CS_TYPE_COLLECTIVE_WAIT);
+
+	if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
 		dev_err(hdev->dev, "Sync stream CS is not supported\n");
 		return -EINVAL;
 	}
@@ -1288,7 +1294,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 			dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
 			return -EINVAL;
 		}
-	} else if (num_chunks != 1) {
+	} else if (is_sync_stream && num_chunks != 1) {
 		dev_err(hdev->dev,
 			"Sync stream CS mandates one chunk only, context %d\n",
 			ctx->asid);
@@ -1584,13 +1590,14 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
 	bool need_soft_reset = false;
-	int rc = 0, do_ctx_switch;
+	int rc = 0, do_ctx_switch = 0;
 	void __user *chunks;
 	u32 num_chunks, tmp;
 	u16 sob_count;
 	int ret;
 
-	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+	if (hdev->supports_ctx_switch)
+		do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
 
 	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
 		mutex_lock(&hpriv->restore_phase_mutex);
@@ -1661,9 +1668,10 @@ wait_again:
 			}
 		}
 
-		ctx->thread_ctx_switch_wait_token = 1;
+		if (hdev->supports_ctx_switch)
+			ctx->thread_ctx_switch_wait_token = 1;
 
-	} else if (!ctx->thread_ctx_switch_wait_token) {
+	} else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
 		rc = hl_poll_timeout_memory(hdev,
 			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
 			100, jiffies_to_usecs(hdev->timeout_jiffies), false);
@@ -2351,6 +2359,41 @@ out:
 	return rc;
 }
 
+static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
+						u32 num_engine_cores, u32 core_command)
+{
+	int rc;
+	struct hl_device *hdev = hpriv->hdev;
+	void __user *engine_cores_arr;
+	u32 *cores;
+
+	if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
+		dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
+		return -EINVAL;
+	}
+
+	if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
+		dev_err(hdev->dev, "Engine core command is invalid\n");
+		return -EINVAL;
+	}
+
+	engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
+	cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
+	if (!cores)
+		return -ENOMEM;
+
+	if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
+		dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
+		kfree(cores);
+		return -EFAULT;
+	}
+
+	rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
+	kfree(cores);
+
+	return rc;
+}
+
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	union hl_cs_args *args = data;
@@ -2403,6 +2446,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		rc = cs_ioctl_unreserve_signals(hpriv,
 					args->in.encaps_sig_handle_id);
 		break;
+	case CS_TYPE_ENGINE_CORE:
+		rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
+				args->in.num_engine_cores, args->in.core_command);
+		break;
 	default:
 		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
 						args->in.cs_flags,
@@ -2524,7 +2571,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_com
 	ktime_t max_ktime, first_cs_time;
 	enum hl_cs_wait_status status;
 
-	memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
+	memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));
 
 	/* get all fences under the same lock */
 	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
@@ -2826,7 +2873,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	}
 
 	/* allocate array for the fences */
-	fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
+	fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
 	if (!fence_arr) {
 		rc = -ENOMEM;
 		goto free_seq_arr;
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 64439f33a19b..48d3ec8b5c82 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -291,14 +291,16 @@ static int vm_show(struct seq_file *s, void *data)
 		if (ctx->asid != HL_KERNEL_ASID_ID &&
 		    !list_empty(&ctx->hw_block_mem_list)) {
 			seq_puts(s, "\nhw_block mappings:\n\n");
-			seq_puts(s, "    virtual address    size    HW block id\n");
-			seq_puts(s, "-------------------------------------------\n");
+			seq_puts(s,
+				"    virtual address    block size    mapped size    HW block id\n");
+			seq_puts(s,
+				"---------------------------------------------------------------\n");
 			mutex_lock(&ctx->hw_block_list_lock);
-			list_for_each_entry(lnode, &ctx->hw_block_mem_list,
-					    node) {
+			list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {
 				seq_printf(s,
-					"    0x%-14lx   %-6u      %-9u\n",
-					lnode->vaddr, lnode->size, lnode->id);
+					"    0x%-14lx   %-6u        %-6u             %-9u\n",
+					lnode->vaddr, lnode->block_size, lnode->mapped_size,
+					lnode->id);
 			}
 			mutex_unlock(&ctx->hw_block_list_lock);
 		}
@@ -591,6 +593,7 @@ static int engines_show(struct seq_file *s, void *data)
 	struct hl_debugfs_entry *entry = s->private;
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
+	struct engines_data eng_data;
 
 	if (hdev->reset_info.in_reset) {
 		dev_warn_ratelimited(hdev->dev,
@@ -598,7 +601,25 @@ static int engines_show(struct seq_file *s, void *data)
 		return 0;
 	}
 
-	hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
+	eng_data.actual_size = 0;
+	eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
+	eng_data.buf = vmalloc(eng_data.allocated_buf_size);
+	if (!eng_data.buf)
+		return -ENOMEM;
+
+	hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+	if (eng_data.actual_size > eng_data.allocated_buf_size) {
+		dev_err(hdev->dev,
+				"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+				eng_data.actual_size, eng_data.allocated_buf_size);
+		vfree(eng_data.buf);
+		return -ENOMEM;
+	}
+
+	seq_write(s, eng_data.buf, eng_data.actual_size);
+
+	vfree(eng_data.buf);
 
 	return 0;
 }
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index b30aeb1c657f..233d8b46c831 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -13,6 +13,8 @@
 #include <linux/pci.h>
 #include <linux/hwmon.h>
 
+#include <trace/events/habanalabs.h>
+
 #define HL_RESET_DELAY_USEC		10000	/* 10ms */
 
 enum dma_alloc_type {
@@ -26,8 +28,9 @@ enum dma_alloc_type {
 /*
  * hl_set_dram_bar- sets the bar to allow later access to address
  *
- * @hdev: pointer to habanalabs device structure
+ * @hdev: pointer to habanalabs device structure.
  * @addr: the address the caller wants to access.
+ * @region: the PCI region.
  *
  * @return: the old BAR base address on success, U64_MAX for failure.
  *	    The caller should set it back to the old address after use.
@@ -37,58 +40,64 @@ enum dma_alloc_type {
  * This function can be called also if the bar doesn't need to be set,
  * in that case it just won't change the base.
  */
-static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr)
+static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u64 bar_base_addr;
+	u64 bar_base_addr, old_base;
 
-	bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
+	if (is_power_of_2(prop->dram_pci_bar_size))
+		bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
+	else
+		bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
+				prop->dram_pci_bar_size;
 
-	return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
-}
+	old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
 
+	/* in case of success we need to update the new BAR base */
+	if (old_base != U64_MAX)
+		region->region_base = bar_base_addr;
+
+	return old_base;
+}
 
 static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
 	enum debugfs_access_type acc_type, enum pci_region region_type)
 {
 	struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
+	void __iomem *acc_addr;
 	u64 old_base = 0, rc;
 
 	if (region_type == PCI_REGION_DRAM) {
-		old_base = hl_set_dram_bar(hdev, addr);
+		old_base = hl_set_dram_bar(hdev, addr, region);
 		if (old_base == U64_MAX)
 			return -EIO;
 	}
 
+	acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base +
+			region->offset_in_bar;
 	switch (acc_type) {
 	case DEBUGFS_READ8:
-		*val = readb(hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		*val = readb(acc_addr);
 		break;
 	case DEBUGFS_WRITE8:
-		writeb(*val, hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		writeb(*val, acc_addr);
 		break;
 	case DEBUGFS_READ32:
-		*val = readl(hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		*val = readl(acc_addr);
 		break;
 	case DEBUGFS_WRITE32:
-		writel(*val, hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		writel(*val, acc_addr);
 		break;
 	case DEBUGFS_READ64:
-		*val = readq(hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		*val = readq(acc_addr);
 		break;
 	case DEBUGFS_WRITE64:
-		writeq(*val, hdev->pcie_bar[region->bar_id] +
-			addr - region->region_base + region->offset_in_bar);
+		writeq(*val, acc_addr);
 		break;
 	}
 
 	if (region_type == PCI_REGION_DRAM) {
-		rc = hl_set_dram_bar(hdev, old_base);
+		rc = hl_set_dram_bar(hdev, old_base, region);
 		if (rc == U64_MAX)
 			return -EIO;
 	}
@@ -97,9 +106,10 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
 }
 
 static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-		gfp_t flag, enum dma_alloc_type alloc_type)
+					gfp_t flag, enum dma_alloc_type alloc_type,
+					const char *caller)
 {
-	void *ptr;
+	void *ptr = NULL;
 
 	switch (alloc_type) {
 	case DMA_ALLOC_COHERENT:
@@ -113,11 +123,16 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t
 		break;
 	}
 
+	if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr))
+		trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size,
+						caller);
+
 	return ptr;
 }
 
 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr,
-					dma_addr_t dma_handle, enum dma_alloc_type alloc_type)
+					dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
+					const char *caller)
 {
 	switch (alloc_type) {
 	case DMA_ALLOC_COHERENT:
@@ -130,39 +145,44 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
 		hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
 		break;
 	}
+
+	trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller);
 }
 
-void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-					gfp_t flag)
+void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+					gfp_t flag, const char *caller)
 {
-	return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT);
+	return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller);
 }
 
-void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
-					dma_addr_t dma_handle)
+void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
+					dma_addr_t dma_handle, const char *caller)
 {
-	hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT);
+	hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller);
 }
 
-void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle)
+void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
+						dma_addr_t *dma_handle, const char *caller)
 {
-	return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+	return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
 }
 
-void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
+void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
+						const char *caller)
 {
-	hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+	hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
 }
 
-void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
-					dma_addr_t *dma_handle)
+void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+					dma_addr_t *dma_handle, const char *caller)
 {
-	return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL);
+	return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller);
 }
 
-void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
+void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
+					const char *caller)
 {
-	hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL);
+	hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller);
 }
 
 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
@@ -267,6 +287,30 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
 	return 0;
 }
 
+void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...)
+{
+	va_list args;
+	int str_size;
+
+	va_start(args, fmt);
+	/* Calculate formatted string length. Assuming each string is null terminated, hence
+	 * increment result by 1
+	 */
+	str_size = vsnprintf(NULL, 0, fmt, args) + 1;
+	va_end(args);
+
+	if ((e->actual_size + str_size) < e->allocated_buf_size) {
+		va_start(args, fmt);
+		vsnprintf(e->buf + e->actual_size, str_size, fmt, args);
+		va_end(args);
+	}
+
+	/* Need to update the size even when not updating destination buffer to get the exact size
+	 * of all input strings
+	 */
+	e->actual_size += str_size;
+}
+
 enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
 	enum hl_device_status status;
@@ -322,6 +366,8 @@ static void hpriv_release(struct kref *ref)
 
 	hdev = hpriv->hdev;
 
+	hdev->asic_funcs->send_device_activity(hdev, false);
+
 	put_pid(hpriv->taskpid);
 
 	hl_debugfs_remove_file(hpriv);
@@ -673,7 +719,7 @@ static int device_early_init(struct hl_device *hdev)
 
 	if (hdev->asic_prop.completion_queues_count) {
 		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
-				sizeof(*hdev->cq_wq),
+				sizeof(struct workqueue_struct *),
 				GFP_KERNEL);
 		if (!hdev->cq_wq) {
 			rc = -ENOMEM;
@@ -1091,7 +1137,9 @@ int hl_device_resume(struct hl_device *hdev)
 	/* 'in_reset' was set to true during suspend, now we must clear it in order
 	 * for hard reset to be performed
 	 */
+	spin_lock(&hdev->reset_info.lock);
 	hdev->reset_info.in_reset = 0;
+	spin_unlock(&hdev->reset_info.lock);
 
 	rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
 	if (rc) {
@@ -1518,6 +1566,13 @@ kill_processes:
 	 */
 	hdev->disabled = false;
 
+	/* F/W security enabled indication might be updated after hard-reset */
+	if (hard_reset) {
+		rc = hl_fw_read_preboot_status(hdev);
+		if (rc)
+			goto out_err;
+	}
+
 	rc = hdev->asic_funcs->hw_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
@@ -1556,7 +1611,7 @@ kill_processes:
 		if (!hdev->asic_prop.fw_security_enabled)
 			hl_fw_set_max_power(hdev);
 	} else {
-		rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
+		rc = hdev->asic_funcs->compute_reset_late_init(hdev);
 		if (rc) {
 			if (reset_upon_device_release)
 				dev_err(hdev->dev,
@@ -1704,7 +1759,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	char *name;
 	bool add_cdev_sysfs_on_err = false;
 
-	name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
+	hdev->cdev_idx = hdev->id / 2;
+
+	name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
 	if (!name) {
 		rc = -ENOMEM;
 		goto out_disabled;
@@ -1719,7 +1776,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	if (rc)
 		goto out_disabled;
 
-	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
+	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
 	if (!name) {
 		rc = -ENOMEM;
 		goto free_dev;
@@ -1806,7 +1863,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	}
 
 	hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs,
-					sizeof(*hdev->shadow_cs_queue), GFP_KERNEL);
+					sizeof(struct hl_cs *), GFP_KERNEL);
 	if (!hdev->shadow_cs_queue) {
 		rc = -ENOMEM;
 		goto cq_fini;
@@ -1997,10 +2054,10 @@ out_disabled:
 	if (hdev->pdev)
 		dev_err(&hdev->pdev->dev,
 			"Failed to initialize hl%d. Device is NOT usable !\n",
-			hdev->id / 2);
+			hdev->cdev_idx);
 	else
 		pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
-			hdev->id / 2);
+			hdev->cdev_idx);
 
 	return rc;
 }
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 608ca67527a5..2de6a9bd564d 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -15,14 +15,6 @@
 
 #define FW_FILE_MAX_SIZE		0x1400000 /* maximum size of 20MB */
 
-struct fw_binning_conf {
-	u64 tpc_binning;
-	u32 dec_binning;
-	u32 hbm_binning;
-	u32 edma_binning;
-	u32 mme_redundancy;
-};
-
 static char *extract_fw_ver_from_str(const char *fw_str)
 {
 	char *str, *fw_ver, *whitespace;
@@ -260,7 +252,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	struct cpucp_packet *pkt;
 	dma_addr_t pkt_dma_addr;
 	struct hl_bd *sent_bd;
-	u32 tmp, expected_ack_val, pi;
+	u32 tmp, expected_ack_val, pi, opcode;
 	int rc;
 
 	pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
@@ -327,8 +319,35 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 
 	rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
 	if (rc) {
-		dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
-			rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT);
+		opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
+
+		if (!prop->supports_advanced_cpucp_rc) {
+			dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
+			goto scrub_descriptor;
+		}
+
+		switch (rc) {
+		case cpucp_packet_invalid:
+			dev_err(hdev->dev,
+				"CPU packet %d is not supported by F/W\n", opcode);
+			break;
+		case cpucp_packet_fault:
+			dev_err(hdev->dev,
+				"F/W failed processing CPU packet %d\n", opcode);
+			break;
+		case cpucp_packet_invalid_pkt:
+			dev_dbg(hdev->dev,
+				"CPU packet %d is not supported by F/W\n", opcode);
+			break;
+		case cpucp_packet_invalid_params:
+			dev_err(hdev->dev,
+				"F/W reports invalid parameters for CPU packet %d\n", opcode);
+			break;
+
+		default:
+			dev_err(hdev->dev,
+				"Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode);
+		}
 
 		/* propagate the return code from the f/w to the callers who want to check it */
 		if (result)
@@ -340,6 +359,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 		*result = le64_to_cpu(pkt->result);
 	}
 
+scrub_descriptor:
 	/* Scrub previous buffer descriptor 'ctl' field which contains the
 	 * previous PI value written during packet submission.
 	 * We must do this or else F/W can read an old value upon queue wraparound.
@@ -462,6 +482,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 			size);
 }
 
+int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
+{
+	struct cpucp_packet pkt;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET <<	CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.value = cpu_to_le64(open);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+	if (rc)
+		dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open);
+
+	return rc;
+}
+
 int hl_fw_send_heartbeat(struct hl_device *hdev)
 {
 	struct cpucp_packet hb_pkt;
@@ -581,6 +616,15 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
 
 	/* All warnings should go here in order not to reach the unknown error validation */
+	if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
+		dev_warn(hdev->dev,
+			"Device boot warning - EEPROM failure detected, default settings applied\n");
+		/* This is a warning so we don't want it to disable the
+		 * device
+		 */
+		err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL;
+	}
+
 	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
 		dev_warn(hdev->dev,
 			"Device boot warning - Skipped DRAM initialization\n");
@@ -1476,6 +1520,8 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 	 */
 	prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
 
+	prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN);
+
 	dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
 							cpu_boot_dev_sts0);
 
@@ -1514,7 +1560,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev)
 	hdev->asic_funcs->init_firmware_preload_params(hdev);
 
 	/*
-	 * In order to determine boot method (static VS dymanic) we need to
+	 * In order to determine boot method (static VS dynamic) we need to
 	 * read the boot caps register
 	 */
 	rc = hl_fw_read_preboot_caps(hdev);
@@ -1781,7 +1827,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
  *
  * @return the CRC32 result
  *
- * NOTE: kernel's CRC32 differ's from standard CRC32 calculation.
+ * NOTE: kernel's CRC32 differs from standard CRC32 calculation.
  *       in order to be aligned we need to flip the bits of both the input
  *       initial CRC and kernel's CRC32 result.
  *       in addition both sides use initial CRC of 0,
@@ -1798,7 +1844,7 @@ static u32 hl_fw_compat_crc32(u8 *data, size_t size)
  *
  * @hdev: pointer to the habanalabs device structure
  * @addr: device address of memory transfer
- * @size: memory transter size
+ * @size: memory transfer size
  * @region: PCI memory region
  *
  * @return 0 on success, otherwise non-zero error code
@@ -1854,50 +1900,36 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
 	u64 addr;
 	int rc;
 
-	if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) {
-		dev_err(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
+	if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC)
+		dev_warn(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
 				fw_desc->header.magic);
-		return -EIO;
-	}
 
-	if (fw_desc->header.version != HL_COMMS_DESC_VER) {
-		dev_err(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
+	if (fw_desc->header.version != HL_COMMS_DESC_VER)
+		dev_warn(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
 				fw_desc->header.version);
-		return -EIO;
-	}
 
 	/*
-	 * calc CRC32 of data without header.
+	 * Calc CRC32 of data without header. use the size of the descriptor
+	 * reported by firmware, without calculating it ourself, to allow adding
+	 * more fields to the lkd_fw_comms_desc structure.
 	 * note that no alignment/stride address issues here as all structures
-	 * are 64 bit padded
+	 * are 64 bit padded.
 	 */
-	data_size = sizeof(struct lkd_fw_comms_desc) -
-					sizeof(struct comms_desc_header);
 	data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header);
-
-	if (le16_to_cpu(fw_desc->header.size) != data_size) {
-		dev_err(hdev->dev,
-			"Invalid descriptor size 0x%x, expected size 0x%zx\n",
-				le16_to_cpu(fw_desc->header.size), data_size);
-		return -EIO;
-	}
+	data_size = le16_to_cpu(fw_desc->header.size);
 
 	data_crc32 = hl_fw_compat_crc32(data_ptr, data_size);
-
 	if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) {
-		dev_err(hdev->dev,
-			"CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
-					data_crc32, fw_desc->header.crc32);
+		dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
+			data_crc32, fw_desc->header.crc32);
 		return -EIO;
 	}
 
 	/* find memory region to which to copy the image */
 	addr = le64_to_cpu(fw_desc->img_addr);
 	region_id = hl_get_pci_memory_region(hdev, addr);
-	if ((region_id != PCI_REGION_SRAM) &&
-			((region_id != PCI_REGION_DRAM))) {
-		dev_err(hdev->dev,
-			"Invalid region to copy FW image address=%llx\n", addr);
+	if ((region_id != PCI_REGION_SRAM) && ((region_id != PCI_REGION_DRAM))) {
+		dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr);
 		return -EIO;
 	}
 
@@ -1914,8 +1946,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
 					fw_loader->dynamic_loader.fw_image_size,
 					region);
 	if (rc) {
-		dev_err(hdev->dev,
-			"invalid mem transfer request for FW image\n");
+		dev_err(hdev->dev, "invalid mem transfer request for FW image\n");
 		return rc;
 	}
 
@@ -2422,18 +2453,6 @@ static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
 		msg.reset_cause = *(__u8 *) data;
 		break;
 
-	case HL_COMMS_BINNING_CONF_TYPE:
-	{
-		struct fw_binning_conf *binning_conf = (struct fw_binning_conf *) data;
-
-		msg.tpc_binning_conf = cpu_to_le64(binning_conf->tpc_binning);
-		msg.dec_binning_conf = cpu_to_le32(binning_conf->dec_binning);
-		msg.hbm_binning_conf = cpu_to_le32(binning_conf->hbm_binning);
-		msg.edma_binning_conf = cpu_to_le32(binning_conf->edma_binning);
-		msg.mme_redundancy_conf = cpu_to_le32(binning_conf->mme_redundancy);
-		break;
-	}
-
 	default:
 		dev_err(hdev->dev,
 			"Send COMMS message - invalid message type %u\n",
@@ -2503,13 +2522,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	 */
 	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
 
-	/* if no preboot loaded indication- wait for preboot */
-	if (!(hdev->fw_loader.fw_comp_loaded & FW_TYPE_PREBOOT_CPU)) {
-		rc = hl_fw_wait_preboot_ready(hdev);
-		if (rc)
-			return -EIO;
-	}
-
 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
 						0, true,
 						fw_loader->cpu_timeout);
@@ -2547,7 +2559,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	/*
 	 * when testing FW load (without Linux) on PLDM we don't want to
 	 * wait until boot fit is active as it may take several hours.
-	 * instead, we load the bootfit and let it do all initializations in
+	 * instead, we load the bootfit and let it do all initialization in
 	 * the background.
 	 */
 	if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
@@ -2961,3 +2973,49 @@ void hl_fw_set_max_power(struct hl_device *hdev)
 	if (rc)
 		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
 }
+
+static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size,
+					u32 nonce, u32 timeout)
+{
+	struct cpucp_packet pkt = {};
+	dma_addr_t req_dma_addr;
+	void *req_cpu_addr;
+	int rc;
+
+	req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr);
+	if (!data) {
+		dev_err(hdev->dev,
+			"Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id);
+		return -ENOMEM;
+	}
+
+	memset(data, 0, size);
+
+	pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.addr = cpu_to_le64(req_dma_addr);
+	pkt.data_max_size = cpu_to_le32(size);
+	pkt.nonce = cpu_to_le32(nonce);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+					timeout, NULL);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);
+		goto out;
+	}
+
+	memcpy(data, req_cpu_addr, size);
+
+out:
+	hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr);
+
+	return rc;
+}
+
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+				u32 nonce)
+{
+	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info,
+					sizeof(struct cpucp_sec_attest_info), nonce,
+					HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index d59bba9e55c9..58c95b13be69 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -66,6 +66,7 @@ struct hl_fpriv;
 #define HL_CPUCP_INFO_TIMEOUT_USEC	10000000 /* 10s */
 #define HL_CPUCP_EEPROM_TIMEOUT_USEC	10000000 /* 10s */
 #define HL_CPUCP_MON_DUMP_TIMEOUT_USEC	10000000 /* 10s */
+#define HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC 10000000 /* 10s */
 
 #define HL_FW_STATUS_POLL_INTERVAL_USEC		10000 /* 10ms */
 #define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC	1000000 /* 1s */
@@ -94,7 +95,7 @@ struct hl_fpriv;
 #define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
 /**
- * enum hl_mmu_page_table_locaion - mmu page table location
+ * enum hl_mmu_page_table_location - mmu page table location
  * @MMU_DR_PGT: page-table is located on device DRAM.
  * @MMU_HR_PGT: page-table is located on host memory.
  * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported.
@@ -143,6 +144,25 @@ enum hl_mmu_enablement {
 
 #define HL_MAX_DCORES			8
 
+/* DMA alloc/free wrappers */
+#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \
+	hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__)
+
+#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \
+	hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__)
+
+#define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \
+	hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__)
+
+#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \
+	hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__)
+
+#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \
+	hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__)
+
+#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
+	hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
+
 /*
  * Reset Flags
  *
@@ -208,6 +228,7 @@ enum hl_protection_levels {
  * struct iterate_module_ctx - HW module iterator
  * @fn: function to apply to each HW module instance
  * @data: optional internal data to the function iterator
+ * @rc: return code for optional use of iterator/iterator-caller
  */
 struct iterate_module_ctx {
 	/*
@@ -217,10 +238,12 @@ struct iterate_module_ctx {
 	 * @inst: HW module instance within the block
 	 * @offset: current HW module instance offset from the 1-st HW module instance
 	 *          in the 1-st block
-	 * @data: function specific data
+	 * @ctx: the iterator context.
 	 */
-	void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, void *data);
+	void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset,
+			struct iterate_module_ctx *ctx);
 	void *data;
+	int rc;
 };
 
 struct hl_block_glbl_sec {
@@ -342,7 +365,8 @@ enum hl_cs_type {
 	CS_TYPE_WAIT,
 	CS_TYPE_COLLECTIVE_WAIT,
 	CS_RESERVE_SIGNALS,
-	CS_UNRESERVE_SIGNALS
+	CS_UNRESERVE_SIGNALS,
+	CS_TYPE_ENGINE_CORE
 };
 
 /*
@@ -544,10 +568,6 @@ struct hl_hints_range {
  * @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned.
  * @dram_enabled_mask: which DRAMs are enabled.
  * @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned.
- * @cb_va_start_addr: virtual start address of command buffers which are mapped
- *                    to the device's MMU.
- * @cb_va_end_addr: virtual end address of command buffers which are mapped to
- *                  the device's MMU.
  * @dram_hints_align_mask: dram va hint addresses alignment mask which is used
  *                  for hints validity check.
  * @cfg_base_address: config space base address.
@@ -614,6 +634,7 @@ struct hl_hints_range {
  *                                      which the property supports_user_set_page_size is true
  *                                      (i.e. the DRAM supports multiple page sizes), otherwise
  *                                      it will shall  be equal to dram_page_size.
+ * @num_engine_cores: number of engine cpu cores
  * @collective_first_sob: first sync object available for collective use
  * @collective_first_mon: first monitor available for collective use
  * @sync_stream_first_sob: first sync object available for sync stream use
@@ -658,6 +679,7 @@ struct hl_hints_range {
  * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
  * @supports_user_set_page_size: true if user can set the allocation page size.
  * @dma_mask: the dma mask to be set for this device
+ * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -689,8 +711,6 @@ struct asic_fixed_properties {
 	u64				tpc_binning_mask;
 	u64				dram_enabled_mask;
 	u64				dram_binning_mask;
-	u64				cb_va_start_addr;
-	u64				cb_va_end_addr;
 	u64				dram_hints_align_mask;
 	u64				cfg_base_address;
 	u64				mmu_cache_mng_addr;
@@ -734,6 +754,7 @@ struct asic_fixed_properties {
 	u32				faulty_dram_cluster_map;
 	u32				xbar_edge_enabled_mask;
 	u32				device_mem_alloc_default_page_size;
+	u32				num_engine_cores;
 	u16				collective_first_sob;
 	u16				collective_first_mon;
 	u16				sync_stream_first_sob;
@@ -766,6 +787,7 @@ struct asic_fixed_properties {
 	u8				set_max_power_on_device_init;
 	u8				supports_user_set_page_size;
 	u8				dma_mask;
+	u8				supports_advanced_cpucp_rc;
 };
 
 /**
@@ -797,7 +819,7 @@ struct hl_fence {
  * @lock: spinlock to protect fence.
  * @hdev: habanalabs device structure.
  * @hw_sob: the H/W SOB used in this signal/wait CS.
- * @encaps_sig_hdl: encaps signals hanlder.
+ * @encaps_sig_hdl: encaps signals handler.
  * @cs_seq: command submission sequence number.
  * @type: type of the CS - signal/wait.
  * @sob_val: the SOB value that is used in this signal/wait CS.
@@ -898,14 +920,14 @@ struct hl_mmap_mem_buf {
  * @buf: back pointer to the parent mappable memory buffer
  * @debugfs_list: node in debugfs list of command buffers.
  * @pool_list: node in pool list of command buffers.
- * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
- *                 the device's MMU.
  * @kernel_address: Holds the CB's kernel virtual address.
+ * @virtual_addr: Holds the CB's virtual address.
  * @bus_address: Holds the CB's DMA address.
  * @size: holds the CB's size.
+ * @roundup_size: holds the cb size after roundup to page size.
  * @cs_cnt: holds number of CS that this CB participates in.
  * @is_pool: true if CB was acquired from the pool, false otherwise.
- * @is_internal: internaly allocated
+ * @is_internal: internally allocated
  * @is_mmu_mapped: true if the CB is mapped to the device's MMU.
  */
 struct hl_cb {
@@ -914,10 +936,11 @@ struct hl_cb {
 	struct hl_mmap_mem_buf	*buf;
 	struct list_head	debugfs_list;
 	struct list_head	pool_list;
-	struct list_head	va_block_list;
 	void			*kernel_address;
+	u64			virtual_addr;
 	dma_addr_t		bus_address;
 	u32			size;
+	u32			roundup_size;
 	atomic_t		cs_cnt;
 	u8			is_pool;
 	u8			is_internal;
@@ -1113,7 +1136,7 @@ struct timestamp_reg_info {
  * @fence: hl fence object for interrupt completion
  * @cq_target_value: CQ target value
  * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
- *                  handler for taget value comparison
+ *                  handler for target value comparison
  */
 struct hl_user_pending_interrupt {
 	struct timestamp_reg_info	ts_reg_info;
@@ -1372,6 +1395,18 @@ struct fw_load_mgr {
 struct hl_cs;
 
 /**
+ * struct engines_data - asic engines data
+ * @buf: buffer for engines data in ascii
+ * @actual_size: actual size of data that was written by the driver to the allocated buffer
+ * @allocated_buf_size: total size of allocated buffer
+ */
+struct engines_data {
+	char *buf;
+	int actual_size;
+	u32 allocated_buf_size;
+};
+
+/**
  * struct hl_asic_funcs - ASIC specific functions that are can be called from
  *                        common code.
  * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
@@ -1434,11 +1469,9 @@ struct hl_cs;
  * @send_heartbeat: send is-alive packet to CPU-CP and verify response.
  * @debug_coresight: perform certain actions on Coresight for debugging.
  * @is_device_idle: return true if device is idle, false otherwise.
- * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
+ * @compute_reset_late_init: perform certain actions needed after a compute reset
  * @hw_queues_lock: acquire H/W queues lock.
  * @hw_queues_unlock: release H/W queues lock.
- * @kdma_lock: acquire H/W queues lock. Relevant from GRECO ASIC
- * @kdma_unlock: release H/W queues lock. Relevant from GRECO ASIC
  * @get_pci_id: retrieve PCI ID.
  * @get_eeprom_data: retrieve EEPROM data from F/W.
  * @get_monitor_dump: retrieve monitor registers dump from F/W.
@@ -1498,6 +1531,8 @@ struct hl_cs;
  * @check_if_razwi_happened: check if there was a razwi due to RR violation.
  * @access_dev_mem: access device memory
  * @set_dram_bar_base: set the base of the DRAM BAR
+ * @set_engine_cores: set a config command to enigne cores
+ * @send_device_activity: indication to FW about device availability
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -1570,13 +1605,11 @@ struct hl_asic_funcs {
 	int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
-	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
-					u8 mask_len, struct seq_file *s);
-	int (*non_hard_reset_late_init)(struct hl_device *hdev);
+	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+				struct engines_data *e);
+	int (*compute_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
-	void (*kdma_lock)(struct hl_device *hdev, int dcore_id);
-	void (*kdma_unlock)(struct hl_device *hdev, int dcore_id);
 	u32 (*get_pci_id)(struct hl_device *hdev);
 	int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size);
 	int (*get_monitor_dump)(struct hl_device *hdev, void *data);
@@ -1634,6 +1667,9 @@ struct hl_asic_funcs {
 	int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type,
 				u64 addr, u64 *val, enum debugfs_access_type acc_type);
 	u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+	int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
+					u32 num_cores, u32 core_command);
+	int (*send_device_activity)(struct hl_device *hdev, bool open);
 };
 
 
@@ -1727,10 +1763,10 @@ struct hl_cs_outcome {
 
 /**
  * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes
- * @outcome_map: index of completed CS searcheable by sequence number
+ * @outcome_map: index of completed CS searchable by sequence number
  * @used_list: list of outcome objects currently in use
  * @free_list: list of outcome objects currently not in use
- * @nodes_pool: a static pool of preallocated outcome objects
+ * @nodes_pool: a static pool of pre-allocated outcome objects
  * @db_lock: any operation on the store must take this lock
  */
 struct hl_cs_outcome_store {
@@ -1754,12 +1790,10 @@ struct hl_cs_outcome_store {
  * @refcount: reference counter for the context. Context is released only when
  *		this hits 0l. It is incremented on CS and CS_WAIT.
  * @cs_pending: array of hl fence objects representing pending CS.
- * @outcome_store: storage data structure used to remember ouitcomes of completed
+ * @outcome_store: storage data structure used to remember outcomes of completed
  *                 command submissions for a long time after CS id wraparound.
  * @va_range: holds available virtual addresses for host and dram mappings.
  * @mem_hash_lock: protects the mem_hash.
- * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
- *            MMU hash or walking the PGT requires talking this lock.
  * @hw_block_list_lock: protects the HW block memory list.
  * @debugfs_list: node in debugfs list of contexts.
  * @hw_block_mem_list: list of HW block virtual mapped addresses.
@@ -1767,6 +1801,7 @@ struct hl_cs_outcome_store {
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
  * @sig_mgr: encaps signals handle manager.
+ * @cb_va_pool_base: the base address for the device VA pool
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
  *			to user so user could inquire about CS. It is used as
  *			index to cs_pending array.
@@ -1795,13 +1830,13 @@ struct hl_ctx {
 	struct hl_cs_outcome_store	outcome_store;
 	struct hl_va_range		*va_range[HL_VA_RANGE_TYPE_MAX];
 	struct mutex			mem_hash_lock;
-	struct mutex			mmu_lock;
 	struct mutex			hw_block_list_lock;
 	struct list_head		debugfs_list;
 	struct list_head		hw_block_mem_list;
 	struct hl_cs_counters_atomic	cs_counters;
 	struct gen_pool			*cb_va_pool;
 	struct hl_encaps_signals_mgr	sig_mgr;
+	u64				cb_va_pool_base;
 	u64				cs_sequence;
 	u64				*dram_default_hops;
 	spinlock_t			cs_lock;
@@ -1823,7 +1858,6 @@ struct hl_ctx_mgr {
 };
 
 
-
 /*
  * COMMAND SUBMISSIONS
  */
@@ -1889,7 +1923,7 @@ struct hl_userptr {
  * @tdr_active: true if TDR was activated for this CS (to prevent
  *		double TDR activation).
  * @aborted: true if CS was aborted due to some device error.
- * @timestamp: true if a timestmap must be captured upon completion.
+ * @timestamp: true if a timestamp must be captured upon completion.
  * @staged_last: true if this is the last staged CS and needs completion.
  * @staged_first: true if this is the first staged CS and we need to receive
  *                timeout for this CS.
@@ -2047,14 +2081,16 @@ struct hl_vm_hash_node {
  * @node: node to hang on the list in context object.
  * @ctx: the context this node belongs to.
  * @vaddr: virtual address of the HW block.
- * @size: size of the block.
+ * @block_size: size of the block.
+ * @mapped_size: size of the block which is mapped. May change if partial un-mappings are done.
  * @id: HW block id (handle).
  */
 struct hl_vm_hw_block_list_node {
 	struct list_head	node;
 	struct hl_ctx		*ctx;
 	unsigned long		vaddr;
-	u32			size;
+	u32			block_size;
+	u32			mapped_size;
 	u32			id;
 };
 
@@ -2214,7 +2250,7 @@ struct hl_info_list {
 
 /**
  * struct hl_debugfs_entry - debugfs dentry wrapper.
- * @info_ent: dentry realted ops.
+ * @info_ent: dentry related ops.
  * @dev_entry: ASIC specific debugfs manager.
  */
 struct hl_debugfs_entry {
@@ -2492,7 +2528,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 				break; \
 			(val) = __elbi_read; \
 		} else {\
-			(val) = RREG32((u32)addr); \
+			(val) = RREG32((u32)(addr)); \
 		} \
 		if (cond) \
 			break; \
@@ -2503,7 +2539,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 					break; \
 				(val) = __elbi_read; \
 			} else {\
-				(val) = RREG32((u32)addr); \
+				(val) = RREG32((u32)(addr)); \
 			} \
 			break; \
 		} \
@@ -2919,7 +2955,7 @@ struct razwi_info {
  * struct undefined_opcode_info - info about last undefined opcode error
  * @timestamp: timestamp of the undefined opcode error
  * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ
- *                   entiers. In case all streams array entries are
+ *                   entries. In case all streams array entries are
  *                   filled with values, it means the execution was in Lower-CP.
  * @cq_addr: the address of the current handled command buffer
  * @cq_size: the size of the current handled command buffer
@@ -2946,12 +2982,12 @@ struct undefined_opcode_info {
 };
 
 /**
- * struct last_error_session_info - info about last session errors occurred.
- * @cs_timeout: CS timeout error last information.
- * @razwi: razwi last information.
+ * struct hl_error_info - holds information collected during an error.
+ * @cs_timeout: CS timeout error information.
+ * @razwi: razwi information.
  * @undef_opcode: undefined opcode information
  */
-struct last_error_session_info {
+struct hl_error_info {
 	struct cs_timeout_info		cs_timeout;
 	struct razwi_info		razwi;
 	struct undefined_opcode_info	undef_opcode;
@@ -2960,7 +2996,7 @@ struct last_error_session_info {
 /**
  * struct hl_reset_info - holds current device reset information.
  * @lock: lock to protect critical reset flows.
- * @compute_reset_cnt: number of compte resets since the driver was loaded.
+ * @compute_reset_cnt: number of compute resets since the driver was loaded.
  * @hard_reset_cnt: number of hard resets since the driver was loaded.
  * @hard_reset_schedule_flags: hard reset is scheduled to after current compute reset,
  *                             here we hold the hard reset flags.
@@ -2971,7 +3007,7 @@ struct last_error_session_info {
  * @hard_reset_pending: is there a hard reset work pending.
  * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
  *                    triggered, and cleared after it is shared with preboot.
- * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden
+ * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden
  *                      with a new value on next reset
  * @reset_trigger_repeated: set if device reset is triggered more than once with
  *                          same cause.
@@ -3041,6 +3077,12 @@ struct hl_reset_info {
  * @asid_mutex: protects asid_bitmap.
  * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue.
  * @debug_lock: protects critical section of setting debug mode for device
+ * @mmu_lock: protects the MMU page tables and invalidation h/w. Although the
+ *            page tables are per context, the invalidation h/w is per MMU.
+ *            Therefore, we can't allow multiple contexts (we only have two,
+ *            user and kernel) to access the invalidation h/w at the same time.
+ *            In addition, any change to the PGT, modifying the MMU hash or
+ *            walking the PGT requires talking this lock.
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
@@ -3049,7 +3091,7 @@ struct hl_reset_info {
  * @hl_chip_info: ASIC's sensors information.
  * @device_status_description: device status description.
  * @hl_debugfs: device's debugfs manager.
- * @cb_pool: list of preallocated CBs.
+ * @cb_pool: list of pre allocated CBs.
  * @cb_pool_lock: protects the CB pool.
  * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
  * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
@@ -3070,7 +3112,7 @@ struct hl_reset_info {
  * @state_dump_specs: constants and dictionaries needed to dump system state.
  * @multi_cs_completion: array of multi-CS completion.
  * @clk_throttling: holds information about current/previous clock throttling events
- * @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @captured_err_info: holds information about errors.
  * @reset_info: holds current device reset information.
  * @stream_master_qid_arr: pointer to array with QIDs of master streams.
  * @fw_major_version: major version of current loaded preboot.
@@ -3111,7 +3153,8 @@ struct hl_reset_info {
  * @edma_binning: contains mask of edma engines that is received from the f/w which
  *                   indicates which edma engines are binned-out
  * @id: device minor.
- * @id_control: minor of the control device
+ * @id_control: minor of the control device.
+ * @cdev_idx: char device index. Used for setting its name.
  * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
  *                    addresses.
  * @is_in_dram_scrub: true if dram scrub operation is on going.
@@ -3165,6 +3208,7 @@ struct hl_reset_info {
  *                         Used only for testing.
  * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies
  *             that the f/w is always alive. Used only for testing.
+ * @supports_ctx_switch: true if a ctx switch is required upon first submission.
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -3204,6 +3248,7 @@ struct hl_device {
 	struct mutex			asid_mutex;
 	struct mutex			send_cpu_message_lock;
 	struct mutex			debug_lock;
+	struct mutex			mmu_lock;
 	struct asic_fixed_properties	asic_prop;
 	const struct hl_asic_funcs	*asic_funcs;
 	void				*asic_specific;
@@ -3242,7 +3287,7 @@ struct hl_device {
 	struct multi_cs_completion	multi_cs_completion[
 							MULTI_CS_MAX_USER_CTX];
 	struct hl_clk_throttle		clk_throttling;
-	struct last_error_session_info	last_error;
+	struct hl_error_info		captured_err_info;
 
 	struct hl_reset_info		reset_info;
 
@@ -3271,6 +3316,7 @@ struct hl_device {
 	u32				edma_binning;
 	u16				id;
 	u16				id_control;
+	u16				cdev_idx;
 	u16				cpu_pci_msb_addr;
 	u8				is_in_dram_scrub;
 	u8				disabled;
@@ -3300,6 +3346,7 @@ struct hl_device {
 	u8				compute_ctx_in_release;
 	u8				supports_mmu_prefetch;
 	u8				reset_upon_device_release;
+	u8				supports_ctx_switch;
 
 	/* Parameters for bring-up */
 	u64				nic_ports_mask;
@@ -3426,15 +3473,18 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 }
 
 uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
-void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-					gfp_t flag);
-void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
-					dma_addr_t dma_handle);
-void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle);
-void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr);
-void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
-					dma_addr_t *dma_handle);
-void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr);
+void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+					gfp_t flag, const char *caller);
+void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
+					dma_addr_t dma_handle, const char *caller);
+void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
+						dma_addr_t *dma_handle, const char *caller);
+void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
+						const char *caller);
+void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+					dma_addr_t *dma_handle, const char *caller);
+void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
+					const char *caller);
 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
@@ -3513,6 +3563,7 @@ void hl_sysfs_fini(struct hl_device *hdev);
 
 int hl_hwmon_init(struct hl_device *hdev);
 void hl_hwmon_fini(struct hl_device *hdev);
+void hl_hwmon_release_resources(struct hl_device *hdev);
 
 int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
 			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
@@ -3557,7 +3608,7 @@ void hl_hw_block_mem_init(struct hl_ctx *ctx);
 void hl_hw_block_mem_fini(struct hl_ctx *ctx);
 
 u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
-		enum hl_va_range_type type, u32 size, u32 alignment);
+		enum hl_va_range_type type, u64 size, u32 alignment);
 int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 		u64 start_addr, u64 size);
 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
@@ -3674,6 +3725,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
 				struct cpucp_hbm_row_info *info);
 int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num);
 int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid);
+int hl_fw_send_device_activity(struct hl_device *hdev, bool open);
 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 			bool is_wc[3]);
 int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
@@ -3697,6 +3749,8 @@ int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *va
 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 long hl_fw_get_max_power(struct hl_device *hdev);
 void hl_fw_set_max_power(struct hl_device *hdev);
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+				u32 nonce);
 int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
@@ -3743,6 +3797,7 @@ struct hl_mmap_mem_buf *
 hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
 		      struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp,
 		      void *args);
+__printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index f733ead605e7..112632afe7d5 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -14,6 +14,9 @@
 #include <linux/aer.h>
 #include <linux/module.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/habanalabs.h>
+
 #define HL_DRIVER_AUTHOR	"HabanaLabs Kernel Driver Team"
 
 #define HL_DRIVER_DESC		"Driver for HabanaLabs's AI Accelerators"
@@ -27,7 +30,10 @@ static struct class *hl_class;
 static DEFINE_IDR(hl_devs_idr);
 static DEFINE_MUTEX(hl_devs_idr_lock);
 
-static int timeout_locked = 30;
+#define HL_DEFAULT_TIMEOUT_LOCKED	30	/* 30 seconds */
+#define GAUDI_DEFAULT_TIMEOUT_LOCKED	600	/* 10 minutes */
+
+static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
 static int reset_on_lockup = 1;
 static int memory_scrub;
 static ulong boot_error_status_mask = ULONG_MAX;
@@ -55,14 +61,12 @@ MODULE_PARM_DESC(boot_error_status_mask,
 #define PCI_IDS_GAUDI_SEC		0x1010
 
 #define PCI_IDS_GAUDI2			0x1020
-#define PCI_IDS_GAUDI2_SEC		0x1030
 
 static const struct pci_device_id ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
-	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ids);
@@ -92,9 +96,6 @@ static enum hl_asic_type get_asic_type(u16 device)
 	case PCI_IDS_GAUDI2:
 		asic_type = ASIC_GAUDI2;
 		break;
-	case PCI_IDS_GAUDI2_SEC:
-		asic_type = ASIC_GAUDI2_SEC;
-		break;
 	default:
 		asic_type = ASIC_INVALID;
 		break;
@@ -107,7 +108,6 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
 {
 	switch (asic_type) {
 	case ASIC_GAUDI_SEC:
-	case ASIC_GAUDI2_SEC:
 		return true;
 	default:
 		return false;
@@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	mutex_lock(&hdev->fpriv_list_lock);
 
 	if (!hl_device_operational(hdev, &status)) {
-		dev_err_ratelimited(hdev->dev,
+		dev_dbg_ratelimited(hdev->dev,
 			"Can't open %s because it is %s\n",
 			dev_name(hdev->dev), hdev->status[status]);
 
@@ -207,11 +207,13 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	list_add(&hpriv->dev_node, &hdev->fpriv_list);
 	mutex_unlock(&hdev->fpriv_list_lock);
 
+	hdev->asic_funcs->send_device_activity(hdev, true);
+
 	hl_debugfs_add_file(hpriv);
 
-	atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
-	atomic_set(&hdev->last_error.razwi.write_enable, 1);
-	hdev->last_error.undef_opcode.write_enable = true;
+	atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
+	atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
+	hdev->captured_err_info.undef_opcode.write_enable = true;
 
 	hdev->open_counter++;
 	hdev->last_successful_open_jif = jiffies;
@@ -269,7 +271,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	mutex_lock(&hdev->fpriv_ctrl_list_lock);
 
 	if (!hl_device_operational(hdev, NULL)) {
-		dev_err_ratelimited(hdev->dev_ctrl,
+		dev_dbg_ratelimited(hdev->dev_ctrl,
 			"Can't open %s because it is disabled or in reset\n",
 			dev_name(hdev->dev_ctrl));
 		rc = -EPERM;
@@ -314,12 +316,22 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
 	hdev->boot_error_status_mask = boot_error_status_mask;
 }
 
-static void fixup_device_params_per_asic(struct hl_device *hdev)
+static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
 {
 	switch (hdev->asic_type) {
-	case ASIC_GOYA:
 	case ASIC_GAUDI:
 	case ASIC_GAUDI_SEC:
+		/* If user didn't request a different timeout than the default one, we have
+		 * a different default timeout for Gaudi
+		 */
+		if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
+			hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
+										MSEC_PER_SEC);
+
+		hdev->reset_upon_device_release = 0;
+		break;
+
+	case ASIC_GOYA:
 		hdev->reset_upon_device_release = 0;
 		break;
 
@@ -339,7 +351,7 @@ static int fixup_device_params(struct hl_device *hdev)
 	hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
 
 	if (tmp_timeout)
-		hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000);
+		hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
 	else
 		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
 
@@ -360,7 +372,7 @@ static int fixup_device_params(struct hl_device *hdev)
 	if (!hdev->cpu_queues_enable)
 		hdev->heartbeat = 0;
 
-	fixup_device_params_per_asic(hdev);
+	fixup_device_params_per_asic(hdev, tmp_timeout);
 
 	return 0;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 6a30bd98ab5e..43afe40966e5 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
 	[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
@@ -103,6 +104,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 
 	hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
 	hw_ip.server_type = prop->server_type;
+	hw_ip.security_enabled = prop->fw_security_enabled;
 
 	return copy_to_user(out, &hw_ip,
 		min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
@@ -591,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	info.seq = hdev->last_error.cs_timeout.seq;
-	info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
+	info.seq = hdev->captured_err_info.cs_timeout.seq;
+	info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);
 
 	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
 }
@@ -607,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
-	info.addr = hdev->last_error.razwi.addr;
-	info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
-	info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
-	info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
-	info.error_type = hdev->last_error.razwi.type;
+	info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
+	info.addr = hdev->captured_err_info.razwi.addr;
+	info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
+	info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
+	info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
+	info.error_type = hdev->captured_err_info.razwi.type;
 
 	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
 }
@@ -627,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp);
-	info.engine_id = hdev->last_error.undef_opcode.engine_id;
-	info.cq_addr = hdev->last_error.undef_opcode.cq_addr;
-	info.cq_size = hdev->last_error.undef_opcode.cq_size;
-	info.stream_id = hdev->last_error.undef_opcode.stream_id;
-	info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len;
-	memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams,
+	info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
+	info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
+	info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
+	info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
+	info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
+	info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
+	memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
 			sizeof(info.cb_addr_streams));
 
 	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
@@ -660,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_
 	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
 }
 
+static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	struct cpucp_sec_attest_info *sec_attest_info;
+	struct hl_info_sec_attest *info;
+	u32 max_size = args->return_size;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL);
+	if (!sec_attest_info)
+		return -ENOMEM;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		rc = -ENOMEM;
+		goto free_sec_attest_info;
+	}
+
+	rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce);
+	if (rc)
+		goto free_info;
+
+	info->nonce = le32_to_cpu(sec_attest_info->nonce);
+	info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len);
+	info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len);
+	info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len);
+	info->pcr_num_reg = sec_attest_info->pcr_num_reg;
+	info->pcr_reg_len = sec_attest_info->pcr_reg_len;
+	info->quote_sig_len = sec_attest_info->quote_sig_len;
+	memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data));
+	memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote));
+	memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data));
+	memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate));
+	memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig));
+
+	rc = copy_to_user(out, info,
+				min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+	kfree(info);
+free_sec_attest_info:
+	kfree(sec_attest_info);
+
+	return rc;
+}
+
 static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	int rc;
@@ -697,6 +748,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	return 0;
 }
 
+static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	u32 status_buf_size = args->return_size;
+	struct hl_device *hdev = hpriv->hdev;
+	struct engines_data eng_data;
+	int rc;
+
+	if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out))
+		return -EINVAL;
+
+	eng_data.actual_size = 0;
+	eng_data.allocated_buf_size = status_buf_size;
+	eng_data.buf = vmalloc(status_buf_size);
+	if (!eng_data.buf)
+		return -ENOMEM;
+
+	hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+	if (eng_data.actual_size > eng_data.allocated_buf_size) {
+		dev_err(hdev->dev,
+			"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+			eng_data.actual_size, status_buf_size);
+		vfree(eng_data.buf);
+		return -ENOMEM;
+	}
+
+	args->user_buffer_actual_size = eng_data.actual_size;
+	rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ?
+				-EFAULT : 0;
+
+	vfree(eng_data.buf);
+
+	return rc;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -806,12 +893,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_DRAM_PENDING_ROWS:
 		return dram_pending_rows_info(hpriv, args);
 
+	case HL_INFO_SECURED_ATTESTATION:
+		return sec_attest_info(hpriv, args);
+
 	case HL_INFO_REGISTER_EVENTFD:
 		return eventfd_register(hpriv, args);
 
 	case HL_INFO_UNREGISTER_EVENTFD:
 		return eventfd_unregister(hpriv, args);
 
+	case HL_INFO_ENGINE_STATUS:
+		return engine_status_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -EINVAL;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 3f15ab9d827f..d0087c0ec48c 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -826,9 +826,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 
 	q->kernel_address = p;
 
-	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
-					sizeof(*q->shadow_queue),
-					GFP_KERNEL);
+	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
 	if (!q->shadow_queue) {
 		dev_err(hdev->dev,
 			"Failed to allocate shadow queue for H/W queue %d\n",
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index 57f5d2c48330..55eb0203817f 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -194,7 +194,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
 		curr_arr[sensors_by_type_next_index[type]++] = flags;
 	}
 
-	channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL);
+	channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *),
+				GFP_KERNEL);
 	if (!channels_info) {
 		rc = -ENOMEM;
 		goto channels_info_array_err;
@@ -910,3 +911,24 @@ void hl_hwmon_fini(struct hl_device *hdev)
 
 	hwmon_device_unregister(hdev->hwmon_dev);
 }
+
+void hl_hwmon_release_resources(struct hl_device *hdev)
+{
+	const struct hwmon_channel_info **channel_info_arr;
+	int i = 0;
+
+	if (!hdev->hl_chip_info->info)
+		return;
+
+	channel_info_arr = hdev->hl_chip_info->info;
+
+	while (channel_info_arr[i]) {
+		kfree(channel_info_arr[i]->config);
+		kfree(channel_info_arr[i]);
+		i++;
+	}
+
+	kfree(channel_info_arr);
+
+	hdev->hl_chip_info->info = NULL;
+}
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 61bc1bfe984a..ef28f3b37b93 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -457,7 +457,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
 	prev = list_prev_entry(va_block, node);
 	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
 		prev->end = va_block->end;
-		prev->size = prev->end - prev->start;
+		prev->size = prev->end - prev->start + 1;
 		list_del(&va_block->node);
 		kfree(va_block);
 		va_block = prev;
@@ -466,7 +466,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
 	next = list_next_entry(va_block, node);
 	if (&next->node != va_list && va_block->end + 1 == next->start) {
 		next->start = va_block->start;
-		next->size = next->end - next->start;
+		next->size = next->end - next->start + 1;
 		list_del(&va_block->node);
 		kfree(va_block);
 	}
@@ -755,7 +755,7 @@ out:
  * - Return the start address of the virtual block.
  */
 u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
-		enum hl_va_range_type type, u32 size, u32 alignment)
+		enum hl_va_range_type type, u64 size, u32 alignment)
 {
 	return get_va_block(hdev, ctx->va_range[type], size, 0,
 			max(alignment, ctx->va_range[type]->page_size),
@@ -1210,18 +1210,18 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
 		goto va_block_err;
 	}
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 
 	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
 	if (rc) {
 		dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
-		mutex_unlock(&ctx->mmu_lock);
+		mutex_unlock(&hdev->mmu_lock);
 		goto map_err;
 	}
 
 	rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
 				ctx->asid, ret_vaddr, phys_pg_pack->total_size);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 	if (rc)
 		goto map_err;
 
@@ -1362,7 +1362,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	else
 		vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 
 	unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
 
@@ -1375,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
 							phys_pg_pack->total_size);
 
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	/*
 	 * If the context is closing we don't need to check for the MMU cache
@@ -1418,18 +1418,23 @@ vm_type_err:
 	return rc;
 }
 
-static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
-			u32 *size)
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
 {
-	u32 block_id = 0;
+	u32 block_id;
 	int rc;
 
+	*handle = 0;
+	if (size)
+		*size = 0;
+
 	rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
+	if (rc)
+		return rc;
 
 	*handle = block_id | HL_MMAP_TYPE_BLOCK;
 	*handle <<= PAGE_SHIFT;
 
-	return rc;
+	return 0;
 }
 
 static void hw_block_vm_close(struct vm_area_struct *vma)
@@ -1437,6 +1442,13 @@ static void hw_block_vm_close(struct vm_area_struct *vma)
 	struct hl_vm_hw_block_list_node *lnode =
 		(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
 	struct hl_ctx *ctx = lnode->ctx;
+	long new_mmap_size;
+
+	new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start);
+	if (new_mmap_size > 0) {
+		lnode->mapped_size = new_mmap_size;
+		return;
+	}
 
 	mutex_lock(&ctx->hw_block_list_lock);
 	list_del(&lnode->node);
@@ -1487,23 +1499,23 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 	if (!lnode)
 		return -ENOMEM;
 
-	vma->vm_ops = &hw_block_vm_ops;
-	vma->vm_private_data = lnode;
-
-	hl_ctx_get(ctx);
-
 	rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
 	if (rc) {
-		hl_ctx_put(ctx);
 		kfree(lnode);
 		return rc;
 	}
 
+	hl_ctx_get(ctx);
+
 	lnode->ctx = ctx;
 	lnode->vaddr = vma->vm_start;
-	lnode->size = block_size;
+	lnode->block_size = block_size;
+	lnode->mapped_size = lnode->block_size;
 	lnode->id = block_id;
 
+	vma->vm_private_data = lnode;
+	vma->vm_ops = &hw_block_vm_ops;
+
 	mutex_lock(&ctx->hw_block_list_lock);
 	list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
 	mutex_unlock(&ctx->hw_block_list_lock);
@@ -2296,8 +2308,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
 		return -EFAULT;
 	}
 
-	userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
-					GFP_KERNEL);
+	userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
 	if (!userptr->pages)
 		return -ENOMEM;
 
@@ -2759,13 +2770,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 		unmap_device_va(ctx, &args, true);
 	}
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 
 	/* invalidate the cache once after the unmapping loop */
 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);
 
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	INIT_LIST_HEAD(&free_list);
 
diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c
index 56df962d2f3c..1936d653699e 100644
--- a/drivers/misc/habanalabs/common/memory_mgr.c
+++ b/drivers/misc/habanalabs/common/memory_mgr.c
@@ -11,7 +11,7 @@
  * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
  *                        the buffer descriptor.
  *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
  * @handle: requested buffer handle
  *
  * Find the buffer in the store and return a pointer to its descriptor.
@@ -104,7 +104,7 @@ int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
  * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the
  *                              given handle.
  *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
  * @handle: requested buffer handle
  *
  * Decrease the reference to the buffer, and release it if it was the last one.
@@ -137,7 +137,7 @@ int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle)
 /**
  * hl_mmap_mem_buf_alloc - allocate a new mappable buffer
  *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
  * @behavior: behavior object describing this buffer polymorphic behavior
  * @gfp: gfp flags to use for the memory allocations
  * @args: additional args passed to behavior->alloc
@@ -222,7 +222,7 @@ static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
 /**
  * hl_mem_mgr_mmap - map the given buffer to the user
  *
- * @mmg: unifed memory manager
+ * @mmg: unified memory manager
  * @vma: the vma object for which mmap was closed.
  * @args: additional args passed to behavior->mmap
  *
@@ -322,7 +322,7 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
 /**
  * hl_mem_mgr_fini - release unified memory manager
  *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
  *
  * Release the unified memory manager. Shall be called from an interrupt context.
  */
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 60740de47b34..cf8946266615 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -9,6 +9,8 @@
 
 #include "../habanalabs.h"
 
+#include <trace/events/habanalabs.h>
+
 /**
  * hl_mmu_get_funcs() - get MMU functions structure
  * @hdev: habanalabs device structure.
@@ -45,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev)
 	if (!hdev->mmu_enable)
 		return 0;
 
+	mutex_init(&hdev->mmu_lock);
+
 	if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
 		rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
 		if (rc)
@@ -86,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev)
 
 	if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
 		hdev->mmu_func[MMU_HR_PGT].fini(hdev);
+
+	mutex_destroy(&hdev->mmu_lock);
 }
 
 /**
@@ -104,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
 	if (!hdev->mmu_enable)
 		return 0;
 
-	mutex_init(&ctx->mmu_lock);
-
 	if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
 		rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
 		if (rc)
@@ -149,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 
 	if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
 		hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
-
-	mutex_destroy(&ctx->mmu_lock);
 }
 
 /*
@@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu
 	if (flush_pte)
 		mmu_funcs->flush(ctx);
 
+	if (trace_habanalabs_mmu_unmap_enabled() && !rc)
+		trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte);
+
 	return rc;
 }
 
@@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s
 	if (flush_pte)
 		mmu_funcs->flush(ctx);
 
+	trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte);
+
 	return 0;
 
 err:
@@ -403,6 +410,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
 			dev_err(hdev->dev,
 				"Map failed for va 0x%llx to pa 0x%llx\n",
 				curr_va, curr_pa);
+			/* last mapping failed so don't try to unmap it - reduce off by page_size */
+			off -= page_size;
 			goto unmap;
 		}
 	}
@@ -600,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
 	mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 	rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	if (rc)
 		return rc;
@@ -692,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work)
 {
 	struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
 	struct hl_ctx *ctx = pfw->ctx;
+	struct hl_device *hdev = ctx->hdev;
 
-	if (!hl_device_operational(ctx->hdev, NULL))
+	if (!hl_device_operational(hdev, NULL))
 		goto put_ctx;
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 
-	ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid,
-								pfw->va, pfw->size);
+	hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size);
 
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 put_ctx:
 	/*
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 6c5271f01160..36e9814139d1 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -375,6 +375,14 @@ out:
 	return max_size;
 }
 
+static ssize_t security_enabled_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
+}
+
 static DEVICE_ATTR_RO(armcp_kernel_ver);
 static DEVICE_ATTR_RO(armcp_ver);
 static DEVICE_ATTR_RO(cpld_ver);
@@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status);
 static DEVICE_ATTR_RO(thermal_ver);
 static DEVICE_ATTR_RO(uboot_ver);
 static DEVICE_ATTR_RO(fw_os_ver);
+static DEVICE_ATTR_RO(security_enabled);
 
 static struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
@@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_thermal_ver.attr,
 	&dev_attr_uboot_ver.attr,
 	&dev_attr_fw_os_ver.attr,
+	&dev_attr_security_enabled.attr,
 	NULL,
 };
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index cb2988e2c7a8..92560414e843 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -899,12 +899,13 @@ static int gaudi_early_fini(struct hl_device *hdev)
  */
 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 {
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
 	int rc;
 
-	if (hdev->asic_prop.fw_security_enabled) {
+	if ((hdev->fw_components & FW_TYPE_LINUX) &&
+			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
 		struct gaudi_device *gaudi = hdev->asic_specific;
 
 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
@@ -939,9 +940,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 			else
 				freq = pll_clk / (div_fctr + 1);
 		} else {
-			dev_warn(hdev->dev,
-				"Received invalid div select value: %d",
-				div_sel);
+			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
 			freq = 0;
 		}
 	}
@@ -985,9 +984,10 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
 
 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
-	dst_addr = (prop->sram_user_base_address &
-			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
-			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
+
+	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
+	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
+				round_up(prop->sram_user_base_address, SZ_8K));
 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
 
 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
@@ -1683,23 +1683,7 @@ disable_pci_access:
 
 static void gaudi_late_fini(struct hl_device *hdev)
 {
-	const struct hwmon_channel_info **channel_info_arr;
-	int i = 0;
-
-	if (!hdev->hl_chip_info->info)
-		return;
-
-	channel_info_arr = hdev->hl_chip_info->info;
-
-	while (channel_info_arr[i]) {
-		kfree(channel_info_arr[i]->config);
-		kfree(channel_info_arr[i]);
-		i++;
-	}
-
-	kfree(channel_info_arr);
-
-	hdev->hl_chip_info->info = NULL;
+	hl_hwmon_release_resources(hdev);
 }
 
 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
@@ -4723,7 +4707,7 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev)
 	addr = prop->sram_user_base_address;
 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
 
-	dev_dbg(hdev->dev, "Scrubing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
+	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
 			addr, addr + size, val);
 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
 	if (rc) {
@@ -6911,9 +6895,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea
 							stream, cq_ptr, size);
 
 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-		hdev->last_error.undef_opcode.cq_addr = cq_ptr;
-		hdev->last_error.undef_opcode.cq_size = size;
-		hdev->last_error.undef_opcode.stream_id = stream;
+		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
+		hdev->captured_err_info.undef_opcode.cq_size = size;
+		hdev->captured_err_info.undef_opcode.stream_id = stream;
 	}
 }
 
@@ -6979,7 +6963,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
 	}
 
 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-		struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode;
+		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
 
 		if (arr_idx == 0) {
@@ -7063,11 +7047,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
 		}
 		/* check for undefined opcode */
 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
-				hdev->last_error.undef_opcode.write_enable) {
-			memset(&hdev->last_error.undef_opcode, 0,
-						sizeof(hdev->last_error.undef_opcode));
+				hdev->captured_err_info.undef_opcode.write_enable) {
+			memset(&hdev->captured_err_info.undef_opcode, 0,
+						sizeof(hdev->captured_err_info.undef_opcode));
 
-			hdev->last_error.undef_opcode.write_enable = false;
+			hdev->captured_err_info.undef_opcode.write_enable = false;
 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
 		}
 
@@ -7233,12 +7217,6 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
 
 	switch (event_type) {
 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
-		/* In TPC QM event, notify on TPC assertion. While there isn't
-		 * a specific event for assertion yet, the FW generates QM event.
-		 * The SW upper layer will inspect an internal mapped area to indicate
-		 * if the event is a tpc assertion or tpc QM.
-		 */
-		*event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
 		index = event_type - GAUDI_EVENT_TPC0_QM;
 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
@@ -7349,18 +7327,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
 
 		/* In case it's the first razwi, save its parameters*/
-		rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0);
+		rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
 		if (rc) {
-			hdev->last_error.razwi.timestamp = ktime_get();
-			hdev->last_error.razwi.addr = razwi_addr;
-			hdev->last_error.razwi.engine_id_1 = engine_id_1;
-			hdev->last_error.razwi.engine_id_2 = engine_id_2;
+			hdev->captured_err_info.razwi.timestamp = ktime_get();
+			hdev->captured_err_info.razwi.addr = razwi_addr;
+			hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
+			hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
 			/*
 			 * If first engine id holds non valid value the razwi initiator
 			 * does not have engine id
 			 */
-			hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
-			hdev->last_error.razwi.type = razwi_type;
+			hdev->captured_err_info.razwi.non_engine_initiator =
+									(engine_id_1 == U16_MAX);
+			hdev->captured_err_info.razwi.type = razwi_type;
 
 		}
 	}
@@ -7427,7 +7406,7 @@ static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
 		event_type, desc);
 }
 
-static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
+static int gaudi_compute_reset_late_init(struct hl_device *hdev)
 {
 	/* GAUDI doesn't support any reset except hard-reset */
 	return -EPERM;
@@ -7702,6 +7681,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
 		gaudi_print_irq_info(hdev, event_type, true);
 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
 		goto reset_device;
 
@@ -7711,6 +7691,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
 		gaudi_print_irq_info(hdev, event_type, false);
 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_HBM0_SPI_0:
@@ -7722,6 +7703,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 				gaudi_hbm_event_to_dev(event_type),
 				&eq_entry->hbm_ecc_data);
 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_HBM0_SPI_1:
@@ -7733,6 +7715,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 				gaudi_hbm_event_to_dev(event_type),
 				&eq_entry->hbm_ecc_data);
 		hl_fw_unmask_irq(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI_EVENT_TPC0_DEC:
@@ -7743,10 +7726,17 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_TPC5_DEC:
 	case GAUDI_EVENT_TPC6_DEC:
 	case GAUDI_EVENT_TPC7_DEC:
+		/* In TPC DEC event, notify on TPC assertion. While there isn't
+		 * a specific event for assertion yet, the FW generates TPC DEC event.
+		 * The SW upper layer will inspect an internal mapped area to indicate
+		 * if the event is a TPC Assertion or a "real" TPC DEC.
+		 */
+		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
 		gaudi_print_irq_info(hdev, event_type, true);
 		reset_required = gaudi_tpc_read_interrupts(hdev,
 					tpc_dec_event_to_tpc_id(event_type),
 					"AXI_SLV_DEC_Error");
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		if (reset_required) {
 			dev_err(hdev->dev, "reset required due to %s\n",
 				gaudi_irq_map_table[event_type].name);
@@ -7755,6 +7745,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 			goto reset_device;
 		} else {
 			hl_fw_unmask_irq(hdev, event_type);
+			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
 		}
 		break;
 
@@ -7770,6 +7761,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		reset_required = gaudi_tpc_read_interrupts(hdev,
 					tpc_krn_event_to_tpc_id(event_type),
 					"KRN_ERR");
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		if (reset_required) {
 			dev_err(hdev->dev, "reset required due to %s\n",
 				gaudi_irq_map_table[event_type].name);
@@ -7778,6 +7770,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 			goto reset_device;
 		} else {
 			hl_fw_unmask_irq(hdev, event_type);
+			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
 		}
 		break;
 
@@ -7806,9 +7799,25 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		gaudi_print_irq_info(hdev, event_type, true);
 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
 		hl_fw_unmask_irq(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI_EVENT_PCIE_DEC:
+	case GAUDI_EVENT_CPU_AXI_SPLITTER:
+	case GAUDI_EVENT_PSOC_AXI_DEC:
+	case GAUDI_EVENT_PSOC_PRSTN_FALL:
+		gaudi_print_irq_info(hdev, event_type, true);
+		hl_fw_unmask_irq(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+		break;
+
+	case GAUDI_EVENT_MMU_PAGE_FAULT:
+	case GAUDI_EVENT_MMU_WR_PERM:
+		gaudi_print_irq_info(hdev, event_type, true);
+		hl_fw_unmask_irq(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+		break;
+
 	case GAUDI_EVENT_MME0_WBC_RSP:
 	case GAUDI_EVENT_MME0_SBAB0_RSP:
 	case GAUDI_EVENT_MME1_WBC_RSP:
@@ -7817,11 +7826,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_MME2_SBAB0_RSP:
 	case GAUDI_EVENT_MME3_WBC_RSP:
 	case GAUDI_EVENT_MME3_SBAB0_RSP:
-	case GAUDI_EVENT_CPU_AXI_SPLITTER:
-	case GAUDI_EVENT_PSOC_AXI_DEC:
-	case GAUDI_EVENT_PSOC_PRSTN_FALL:
-	case GAUDI_EVENT_MMU_PAGE_FAULT:
-	case GAUDI_EVENT_MMU_WR_PERM:
 	case GAUDI_EVENT_RAZWI_OR_ADC:
 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
@@ -7841,10 +7845,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		gaudi_print_irq_info(hdev, event_type, true);
 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
 		hl_fw_unmask_irq(hdev, event_type);
+		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
 		break;
 
 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
 		gaudi_print_irq_info(hdev, event_type, true);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_TPC0_BMON_SPMU:
@@ -7858,11 +7864,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
 		gaudi_print_irq_info(hdev, event_type, false);
 		hl_fw_unmask_irq(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
 		hl_fw_unmask_irq(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
@@ -7870,6 +7878,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		gaudi_print_sm_sei_info(hdev, event_type,
 					&eq_entry->sm_sei_data);
 		rc = hl_state_dump(hdev);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		if (rc)
 			dev_err(hdev->dev,
 				"Error during system state dump %d\n", rc);
@@ -7880,6 +7889,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		break;
 
 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		gaudi_print_clk_change_info(hdev, event_type);
 		hl_fw_unmask_irq(hdev, event_type);
 		break;
@@ -7889,20 +7899,24 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		dev_err(hdev->dev,
 			"Received high temp H/W interrupt %d (cause %d)\n",
 			event_type, cause);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI_EVENT_DEV_RESET_REQ:
 		gaudi_print_irq_info(hdev, event_type, false);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
 		gaudi_print_irq_info(hdev, event_type, false);
 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_FW_ALIVE_S:
 		gaudi_print_irq_info(hdev, event_type, false);
 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
 
 	default:
@@ -8066,8 +8080,8 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
-					u8 mask_len, struct seq_file *s)
+static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+		struct engines_data *e)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
@@ -8079,8 +8093,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 	u64 offset;
 	int i, dma_id, port;
 
-	if (s)
-		seq_puts(s,
+	if (e)
+		hl_engine_data_sprintf(e,
 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
 			"---  -------  ------------  ----------  -------------\n");
 
@@ -8097,14 +8111,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 		if (mask && !is_eng_idle)
 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
-		if (s)
-			seq_printf(s, fmt, dma_id,
+		if (e)
+			hl_engine_data_sprintf(e, fmt, dma_id,
 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
 				qm_cgm_sts, dma_core_sts0);
 	}
 
-	if (s)
-		seq_puts(s,
+	if (e)
+		hl_engine_data_sprintf(e,
 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
 			"---  -------  ------------  ----------  ----------\n");
 
@@ -8119,14 +8133,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 		if (mask && !is_eng_idle)
 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
-		if (s)
-			seq_printf(s, fmt, i,
+		if (e)
+			hl_engine_data_sprintf(e, fmt, i,
 				is_eng_idle ? "Y" : "N",
 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
 	}
 
-	if (s)
-		seq_puts(s,
+	if (e)
+		hl_engine_data_sprintf(e,
 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
 			"---  -------  ------------  ----------  -----------\n");
 
@@ -8147,20 +8161,21 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 		if (mask && !is_eng_idle)
 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
-		if (s) {
+		if (e) {
 			if (!is_slave)
-				seq_printf(s, fmt, i,
+				hl_engine_data_sprintf(e, fmt, i,
 					is_eng_idle ? "Y" : "N",
 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
 			else
-				seq_printf(s, mme_slave_fmt, i,
+				hl_engine_data_sprintf(e, mme_slave_fmt, i,
 					is_eng_idle ? "Y" : "N", "-",
 					"-", mme_arch_sts);
 		}
 	}
 
-	if (s)
-		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
+	if (e)
+		hl_engine_data_sprintf(e,
+				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
 				"---  -------  ------------  ----------\n");
 
 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
@@ -8174,8 +8189,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 			if (mask && !is_eng_idle)
 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
-			if (s)
-				seq_printf(s, nic_fmt, port,
+			if (e)
+				hl_engine_data_sprintf(e, nic_fmt, port,
 						is_eng_idle ? "Y" : "N",
 						qm_glbl_sts0, qm_cgm_sts);
 		}
@@ -8189,15 +8204,15 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 			if (mask && !is_eng_idle)
 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
-			if (s)
-				seq_printf(s, nic_fmt, port,
+			if (e)
+				hl_engine_data_sprintf(e, nic_fmt, port,
 						is_eng_idle ? "Y" : "N",
 						qm_glbl_sts0, qm_cgm_sts);
 		}
 	}
 
-	if (s)
-		seq_puts(s, "\n");
+	if (e)
+		hl_engine_data_sprintf(e, "\n");
 
 	return is_idle;
 }
@@ -8392,13 +8407,13 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 		goto destroy_internal_cb_pool;
 	}
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
 			hdev->internal_cb_pool_dma_addr,
 			HOST_SPACE_INTERNAL_CB_SZ);
 
 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	if (rc)
 		goto unreserve_internal_cb_pool;
@@ -8425,13 +8440,13 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
 		return;
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
 			HOST_SPACE_INTERNAL_CB_SZ);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
 			HOST_SPACE_INTERNAL_CB_SZ);
 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	gen_pool_destroy(hdev->internal_cb_pool);
 
@@ -9148,6 +9163,11 @@ static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group
 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
 }
 
+static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
+{
+	return 0;
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
 	.early_init = gaudi_early_init,
 	.early_fini = gaudi_early_fini,
@@ -9192,11 +9212,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.send_heartbeat = gaudi_send_heartbeat,
 	.debug_coresight = gaudi_debug_coresight,
 	.is_device_idle = gaudi_is_device_idle,
-	.non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
+	.compute_reset_late_init = gaudi_compute_reset_late_init,
 	.hw_queues_lock = gaudi_hw_queues_lock,
 	.hw_queues_unlock = gaudi_hw_queues_unlock,
-	.kdma_lock = NULL,
-	.kdma_unlock = NULL,
 	.get_pci_id = gaudi_get_pci_id,
 	.get_eeprom_data = gaudi_get_eeprom_data,
 	.get_monitor_dump = gaudi_get_monitor_dump,
@@ -9242,6 +9260,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
 	.access_dev_mem = hl_access_dev_mem,
 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
+	.send_device_activity = gaudi_send_device_activity,
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 98336a1a84b0..75c4bef7841c 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -21,7 +21,7 @@
 
 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
 
-#define GAUDI2_RESET_TIMEOUT_MSEC		500		/* 500ms */
+#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
@@ -117,6 +117,12 @@
 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
 #define MMU_RANGE_INV_ASID_SHIFT		2
 
+/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
+ * a 2 entries FIFO, and hence it is not enabled for it.
+ */
+#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
+#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
+
 #define GAUDI2_MAX_STRING_LEN			64
 
 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
@@ -610,7 +616,7 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
 	"qman_axi_err",
 	"wap sei (wbc axi err)",
 	"arc sei",
-	"mme_cfg_unalign_addr",
+	"cfg access error",
 	"qm_sw_err",
 	"sbte_dbg_intr_0",
 	"sbte_dbg_intr_1",
@@ -1525,17 +1531,57 @@ static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES]
 	RTR_ID_X_Y(17, 11)
 };
 
+enum rtr_id {
+	DCORE0_RTR0,
+	DCORE0_RTR1,
+	DCORE0_RTR2,
+	DCORE0_RTR3,
+	DCORE0_RTR4,
+	DCORE0_RTR5,
+	DCORE0_RTR6,
+	DCORE0_RTR7,
+	DCORE1_RTR0,
+	DCORE1_RTR1,
+	DCORE1_RTR2,
+	DCORE1_RTR3,
+	DCORE1_RTR4,
+	DCORE1_RTR5,
+	DCORE1_RTR6,
+	DCORE1_RTR7,
+	DCORE2_RTR0,
+	DCORE2_RTR1,
+	DCORE2_RTR2,
+	DCORE2_RTR3,
+	DCORE2_RTR4,
+	DCORE2_RTR5,
+	DCORE2_RTR6,
+	DCORE2_RTR7,
+	DCORE3_RTR0,
+	DCORE3_RTR1,
+	DCORE3_RTR2,
+	DCORE3_RTR3,
+	DCORE3_RTR4,
+	DCORE3_RTR5,
+	DCORE3_RTR6,
+	DCORE3_RTR7,
+};
+
 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
-	1, 1, 2, 2, 3, 3, 14, 14, 13, 13, 12, 12, 19, 19, 18, 18, 17,
-	17, 28, 28, 29, 29, 30, 30, 0
+	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
+	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
+	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
+	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
+	DCORE0_RTR0
 };
 
 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
-	0, 0, 15, 15, 16, 16, 31, 31, 0, 0
+	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
+	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
 };
 
 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
-	15, 15, 15, 15, 15, 16, 16, 16, 16, 31, 31, 31
+	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
+	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
 };
 
 struct sft_info {
@@ -1548,11 +1594,11 @@ static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE
 };
 
 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
-	0, 0
+	DCORE0_RTR0, DCORE0_RTR0
 };
 
 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
-	16, 31
+	DCORE2_RTR0, DCORE3_RTR7
 };
 
 struct mme_initiators_rtr_id {
@@ -1663,7 +1709,7 @@ struct gaudi2_cache_invld_params {
 };
 
 struct gaudi2_tpc_idle_data {
-	struct seq_file *s;
+	struct engines_data *e;
 	unsigned long *mask;
 	bool *is_idle;
 	const char *tpc_fmt;
@@ -1706,6 +1752,9 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
 	int dcore, inst, tpc_seq;
 	u32 offset;
 
+	/* init the return code */
+	ctx->rc = 0;
+
 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
@@ -1715,7 +1764,12 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
 
 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
 
-			ctx->fn(hdev, dcore, inst, offset, ctx->data);
+			ctx->fn(hdev, dcore, inst, offset, ctx);
+			if (ctx->rc) {
+				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
+							dcore, inst);
+				return;
+			}
 		}
 	}
 
@@ -1724,7 +1778,9 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
 
 	/* special check for PCI TPC (DCORE0_TPC6) */
 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
-	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx->data);
+	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
+	if (ctx->rc)
+		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
 }
 
 static bool gaudi2_host_phys_addr_valid(u64 addr)
@@ -1973,6 +2029,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
 	}
 
+	prop->num_engine_cores = CPU_ID_MAX;
 	prop->cfg_size = CFG_SIZE;
 	prop->max_asid = MAX_ASID;
 	prop->num_of_events = GAUDI2_EVENT_SIZE;
@@ -2005,9 +2062,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 
 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
 
-	prop->cb_va_start_addr = VA_HOST_SPACE_USER_MAPPED_CB_START;
-	prop->cb_va_end_addr = VA_HOST_SPACE_USER_MAPPED_CB_END;
-
 	prop->max_dec = NUMBER_OF_DEC;
 
 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
@@ -2477,7 +2531,6 @@ static int gaudi2_early_init(struct hl_device *hdev)
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct pci_dev *pdev = hdev->pdev;
 	resource_size_t pci_bar_size;
-	u32 fw_boot_status;
 	int rc;
 
 	rc = gaudi2_set_fixed_properties(hdev);
@@ -2505,22 +2558,14 @@ static int gaudi2_early_init(struct hl_device *hdev)
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
 
-	/* If FW security is enabled at this point it means no access to ELBI */
-	if (hdev->asic_prop.fw_security_enabled) {
-		hdev->asic_prop.iatu_done_by_fw = true;
-		goto pci_init;
-	}
-
-	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, &fw_boot_status);
-	if (rc)
-		goto free_queue_props;
-
-	/* Check whether FW is configuring iATU */
-	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
-			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
+	/*
+	 * Only in pldm driver config iATU
+	 */
+	if (hdev->pldm)
+		hdev->asic_prop.iatu_done_by_fw = false;
+	else
 		hdev->asic_prop.iatu_done_by_fw = true;
 
-pci_init:
 	rc = hl_pci_init(hdev);
 	if (rc)
 		goto free_queue_props;
@@ -2676,6 +2721,8 @@ static int gaudi2_late_init(struct hl_device *hdev)
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	int rc;
 
+	hdev->asic_prop.supports_advanced_cpucp_rc = true;
+
 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
 					gaudi2->virt_msix_db_dma_addr);
 	if (rc) {
@@ -2703,23 +2750,7 @@ disable_pci_access:
 
 static void gaudi2_late_fini(struct hl_device *hdev)
 {
-	const struct hwmon_channel_info **channel_info_arr;
-	int i = 0;
-
-	if (!hdev->hl_chip_info->info)
-		return;
-
-	channel_info_arr = hdev->hl_chip_info->info;
-
-	while (channel_info_arr[i]) {
-		kfree(channel_info_arr[i]->config);
-		kfree(channel_info_arr[i]);
-		i++;
-	}
-
-	kfree(channel_info_arr);
-
-	hdev->hl_chip_info->info = NULL;
+	hl_hwmon_release_resources(hdev);
 }
 
 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
@@ -2994,7 +3025,6 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 	}
 
 	spin_lock_init(&gaudi2->hw_queues_lock);
-	spin_lock_init(&gaudi2->kdma_lock);
 
 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
 							&gaudi2->scratchpad_bus_address,
@@ -3551,7 +3581,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
 	rc = gaudi2_dec_enable_msix(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
-		goto free_completion_irq;
+		goto free_event_irq;
 	}
 
 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
@@ -3582,6 +3612,10 @@ free_user_irq:
 
 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
 
+free_event_irq:
+	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
+	free_irq(irq, cq);
+
 free_completion_irq:
 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
 	free_irq(irq, cq);
@@ -3745,14 +3779,16 @@ static void gaudi2_stop_dec(struct hl_device *hdev)
 	gaudi2_stop_pcie_dec(hdev);
 }
 
-static void gaudi2_halt_arc(struct hl_device *hdev, u32 cpu_id)
+static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
 {
 	u32 reg_base, reg_val;
 
 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
+	if (run_mode == HL_ENGINE_CORE_RUN)
+		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
+	else
+		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
 
-	/* Halt ARC */
-	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
 }
 
@@ -3762,8 +3798,35 @@ static void gaudi2_halt_arcs(struct hl_device *hdev)
 
 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
 		if (gaudi2_is_arc_enabled(hdev, arc_id))
-			gaudi2_halt_arc(hdev, arc_id);
+			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
+	}
+}
+
+static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
+{
+	int rc;
+	u32 reg_base, val, ack_mask, timeout_usec = 100000;
+
+	if (hdev->pldm)
+		timeout_usec *= 100;
+
+	reg_base = gaudi2_arc_blocks_bases[cpu_id];
+	if (run_mode == HL_ENGINE_CORE_RUN)
+		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
+	else
+		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
+
+	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
+				val, ((val & ack_mask) == ack_mask),
+				1000, timeout_usec);
+
+	if (!rc) {
+		/* Clear */
+		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
+		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
 	}
+
+	return rc;
 }
 
 static void gaudi2_reset_arcs(struct hl_device *hdev)
@@ -3790,8 +3853,39 @@ static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
 
 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
 
-	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN)
+	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
+		if (!(hdev->nic_ports_mask & BIT(i)))
+			continue;
+
 		gaudi2_qman_manual_flush_common(hdev, queue_id);
+	}
+}
+
+static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
+					u32 num_cores, u32 core_command)
+{
+	int i, rc;
+
+
+	for (i = 0 ; i < num_cores ; i++) {
+		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
+			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
+	}
+
+	for (i = 0 ; i < num_cores ; i++) {
+		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
+			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
+
+			if (rc) {
+				dev_err(hdev->dev, "failed to %s arc: %d\n",
+					(core_command == HL_ENGINE_CORE_HALT) ?
+					"HALT" : "RUN", core_ids[i]);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
 }
 
 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
@@ -4124,11 +4218,15 @@ static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
 
 	/* Enable the QMAN channel.
-	 * PDMA1 QMAN configuration is different, as we do not allow user to
-	 * access CP2/3, it is reserved for the ARC usage.
+	 * PDMA QMAN configuration is different, as we do not allow user to
+	 * access some of the CPs.
+	 * PDMA0: CP2/3 are reserved for the ARC usage.
+	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
 	 */
 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
+	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
+		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
 	else
 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
 }
@@ -4501,10 +4599,10 @@ struct gaudi2_tpc_init_cfg_data {
 };
 
 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
-							u32 offset, void *data)
+					u32 offset, struct iterate_module_ctx *ctx)
 {
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
-	struct gaudi2_tpc_init_cfg_data *cfg_data = data;
+	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
 	u32 queue_id_base;
 	u8 seq;
 
@@ -4956,8 +5054,7 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
 	return 0;
 }
 
-static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base,
-					u32 stlb_base)
+static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
 {
 	u32 status, timeout_usec;
 	int rc;
@@ -4985,7 +5082,6 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base,
 		return rc;
 
 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
-	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, 0xF);
 
 	rc = hl_poll_timeout(
 		hdev,
@@ -5042,6 +5138,8 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev)
 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
 	}
 
+	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
+
 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
 	if (rc)
 		return rc;
@@ -5092,6 +5190,8 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
 
+	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
+
 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
 	if (rc)
 		return rc;
@@ -5339,7 +5439,10 @@ static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms
 
 	if (!driver_performs_reset) {
 		/* set SP to indicate reset request sent to FW */
-		WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
+		if (dyn_regs->cpu_rst_status)
+			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
+		else
+			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
 
 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
@@ -5527,10 +5630,11 @@ static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
 	u64 hw_test_cap_bit = 0;
 
 	switch (hw_queue_id) {
-	case GAUDI2_QUEUE_ID_PDMA_0_0 ... GAUDI2_QUEUE_ID_PDMA_1_1:
+	case GAUDI2_QUEUE_ID_PDMA_0_0:
+	case GAUDI2_QUEUE_ID_PDMA_0_1:
+	case GAUDI2_QUEUE_ID_PDMA_1_0:
 		hw_cap_mask = HW_CAP_PDMA_MASK;
 		break;
-
 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
@@ -6129,7 +6233,7 @@ done:
 	return ret_val;
 }
 
-static int gaudi2_non_hard_reset_late_init(struct hl_device *hdev)
+static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
 {
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	size_t irq_arr_size;
@@ -6147,9 +6251,9 @@ static int gaudi2_non_hard_reset_late_init(struct hl_device *hdev)
 }
 
 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
-					void *data)
+					struct iterate_module_ctx *ctx)
 {
-	struct gaudi2_tpc_idle_data *idle_data = (struct gaudi2_tpc_idle_data *)data;
+	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
 	bool is_eng_idle;
 	int engine_idx;
@@ -6172,14 +6276,15 @@ static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int ins
 	if (idle_data->mask && !is_eng_idle)
 		set_bit(engine_idx, idle_data->mask);
 
-	if (idle_data->s)
-		seq_printf(idle_data->s, idle_data->tpc_fmt, dcore, inst,
+	if (idle_data->e)
+		hl_engine_data_sprintf(idle_data->e,
+					idle_data->tpc_fmt, dcore, inst,
 					is_eng_idle ? "Y" : "N",
 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
 }
 
-static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
-					u8 mask_len, struct seq_file *s)
+static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+					struct engines_data *e)
 {
 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
 		mme_arch_sts, dec_swreg15, dec_enabled_bit;
@@ -6197,7 +6302,7 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 	struct gaudi2_tpc_idle_data tpc_idle_data = {
 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
-		.s = s,
+		.e = e,
 		.mask = mask,
 		.is_idle = &is_idle,
 	};
@@ -6209,8 +6314,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 	int engine_idx, i, j;
 
 	/* EDMA, Two engines per Dcore */
-	if (s)
-		seq_puts(s,
+	if (e)
+		hl_engine_data_sprintf(e,
 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
 			"----  ----  -------  ------------  ----------------------\n");
 
@@ -6239,19 +6344,19 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 			if (mask && !is_eng_idle)
 				set_bit(engine_idx, mask);
 
-			if (s)
-				seq_printf(s, edma_fmt, i, j,
-						is_eng_idle ? "Y" : "N",
-						qm_glbl_sts0,
-						dma_core_idle_ind_mask);
+			if (e)
+				hl_engine_data_sprintf(e, edma_fmt, i, j,
+							is_eng_idle ? "Y" : "N",
+							qm_glbl_sts0,
+							dma_core_idle_ind_mask);
 		}
 	}
 
 	/* PDMA, Two engines in Full chip */
-	if (s)
-		seq_puts(s,
-			"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
-			"----  -------  ------------  ----------------------\n");
+	if (e)
+		hl_engine_data_sprintf(e,
+					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
+					"----  -------  ------------  ----------------------\n");
 
 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
@@ -6269,16 +6374,16 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 		if (mask && !is_eng_idle)
 			set_bit(engine_idx, mask);
 
-		if (s)
-			seq_printf(s, pdma_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
-					dma_core_idle_ind_mask);
+		if (e)
+			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
+						qm_glbl_sts0, dma_core_idle_ind_mask);
 	}
 
 	/* NIC, twelve macros in Full chip */
-	if (s && hdev->nic_ports_mask)
-		seq_puts(s,
-			"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
-			"---  -------  ------------  ----------\n");
+	if (e && hdev->nic_ports_mask)
+		hl_engine_data_sprintf(e,
+					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
+					"---  -------  ------------  ----------\n");
 
 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
 		if (!(i & 1))
@@ -6302,15 +6407,15 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 		if (mask && !is_eng_idle)
 			set_bit(engine_idx, mask);
 
-		if (s)
-			seq_printf(s, nic_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
-					qm_cgm_sts);
+		if (e)
+			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
+						qm_glbl_sts0, qm_cgm_sts);
 	}
 
-	if (s)
-		seq_puts(s,
-			"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
-			"---  ----  -------  ------------  ---------------\n");
+	if (e)
+		hl_engine_data_sprintf(e,
+					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
+					"---  ----  -------  ------------  ---------------\n");
 	/* MME, one per Dcore */
 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
@@ -6327,8 +6432,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
 		is_idle &= is_eng_idle;
 
-		if (s)
-			seq_printf(s, mme_fmt, i, "N",
+		if (e)
+			hl_engine_data_sprintf(e, mme_fmt, i, "N",
 				is_eng_idle ? "Y" : "N",
 				qm_glbl_sts0,
 				mme_arch_sts);
@@ -6340,16 +6445,16 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 	/*
 	 * TPC
 	 */
-	if (s && prop->tpc_enabled_mask)
-		seq_puts(s,
+	if (e && prop->tpc_enabled_mask)
+		hl_engine_data_sprintf(e,
 			"\nCORE  TPC   is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_IDLE_IND_MASK\n"
 			"----  ---  --------  ------------  ----------  ----------------------\n");
 
 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
 
 	/* Decoders, two each Dcore and two shared PCIe decoders */
-	if (s && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
-		seq_puts(s,
+	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
+		hl_engine_data_sprintf(e,
 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
 			"----  ---  -------  ---------------\n");
 
@@ -6370,13 +6475,14 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 			if (mask && !is_eng_idle)
 				set_bit(engine_idx, mask);
 
-			if (s)
-				seq_printf(s, dec_fmt, i, j, is_eng_idle ? "Y" : "N", dec_swreg15);
+			if (e)
+				hl_engine_data_sprintf(e, dec_fmt, i, j,
+							is_eng_idle ? "Y" : "N", dec_swreg15);
 		}
 	}
 
-	if (s && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
-		seq_puts(s,
+	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
+		hl_engine_data_sprintf(e,
 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
 			"--------  -------  ---------------\n");
 
@@ -6395,12 +6501,13 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 		if (mask && !is_eng_idle)
 			set_bit(engine_idx, mask);
 
-		if (s)
-			seq_printf(s, pcie_dec_fmt, i, is_eng_idle ? "Y" : "N", dec_swreg15);
+		if (e)
+			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
+						is_eng_idle ? "Y" : "N", dec_swreg15);
 	}
 
-	if (s)
-		seq_puts(s,
+	if (e)
+		hl_engine_data_sprintf(e,
 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
 			"----  ----  -------  ------------  ----------  -------------\n");
 
@@ -6419,8 +6526,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 		if (mask && !is_eng_idle)
 			set_bit(engine_idx, mask);
 
-		if (s)
-			seq_printf(s, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
+		if (e)
+			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
 					qm_glbl_sts0, qm_cgm_sts, "-");
 	}
 
@@ -6443,22 +6550,6 @@ static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
 	spin_unlock(&gaudi2->hw_queues_lock);
 }
 
-static void gaudi2_kdma_lock(struct hl_device *hdev, int dcore_id)
-	__acquires(&gaudi2->kdma_lock)
-{
-	struct gaudi2_device *gaudi2 = hdev->asic_specific;
-
-	spin_lock(&gaudi2->kdma_lock);
-}
-
-static void gaudi2_kdma_unlock(struct hl_device *hdev, int dcore_id)
-	__releases(&gaudi2->kdma_lock)
-{
-	struct gaudi2_device *gaudi2 = hdev->asic_specific;
-
-	spin_unlock(&gaudi2->kdma_lock);
-}
-
 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
 {
 	return hdev->pdev->device;
@@ -6725,9 +6816,9 @@ static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
 }
 
 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
-					void *data)
+					struct iterate_module_ctx *ctx)
 {
-	struct gaudi2_tpc_mmu_data *mmu_data = (struct gaudi2_tpc_mmu_data *)data;
+	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
 
 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
@@ -7020,10 +7111,6 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
 		}
-
-		dev_err_ratelimited(hdev->dev,
-			"%s-RAZWI SHARED RR HBW WR error, captured address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n",
-			name, razwi_hi, razwi_lo, razwi_xy);
 	} else {
 		if (read_razwi_regs) {
 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
@@ -7034,11 +7121,11 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
 		}
-
-		dev_err_ratelimited(hdev->dev,
-			"%s-RAZWI SHARED RR HBW AR error, captured address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n",
-			name, razwi_hi, razwi_lo, razwi_xy);
 	}
+
+	dev_err_ratelimited(hdev->dev,
+		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
+		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
 }
 
 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
@@ -7296,7 +7383,79 @@ static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
 }
 
-static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev,
+static const char *gaudi2_get_initiators_name(u32 rtr_id)
+{
+	switch (rtr_id) {
+	case DCORE0_RTR0:
+		return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
+	case DCORE0_RTR1:
+		return "TPC0/1";
+	case DCORE0_RTR2:
+		return "TPC2/3";
+	case DCORE0_RTR3:
+		return "TPC4/5";
+	case DCORE0_RTR4:
+		return "MME0_SBTE0/1";
+	case DCORE0_RTR5:
+		return "MME0_WAP0/SBTE2";
+	case DCORE0_RTR6:
+		return "MME0_CTRL_WR/SBTE3";
+	case DCORE0_RTR7:
+		return "MME0_WAP1/CTRL_RD/SBTE4";
+	case DCORE1_RTR0:
+		return "MME1_WAP1/CTRL_RD/SBTE4";
+	case DCORE1_RTR1:
+		return "MME1_CTRL_WR/SBTE3";
+	case DCORE1_RTR2:
+		return "MME1_WAP0/SBTE2";
+	case DCORE1_RTR3:
+		return "MME1_SBTE0/1";
+	case DCORE1_RTR4:
+		return "TPC10/11";
+	case DCORE1_RTR5:
+		return "TPC8/9";
+	case DCORE1_RTR6:
+		return "TPC6/7";
+	case DCORE1_RTR7:
+		return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
+	case DCORE2_RTR0:
+		return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
+	case DCORE2_RTR1:
+		return "TPC16/17";
+	case DCORE2_RTR2:
+		return "TPC14/15";
+	case DCORE2_RTR3:
+		return "TPC12/13";
+	case DCORE2_RTR4:
+		return "MME2_SBTE0/1";
+	case DCORE2_RTR5:
+		return "MME2_WAP0/SBTE2";
+	case DCORE2_RTR6:
+		return "MME2_CTRL_WR/SBTE3";
+	case DCORE2_RTR7:
+		return "MME2_WAP1/CTRL_RD/SBTE4";
+	case DCORE3_RTR0:
+		return "MME3_WAP1/CTRL_RD/SBTE4";
+	case DCORE3_RTR1:
+		return "MME3_CTRL_WR/SBTE3";
+	case DCORE3_RTR2:
+		return "MME3_WAP0/SBTE2";
+	case DCORE3_RTR3:
+		return "MME3_SBTE0/1";
+	case DCORE3_RTR4:
+		return "TPC18/19";
+	case DCORE3_RTR5:
+		return "TPC20/21";
+	case DCORE3_RTR6:
+		return "TPC22/23";
+	case DCORE3_RTR7:
+		return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
+	default:
+	return "N/A";
+	}
+}
+
+static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
 							u64 rtr_ctrl_base_addr, bool is_write)
 {
 	u32 razwi_hi, razwi_lo;
@@ -7305,50 +7464,47 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev,
 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
 
-		dev_err_ratelimited(hdev->dev,
-			"RAZWI PSOC unmapped HBW WR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n",
-			rtr_ctrl_base_addr, razwi_hi, razwi_lo);
-
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
 	} else {
 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
-
 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
 
-		dev_err_ratelimited(hdev->dev,
-			"RAZWI PSOC unmapped HBW AR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n",
-			rtr_ctrl_base_addr, razwi_hi, razwi_lo);
-
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
 	}
+
+	dev_err_ratelimited(hdev->dev,
+		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
+		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
+
+	dev_err_ratelimited(hdev->dev,
+		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
 }
 
-static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev,
-					u64 rtr_ctrl_base_addr, bool is_write)
+static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
+							u64 rtr_ctrl_base_addr, bool is_write)
 {
 	u32 razwi_addr;
 
 	if (is_write) {
 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
 
-		dev_err_ratelimited(hdev->dev,
-			"RAZWI PSOC unmapped LBW WR error, ctr_base 0x%llx, captured address 0x%x\n",
-			rtr_ctrl_base_addr, razwi_addr);
-
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
 	} else {
 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
 
-		dev_err_ratelimited(hdev->dev,
-			"RAZWI PSOC unmapped LBW AR error, ctr_base 0x%llx, captured address 0x%x\n",
-			rtr_ctrl_base_addr, razwi_addr);
-
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
 	}
+
+	dev_err_ratelimited(hdev->dev,
+		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
+		is_write ? "WR" : "RD", rtr_id, razwi_addr);
+
+	dev_err_ratelimited(hdev->dev,
+		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
 }
 
 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
@@ -7366,21 +7522,16 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev)
 	}
 
 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
-
-	xy = (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK)
-		>> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_SHIFT;
+	xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
 
 	dev_err_ratelimited(hdev->dev,
-		"PSOC RAZWI interrupt: Mask %d, WAS_AR %d, WAS_AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
-		(razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK)
-			>> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_SHIFT,
-		(razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK)
-			>> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_SHIFT,
-		(razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK)
-			>> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_SHIFT, xy,
-		(razwi_mask_info &
-			PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK)
-			>> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_SHIFT);
+		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
+		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
+		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
+		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
+		xy,
+		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
+
 	if (xy == 0) {
 		dev_err_ratelimited(hdev->dev,
 				"PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
@@ -7410,16 +7561,20 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev)
 	lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
 
 	if (hbw_aw_set)
-		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_ctrl_base_addr, true);
+		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
+						rtr_ctrl_base_addr, true);
 
 	if (hbw_ar_set)
-		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_ctrl_base_addr, false);
+		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
+						rtr_ctrl_base_addr, false);
 
 	if (lbw_aw_set)
-		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, true);
+		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
+						rtr_ctrl_base_addr, true);
 
 	if (lbw_ar_set)
-		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, false);
+		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
+						rtr_ctrl_base_addr, false);
 
 clear:
 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
@@ -7811,14 +7966,58 @@ static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_
 						gaudi2_dma_core_interrupts_cause[i]);
 }
 
+static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev)
+{
+	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
+
+	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
+	if (RREG32(razwi_happened_addr)) {
+		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
+							NULL);
+		WREG32(razwi_happened_addr, 0x1);
+	}
+
+	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
+	if (RREG32(razwi_happened_addr)) {
+		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
+							NULL);
+		WREG32(razwi_happened_addr, 0x1);
+	}
+
+	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
+	if (RREG32(razwi_happened_addr)) {
+		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
+							NULL);
+		WREG32(razwi_happened_addr, 0x1);
+	}
+
+	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
+	if (RREG32(razwi_happened_addr)) {
+		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
+							NULL);
+		WREG32(razwi_happened_addr, 0x1);
+	}
+}
+
 static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data)
 {
 	int i;
 
-	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE; i++)
-		if (intr_cause_data & BIT_ULL(i))
-			dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
-						gaudi2_pcie_addr_dec_error_cause[i]);
+	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
+		if (!(intr_cause_data & BIT_ULL(i)))
+			continue;
+
+		dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
+					gaudi2_pcie_addr_dec_error_cause[i]);
+
+		switch (intr_cause_data & BIT_ULL(i)) {
+		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
+			break;
+		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
+			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev);
+			break;
+		}
+	}
 }
 
 static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data)
@@ -8158,10 +8357,17 @@ static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
 		return true;
 	}
 
-	dev_err_ratelimited(hdev->dev,
-		"System Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Critical(%u). Error cause: %s\n",
-		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
-		sei_data->hdr.is_critical, hbm_mc_sei_cause[cause_idx]);
+	if (sei_data->hdr.is_critical)
+		dev_err(hdev->dev,
+			"System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
+			hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
+			hbm_mc_sei_cause[cause_idx]);
+
+	else
+		dev_err_ratelimited(hdev->dev,
+			"System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
+			hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
+			hbm_mc_sei_cause[cause_idx]);
 
 	/* Print error-specific info */
 	switch (cause_idx) {
@@ -8371,6 +8577,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	bool reset_required = false, skip_reset = false;
 	int index, sbte_index;
+	u64 event_mask = 0;
 	u16 event_type;
 
 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
@@ -8392,6 +8599,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		fallthrough;
 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
 		break;
 
@@ -8401,21 +8609,25 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		fallthrough;
 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
 		gaudi2_handle_qman_err(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		gaudi2_handle_arc_farm_sei_err(hdev);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
 		gaudi2_handle_cpu_sei_err(hdev);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
@@ -8423,6 +8635,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
 		gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause);
 		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
@@ -8430,11 +8643,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP",
 						&eq_entry->razwi_with_intr_cause);
 		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
 		gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
@@ -8465,6 +8680,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
 		gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_DEC0_SPI:
@@ -8480,6 +8696,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
 		gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
@@ -8492,6 +8709,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		gaudi2_handle_mme_err(hdev, index,
 				"CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info);
 		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
@@ -8502,6 +8720,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
 		gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
@@ -8512,22 +8731,27 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
 		gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
 	case GAUDI2_EVENT_KDMA0_CORE:
 		gaudi2_handle_kdma_core_event(hdev,
 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
 		gaudi2_handle_dma_core_event(hdev,
 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
 		gaudi2_print_pcie_addr_dec_info(hdev,
 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
@@ -8536,25 +8760,30 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
 		gaudi2_handle_mmu_spi_sei_err(hdev, event_type);
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
 		gaudi2_handle_hif_fatal(hdev, event_type,
 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_PMMU_FATAL_0:
 		gaudi2_handle_pif_fatal(hdev,
 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
 		gaudi2_ack_psoc_razwi_event_handler(hdev);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 			reset_required = true;
@@ -8563,25 +8792,31 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 
 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
 		gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
 		gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
 		gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
 		gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_CPU_AXI_ECC:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
@@ -8595,17 +8830,24 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 					GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
 		gaudi2_handle_mme_sbte_err(hdev, index, sbte_index,
 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+		break;
 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
@@ -8657,6 +8899,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
@@ -8664,43 +8907,53 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
 		gaudi2_print_clk_change_info(hdev, event_type);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
 		gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		/* Do nothing- FW will handle it */
 		break;
 
 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
 		gaudi2_handle_pcie_p2p_msix(hdev);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
 		skip_reset = !gaudi2_handle_sm_err(hdev, index);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
 						le64_to_cpu(eq_entry->data[0]));
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
 						le64_to_cpu(eq_entry->data[0]));
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
 		gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err);
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI2_EVENT_ARC_DCCM_FULL:
 		hl_arc_event_handle(hdev, &eq_entry->arc_data);
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	default:
@@ -8716,15 +8969,22 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	if (!gaudi2_irq_map_table[event_type].msg)
 		hl_fw_unmask_irq(hdev, event_type);
 
+	if (event_mask)
+		hl_notifier_event_send_all(hdev, event_mask);
+
 	return;
 
 reset_device:
 	if (hdev->hard_reset_on_fw_events) {
 		hl_device_reset(hdev, reset_flags);
+		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
 	} else {
 		if (!gaudi2_irq_map_table[event_type].msg)
 			hl_fw_unmask_irq(hdev, event_type);
 	}
+
+	if (event_mask)
+		hl_notifier_event_send_all(hdev, event_mask);
 }
 
 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
@@ -9090,19 +9350,17 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
 	}
 
 	/* Create mapping on asic side */
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
 	hl_mmu_invalidate_cache_range(hdev, false,
 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
 				      ctx->asid, reserved_va_base, SZ_2M);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
 		goto unreserve_va;
 	}
 
-	hdev->asic_funcs->kdma_lock(hdev, 0);
-
 	/* Enable MMU on KDMA */
 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
 
@@ -9130,13 +9388,11 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
 
 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
 
-	hdev->asic_funcs->kdma_unlock(hdev, 0);
-
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
 	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
 				      ctx->asid, reserved_va_base, SZ_2M);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 unreserve_va:
 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
 free_data_buffer:
@@ -9189,11 +9445,11 @@ static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c
 		goto destroy_internal_cb_pool;
 	}
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
 					HOST_SPACE_INTERNAL_CB_SZ);
 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	if (rc)
 		goto unreserve_internal_cb_pool;
@@ -9218,11 +9474,11 @@ static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *
 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
 		return;
 
-	mutex_lock(&ctx->mmu_lock);
+	mutex_lock(&hdev->mmu_lock);
 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
-	mutex_unlock(&ctx->mmu_lock);
+	mutex_unlock(&hdev->mmu_lock);
 
 	gen_pool_destroy(hdev->internal_cb_pool);
 
@@ -9336,7 +9592,7 @@ static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 
 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
 {
-	struct hl_cb *cb = (struct hl_cb *) data;
+	struct hl_cb *cb = data;
 	struct packet_msg_short *pkt;
 	u32 value, ctl, pkt_size = sizeof(*pkt);
 
@@ -9429,7 +9685,7 @@ static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
 
 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
 {
-	struct hl_cb *cb = (struct hl_cb *) prop->data;
+	struct hl_cb *cb = prop->data;
 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
 
 	u64 monitor_base, fence_addr = 0;
@@ -9481,7 +9737,7 @@ static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_propert
 
 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
 {
-	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
+	struct hl_hw_sob *hw_sob = data;
 
 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
 
@@ -9724,7 +9980,7 @@ static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base
 
 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
 {
-	bool is_pmmu = (mmu_id == HW_CAP_PMMU ? true : false);
+	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	u32 mmu_base;
 
@@ -9881,6 +10137,17 @@ static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
 	return -EOPNOTSUPP;
 }
 
+int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
+{
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
+
+	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37)
+		return 0;
+
+	/* TODO: add check for FW version using minor ver once it's known */
+	return hl_fw_send_device_activity(hdev, open);
+}
+
 static const struct hl_asic_funcs gaudi2_funcs = {
 	.early_init = gaudi2_early_init,
 	.early_fini = gaudi2_early_fini,
@@ -9927,11 +10194,9 @@ static const struct hl_asic_funcs gaudi2_funcs = {
 	.send_heartbeat = gaudi2_send_heartbeat,
 	.debug_coresight = gaudi2_debug_coresight,
 	.is_device_idle = gaudi2_is_device_idle,
-	.non_hard_reset_late_init = gaudi2_non_hard_reset_late_init,
+	.compute_reset_late_init = gaudi2_compute_reset_late_init,
 	.hw_queues_lock = gaudi2_hw_queues_lock,
 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
-	.kdma_lock = gaudi2_kdma_lock,
-	.kdma_unlock = gaudi2_kdma_unlock,
 	.get_pci_id = gaudi2_get_pci_id,
 	.get_eeprom_data = gaudi2_get_eeprom_data,
 	.get_monitor_dump = gaudi2_get_monitor_dump,
@@ -9978,6 +10243,8 @@ static const struct hl_asic_funcs gaudi2_funcs = {
 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
 	.access_dev_mem = hl_access_dev_mem,
 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
+	.set_engine_cores = gaudi2_set_engine_cores,
+	.send_device_activity = gaudi2_send_device_activity,
 };
 
 void gaudi2_set_asic_funcs(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
index e4bc4009f05b..a99c348bbf39 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
@@ -15,7 +15,6 @@
 #include "../include/gaudi2/gaudi2_packets.h"
 #include "../include/gaudi2/gaudi2_fw_if.h"
 #include "../include/gaudi2/gaudi2_async_events.h"
-#include "../include/gaudi2/gaudi2_async_virt_events.h"
 
 #define GAUDI2_LINUX_FW_FILE	"habanalabs/gaudi2/gaudi2-fit.itb"
 #define GAUDI2_BOOT_FIT_FILE	"habanalabs/gaudi2/gaudi2-boot-fit.itb"
@@ -140,9 +139,6 @@
 #define VA_HOST_SPACE_HPAGE_START		0xFFF0800000000000ull
 #define VA_HOST_SPACE_HPAGE_END			0xFFF1000000000000ull /* 140TB */
 
-#define VA_HOST_SPACE_USER_MAPPED_CB_START	0xFFF1000000000000ull
-#define VA_HOST_SPACE_USER_MAPPED_CB_END	0xFFF1000100000000ull /* 4GB */
-
 /* 140TB */
 #define VA_HOST_SPACE_PAGE_SIZE		(VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START)
 
@@ -458,7 +454,6 @@ struct dup_block_ctx {
  *                 the user can map.
  * @lfsr_rand_seeds: array of MME ACC random seeds to set.
  * @hw_queues_lock: protects the H/W queues from concurrent access.
- * @kdma_lock: protects the KDMA engine from concurrent access.
  * @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory,
  *                             this memory region should be write-only.
  *                             currently used for HBW QMAN writes which is
@@ -510,9 +505,6 @@ struct dup_block_ctx {
  * @flush_db_fifo: flag to force flush DB FIFO after a write.
  * @hbm_cfg: HBM subsystem settings
  * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock.
- * @kdma_lock_mutex: used by simulator instead of kdma_lock.
- * @use_deprecated_event_mappings: use old event mappings which are about to be
- *                                 deprecated
  */
 struct gaudi2_device {
 	int (*cpucp_info_get)(struct hl_device *hdev);
@@ -521,7 +513,6 @@ struct gaudi2_device {
 	int				lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS];
 
 	spinlock_t			hw_queues_lock;
-	spinlock_t			kdma_lock;
 
 	void				*scratchpad_kernel_address;
 	dma_addr_t			scratchpad_bus_address;
@@ -562,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32
 					u32 offended_addr);
 int gaudi2_init_security(struct hl_device *hdev);
 void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
+int gaudi2_send_device_activity(struct hl_device *hdev, bool open);
 
 #endif /* GAUDI2P_H_ */
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
index eed16d642a5a..e9ac87828221 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
@@ -51,12 +51,18 @@
 	(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT)  | \
 	(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
 
-#define PDMA1_QMAN_ENABLE	\
+#define PDMA0_QMAN_ENABLE	\
 	((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
 	(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
 	(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT)  | \
 	(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
 
+#define PDMA1_QMAN_ENABLE	\
+	((0x1 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
+	(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
+	(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT)  | \
+	(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
+
 /* QM_IDLE_MASK is valid for all engines QM idle check */
 #define QM_IDLE_MASK	(DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \
 			DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \
@@ -138,4 +144,17 @@
 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT	15
 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK		0x8000
 
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT		0
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK		0x1
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT		1
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK		0x2
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT		2
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK		0x4
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT		3
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK		0x8
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT	4
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK	0x10
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT	5
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK	0x20
+
 #endif /* GAUDI2_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
index 89a06ff5ba34..c6906fb14229 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
@@ -2559,6 +2559,10 @@ static const u32 gaudi2_pb_pcie[] = {
 	mmPCIE_WRAP_BASE,
 };
 
+static const u32 gaudi2_pb_pcie_unsecured_regs[] = {
+	mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0,
+};
+
 static const u32 gaudi2_pb_thermal_sensor0[] = {
 	mmDCORE0_XFT_BASE,
 	mmDCORE0_TSTDVS_BASE,
@@ -2583,9 +2587,9 @@ struct gaudi2_tpc_pb_data {
 };
 
 static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset,
-					void *data)
+						struct iterate_module_ctx *ctx)
 {
-	struct gaudi2_tpc_pb_data *pb_data = (struct gaudi2_tpc_pb_data *)data;
+	struct gaudi2_tpc_pb_data *pb_data = ctx->data;
 
 	hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec,
 					offset, pb_data->block_array_size);
@@ -2660,15 +2664,14 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev)
 struct gaudi2_tpc_arc_pb_data {
 	u32 unsecured_regs_arr_size;
 	u32 arc_regs_arr_size;
-	int rc;
 };
 
 static void gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset,
-					void *data)
+						struct iterate_module_ctx *ctx)
 {
-	struct gaudi2_tpc_arc_pb_data *pb_data = (struct gaudi2_tpc_arc_pb_data *)data;
+	struct gaudi2_tpc_arc_pb_data *pb_data = ctx->data;
 
-	pb_data->rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1,
+	ctx->rc = hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1,
 					offset, gaudi2_pb_dcr0_tpc0_arc,
 					pb_data->arc_regs_arr_size,
 					gaudi2_pb_dcr0_tpc0_arc_unsecured_regs,
@@ -2683,12 +2686,12 @@ static int gaudi2_init_pb_tpc_arc(struct hl_device *hdev)
 	tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc);
 	tpc_arc_pb_data.unsecured_regs_arr_size =
 			ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs);
-	tpc_arc_pb_data.rc = 0;
+
 	tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges;
 	tpc_iter.data = &tpc_arc_pb_data;
 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
 
-	return tpc_arc_pb_data.rc;
+	return tpc_iter.rc;
 }
 
 static int gaudi2_init_pb_sm_objs(struct hl_device *hdev)
@@ -3419,7 +3422,8 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
 	rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
 			HL_PB_SINGLE_INSTANCE, HL_PB_NA,
 			gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie),
-			NULL, HL_PB_NA);
+			gaudi2_pb_pcie_unsecured_regs,
+			ARRAY_SIZE(gaudi2_pb_pcie_unsecured_regs));
 
 	/* Thermal Sensor.
 	 * Skip when security is enabled in F/W, because the blocks are protected by privileged RR.
@@ -3547,9 +3551,9 @@ struct gaudi2_ack_pb_tpc_data {
 };
 
 static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset,
-					void *data)
+					struct iterate_module_ctx *ctx)
 {
-	struct gaudi2_ack_pb_tpc_data *pb_data = (struct gaudi2_ack_pb_tpc_data *)data;
+	struct gaudi2_ack_pb_tpc_data *pb_data = ctx->data;
 
 	hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA,
 				gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index db4487c33582..5ef9e3ca97a6 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -916,26 +916,11 @@ int goya_late_init(struct hl_device *hdev)
  */
 void goya_late_fini(struct hl_device *hdev)
 {
-	const struct hwmon_channel_info **channel_info_arr;
 	struct goya_device *goya = hdev->asic_specific;
-	int i = 0;
 
 	cancel_delayed_work_sync(&goya->goya_work->work_freq);
 
-	if (!hdev->hl_chip_info->info)
-		return;
-
-	channel_info_arr = hdev->hl_chip_info->info;
-
-	while (channel_info_arr[i]) {
-		kfree(channel_info_arr[i]->config);
-		kfree(channel_info_arr[i]);
-		i++;
-	}
-
-	kfree(channel_info_arr);
-
-	hdev->hl_chip_info->info = NULL;
+	hl_hwmon_release_resources(hdev);
 }
 
 static void goya_set_pci_memory_regions(struct hl_device *hdev)
@@ -1040,6 +1025,7 @@ static int goya_sw_init(struct hl_device *hdev)
 	hdev->asic_prop.supports_compute_reset = true;
 	hdev->asic_prop.allow_inference_soft_reset = true;
 	hdev->supports_wait_for_multi_cs = false;
+	hdev->supports_ctx_switch = true;
 
 	hdev->asic_funcs->set_pci_memory_regions(hdev);
 
@@ -4559,7 +4545,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 	return rc;
 }
 
-static int goya_non_hard_reset_late_init(struct hl_device *hdev)
+static int goya_compute_reset_late_init(struct hl_device *hdev)
 {
 	/*
 	 * Unmask all IRQs since some could have been received
@@ -5137,8 +5123,8 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
-					u8 mask_len, struct seq_file *s)
+static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+				struct engines_data *e)
 {
 	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
 	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
@@ -5149,9 +5135,9 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 	u64 offset;
 	int i;
 
-	if (s)
-		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
-				"---  -------  ------------  -------------\n");
+	if (e)
+		hl_engine_data_sprintf(e, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
+					"---  -------  ------------  -------------\n");
 
 	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
 
@@ -5164,13 +5150,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 		if (mask && !is_eng_idle)
 			set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
-		if (s)
-			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
+		if (e)
+			hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N",
 					qm_glbl_sts0, dma_core_sts0);
 	}
 
-	if (s)
-		seq_puts(s,
+	if (e)
+		hl_engine_data_sprintf(e,
 			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
 			"---  -------  ------------  --------------  ----------\n");
 
@@ -5187,13 +5173,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 		if (mask && !is_eng_idle)
 			set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
-		if (s)
-			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
+		if (e)
+			hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N",
 				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
 	}
 
-	if (s)
-		seq_puts(s,
+	if (e)
+		hl_engine_data_sprintf(e,
 			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
 			"---  -------  ------------  --------------  -----------\n");
 
@@ -5207,10 +5193,10 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
 
 	if (mask && !is_eng_idle)
 		set_bit(GOYA_ENGINE_ID_MME_0, mask);
-	if (s) {
-		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
+	if (e) {
+		hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
 				cmdq_glbl_sts0, mme_arch_sts);
-		seq_puts(s, "\n");
+		hl_engine_data_sprintf(e, "\n");
 	}
 
 	return is_idle;
@@ -5434,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
 	return -EOPNOTSUPP;
 }
 
+static int goya_send_device_activity(struct hl_device *hdev, bool open)
+{
+	return 0;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5478,11 +5469,9 @@ static const struct hl_asic_funcs goya_funcs = {
 	.send_heartbeat = goya_send_heartbeat,
 	.debug_coresight = goya_debug_coresight,
 	.is_device_idle = goya_is_device_idle,
-	.non_hard_reset_late_init = goya_non_hard_reset_late_init,
+	.compute_reset_late_init = goya_compute_reset_late_init,
 	.hw_queues_lock = goya_hw_queues_lock,
 	.hw_queues_unlock = goya_hw_queues_unlock,
-	.kdma_lock = NULL,
-	.kdma_unlock = NULL,
 	.get_pci_id = goya_get_pci_id,
 	.get_eeprom_data = goya_get_eeprom_data,
 	.get_monitor_dump = goya_get_monitor_dump,
@@ -5528,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
 	.access_dev_mem = hl_access_dev_mem,
 	.set_dram_bar_base = goya_set_ddr_bar_base,
+	.send_device_activity = goya_send_device_activity,
 };
 
 /*
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index abf40e1c4965..baa5aa43b6f4 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -629,6 +629,12 @@ enum pq_init_status {
  * CPUCP_PACKET_ENGINE_CORE_ASID_SET -
  *       Packet to perform engine core ASID configuration
  *
+ * CPUCP_PACKET_SEC_ATTEST_GET -
+ *       Get the attestaion data that is collected during various stages of the
+ *       boot sequence. the attestation data is also hashed with some unique
+ *       number (nonce) provided by the host to prevent replay attacks.
+ *       public key and certificate also provided as part of the FW response.
+ *
  * CPUCP_PACKET_MONITOR_DUMP_GET -
  *       Get monitors registers dump from the CpuCP kernel.
  *       The CPU will put the registers dump in the a buffer allocated by the driver
@@ -636,6 +642,10 @@ enum pq_init_status {
  *       passes the max size it allows the CpuCP to write to the structure, to prevent
  *       data corruption in case of mismatched driver/FW versions.
  *       Relevant only to Gaudi.
+ *
+ * CPUCP_PACKET_ACTIVE_STATUS_SET -
+ *       LKD sends FW indication whether device is free or in use, this indication is reported
+ *       also to the BMC.
  */
 
 enum cpucp_packet_id {
@@ -687,10 +697,17 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_RESERVED,			/* not used */
 	CPUCP_PACKET_ENGINE_CORE_ASID_SET,	/* internal */
 	CPUCP_PACKET_RESERVED2,			/* not used */
+	CPUCP_PACKET_SEC_ATTEST_GET,		/* internal */
 	CPUCP_PACKET_RESERVED3,			/* not used */
 	CPUCP_PACKET_RESERVED4,			/* not used */
-	CPUCP_PACKET_RESERVED5,			/* not used */
 	CPUCP_PACKET_MONITOR_DUMP_GET,		/* debugfs */
+	CPUCP_PACKET_RESERVED5,			/* not used */
+	CPUCP_PACKET_RESERVED6,			/* not used */
+	CPUCP_PACKET_RESERVED7,			/* not used */
+	CPUCP_PACKET_RESERVED8,			/* not used */
+	CPUCP_PACKET_RESERVED9,			/* not used */
+	CPUCP_PACKET_ACTIVE_STATUS_SET,		/* internal */
+	CPUCP_PACKET_ID_MAX			/* must be last */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -783,6 +800,9 @@ struct cpucp_packet {
 		 * result cannot be used to hold general purpose data.
 		 */
 		__le32 status_mask;
+
+		/* random, used once number, for security packets */
+		__le32 nonce;
 	};
 
 	/* For NIC requests */
@@ -813,10 +833,25 @@ enum cpucp_led_index {
 	CPUCP_LED2_INDEX
 };
 
+/*
+ * enum cpucp_packet_rc - Error return code
+ * @cpucp_packet_success	-> in case of success.
+ * @cpucp_packet_invalid	-> this is to support Goya and Gaudi platform.
+ * @cpucp_packet_fault		-> in case of processing error like failing to
+ *                                 get device binding or semaphore etc.
+ * @cpucp_packet_invalid_pkt	-> when cpucp packet is un-supported. This is
+ *                                 supported Greco onwards.
+ * @cpucp_packet_invalid_params	-> when checking parameter like length of buffer
+ *				   or attribute value etc. Supported Greco onwards.
+ * @cpucp_packet_rc_max		-> It indicates size of enum so should be at last.
+ */
 enum cpucp_packet_rc {
 	cpucp_packet_success,
 	cpucp_packet_invalid,
-	cpucp_packet_fault
+	cpucp_packet_fault,
+	cpucp_packet_invalid_pkt,
+	cpucp_packet_invalid_params,
+	cpucp_packet_rc_max
 };
 
 /*
@@ -1193,6 +1228,70 @@ enum cpu_reset_status {
 	CPU_RST_STATUS_SOFT_RST_DONE = 1,
 };
 
+#define SEC_PCR_DATA_BUF_SZ	256
+#define SEC_PCR_QUOTE_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
+#define SEC_SIGNATURE_BUF_SZ	255	/* (256 - 1) 1 byte used for size */
+#define SEC_PUB_DATA_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
+#define SEC_CERTIFICATE_BUF_SZ	2046	/* (2048 - 2) 2 bytes used for size */
+
+/*
+ * struct cpucp_sec_attest_info - attestation report of the boot
+ * @pcr_data: raw values of the PCR registers
+ * @pcr_num_reg: number of PCR registers in the pcr_data array
+ * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
+ * @nonce: number only used once. random number provided by host. this also
+ *	    passed to the quote command as a qualifying data.
+ * @pcr_quote_len: length of the attestation quote data (bytes)
+ * @pcr_quote: attestation report data structure
+ * @quote_sig_len: length of the attestation report signature (bytes)
+ * @quote_sig: signature structure of the attestation report
+ * @pub_data_len: length of the public data (bytes)
+ * @public_data: public key for the signed attestation
+ *		 (outPublic + name + qualifiedName)
+ * @certificate_len: length of the certificate (bytes)
+ * @certificate: certificate for the attestation signing key
+ */
+struct cpucp_sec_attest_info {
+	__u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
+	__u8 pcr_num_reg;
+	__u8 pcr_reg_len;
+	__le16 pad0;
+	__le32 nonce;
+	__le16 pcr_quote_len;
+	__u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
+	__u8 quote_sig_len;
+	__u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
+	__le16 pub_data_len;
+	__u8 public_data[SEC_PUB_DATA_BUF_SZ];
+	__le16 certificate_len;
+	__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+};
+
+/*
+ * struct cpucp_dev_info_signed - device information signed by a secured device
+ * @info: device information structure as defined above
+ * @nonce: number only used once. random number provided by host. this number is
+ *	   hashed and signed along with the device information.
+ * @info_sig_len: length of the attestation signature (bytes)
+ * @info_sig: signature of the info + nonce data.
+ * @pub_data_len: length of the public data (bytes)
+ * @public_data: public key info signed info data
+ *		 (outPublic + name + qualifiedName)
+ * @certificate_len: length of the certificate (bytes)
+ * @certificate: certificate for the signing key
+ */
+struct cpucp_dev_info_signed {
+	struct cpucp_info info;	/* assumed to be 64bit aligned */
+	__le32 nonce;
+	__le32 pad0;
+	__u8 info_sig_len;
+	__u8 info_sig[SEC_SIGNATURE_BUF_SZ];
+	__le16 pub_data_len;
+	__u8 public_data[SEC_PUB_DATA_BUF_SZ];
+	__le16 certificate_len;
+	__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+};
+
 /*
  * struct dcore_monitor_regs_data - DCORE monitor regs data.
  * the structure follows sync manager block layout. relevant only to Gaudi.
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index a3594119bc51..e0ea51cc7475 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -34,6 +34,7 @@ enum cpu_boot_err {
 	CPU_BOOT_ERR_BINNING_FAIL = 19,
 	CPU_BOOT_ERR_TPM_FAIL = 20,
 	CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
+	CPU_BOOT_ERR_EEPROM_FAIL = 22,
 	CPU_BOOT_ERR_ENABLED = 31,
 	CPU_BOOT_ERR_SCND_EN = 63,
 	CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
@@ -115,6 +116,9 @@ enum cpu_boot_err {
  * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL	Failed to set threshold for tmperature
  *					sensor.
  *
+ * CPU_BOOT_ERR_EEPROM_FAIL		Failed reading EEPROM data. Defaults
+ *					are used.
+ *
  * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
  *					This is a main indication that the
  *					running FW populates the error
@@ -139,6 +143,7 @@ enum cpu_boot_err {
 #define CPU_BOOT_ERR0_BINNING_FAIL		(1 << CPU_BOOT_ERR_BINNING_FAIL)
 #define CPU_BOOT_ERR0_TPM_FAIL			(1 << CPU_BOOT_ERR_TPM_FAIL)
 #define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL	(1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
+#define CPU_BOOT_ERR0_EEPROM_FAIL		(1 << CPU_BOOT_ERR_EEPROM_FAIL)
 #define CPU_BOOT_ERR0_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
 #define CPU_BOOT_ERR1_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
 
@@ -426,7 +431,9 @@ struct cpu_dyn_regs {
 	__le32 gic_host_ints_irq;
 	__le32 gic_host_soft_rst_irq;
 	__le32 gic_rot_qm_irq_ctrl;
-	__le32 reserved1[22];		/* reserve for future use */
+	__le32 cpu_rst_status;
+	__le32 eng_arc_irq_ctrl;
+	__le32 reserved1[20];		/* reserve for future use */
 };
 
 /* TODO: remove the desc magic after the code is updated to use message */
@@ -465,6 +472,26 @@ enum comms_msg_type {
 	HL_COMMS_BINNING_CONF_TYPE = 3,
 };
 
+/*
+ * Binning information shared between LKD and FW
+ * @tpc_mask - TPC binning information
+ * @dec_mask - Decoder binning information
+ * @hbm_mask - HBM binning information
+ * @edma_mask - EDMA binning information
+ * @mme_mask_l - MME binning information lower 32
+ * @mme_mask_h - MME binning information upper 32
+ * @reserved - reserved field for 64 bit alignment
+ */
+struct lkd_fw_binning_info {
+	__le64 tpc_mask;
+	__le32 dec_mask;
+	__le32 hbm_mask;
+	__le32 edma_mask;
+	__le32 mme_mask_l;
+	__le32 mme_mask_h;
+	__le32 reserved;
+};
+
 /* TODO: remove this struct after the code is updated to use message */
 /* this is the comms descriptor header - meta data */
 struct comms_desc_header {
@@ -525,13 +552,7 @@ struct lkd_fw_comms_msg {
 		struct {
 			__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
 		};
-		struct {
-			__le64 tpc_binning_conf;
-			__le32 dec_binning_conf;
-			__le32 hbm_binning_conf;
-			__le32 edma_binning_conf;
-			__le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */
-		};
+		struct lkd_fw_binning_info binning_info;
 	};
 };
 
diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
index d0e2c68a639f..6aa1b1412462 100644
--- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
@@ -132,6 +132,7 @@
 #include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h"
 #include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h"
 #include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h"
+#include "pcie_wrap_special_regs.h"
 
 #include "pdma0_qm_masks.h"
 #include "pdma0_core_masks.h"
@@ -239,6 +240,7 @@
 #define SFT_IF_RTR_OFFSET	(mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE)
 
 #define ARC_HALT_REQ_OFFSET	(mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE)
+#define ARC_HALT_ACK_OFFSET	(mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE)
 
 #define ARC_REGION_CFG_OFFSET(region) \
 	(mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE)
diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h
new file mode 100644
index 000000000000..46558e7a7f63
--- /dev/null
+++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_
+#define ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_
+
+/*
+ *****************************************
+ *   PCIE_WRAP_SPECIAL
+ *   (Prototype: SPECIAL_REGS)
+ *****************************************
+ */
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_0 0x4C01E80
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_1 0x4C01E84
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_2 0x4C01E88
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_3 0x4C01E8C
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_4 0x4C01E90
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_5 0x4C01E94
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_6 0x4C01E98
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_7 0x4C01E9C
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_8 0x4C01EA0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_9 0x4C01EA4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_10 0x4C01EA8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_11 0x4C01EAC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_12 0x4C01EB0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_13 0x4C01EB4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_14 0x4C01EB8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_15 0x4C01EBC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_16 0x4C01EC0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_17 0x4C01EC4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_18 0x4C01EC8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_19 0x4C01ECC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_20 0x4C01ED0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_21 0x4C01ED4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_22 0x4C01ED8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_23 0x4C01EDC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_24 0x4C01EE0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_25 0x4C01EE4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_26 0x4C01EE8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_27 0x4C01EEC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_28 0x4C01EF0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_29 0x4C01EF4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_30 0x4C01EF8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_31 0x4C01EFC
+
+#define mmPCIE_WRAP_SPECIAL_MEM_GW_DATA 0x4C01F00
+
+#define mmPCIE_WRAP_SPECIAL_MEM_GW_REQ 0x4C01F04
+
+#define mmPCIE_WRAP_SPECIAL_MEM_NUMOF 0x4C01F0C
+
+#define mmPCIE_WRAP_SPECIAL_MEM_ECC_SEL 0x4C01F10
+
+#define mmPCIE_WRAP_SPECIAL_MEM_ECC_CTL 0x4C01F14
+
+#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_MASK 0x4C01F18
+
+#define mmPCIE_WRAP_SPECIAL_MEM_ECC_GLBL_ERR_MASK 0x4C01F1C
+
+#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_STS 0x4C01F20
+
+#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_ADDR 0x4C01F24
+
+#define mmPCIE_WRAP_SPECIAL_MEM_RM 0x4C01F28
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_MASK 0x4C01F40
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_ADDR 0x4C01F44
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_CAUSE 0x4C01F48
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 0x4C01F60
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_1 0x4C01F64
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_2 0x4C01F68
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_3 0x4C01F6C
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_0 0x4C01F80
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_1 0x4C01F84
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_2 0x4C01F88
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_3 0x4C01F8C
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_4 0x4C01F90
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_5 0x4C01F94
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_6 0x4C01F98
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_7 0x4C01F9C
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_8 0x4C01FA0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_9 0x4C01FA4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_10 0x4C01FA8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_11 0x4C01FAC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_12 0x4C01FB0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_13 0x4C01FB4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_14 0x4C01FB8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_15 0x4C01FBC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_16 0x4C01FC0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_17 0x4C01FC4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_18 0x4C01FC8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_19 0x4C01FCC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_20 0x4C01FD0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_21 0x4C01FD4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_22 0x4C01FD8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_23 0x4C01FDC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_24 0x4C01FE0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_25 0x4C01FE4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_26 0x4C01FE8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_27 0x4C01FEC
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_28 0x4C01FF0
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_29 0x4C01FF4
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_30 0x4C01FF8
+
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_31 0x4C01FFC
+
+#endif /* ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ */
diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h
deleted file mode 100644
index 6d6ed7838a64..000000000000
--- a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2022 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef __GAUDI2_ASYNC_VIRT_EVENTS_H_
-#define __GAUDI2_ASYNC_VIRT_EVENTS_H_
-
-enum gaudi2_async_virt_event_id {
-	GAUDI2_EVENT_NIC3_QM1_OLD = 1206,
-	GAUDI2_EVENT_NIC4_QM0_OLD = 1207,
-	GAUDI2_EVENT_NIC4_QM1_OLD = 1208,
-	GAUDI2_EVENT_NIC5_QM0_OLD = 1209,
-	GAUDI2_EVENT_NIC5_QM1_OLD = 1210,
-	GAUDI2_EVENT_NIC6_QM0_OLD = 1211,
-	GAUDI2_EVENT_NIC6_QM1_OLD = 1212,
-	GAUDI2_EVENT_NIC7_QM0_OLD = 1213,
-	GAUDI2_EVENT_NIC7_QM1_OLD = 1214,
-	GAUDI2_EVENT_NIC8_QM0_OLD = 1215,
-	GAUDI2_EVENT_NIC8_QM1_OLD = 1216,
-	GAUDI2_EVENT_NIC9_QM0_OLD = 1217,
-	GAUDI2_EVENT_NIC9_QM1_OLD = 1218,
-	GAUDI2_EVENT_NIC10_QM0_OLD = 1219,
-	GAUDI2_EVENT_NIC10_QM1_OLD = 1220,
-	GAUDI2_EVENT_NIC11_QM0_OLD = 1221,
-	GAUDI2_EVENT_NIC11_QM1_OLD = 1222,
-	GAUDI2_EVENT_CPU_PKT_SANITY_FAILED_OLD = 1223,
-	GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0_OLD = 1224,
-	GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG1_OLD = 1225,
-	GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG0_OLD = 1226,
-	GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG1_OLD = 1227,
-	GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG0_OLD = 1228,
-	GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG1_OLD = 1229,
-	GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG0_OLD = 1230,
-	GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG1_OLD = 1231,
-	GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG0_OLD = 1232,
-	GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG1_OLD = 1233,
-	GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG0_OLD = 1234,
-	GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG1_OLD = 1235,
-	GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG0_OLD = 1236,
-	GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG1_OLD = 1237,
-	GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG0_OLD = 1238,
-	GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG1_OLD = 1239,
-	GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG0_OLD = 1240,
-	GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG1_OLD = 1241,
-	GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG0_OLD = 1242,
-	GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG1_OLD = 1243,
-	GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG0_OLD = 1244,
-	GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG1_OLD = 1245,
-	GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0_OLD = 1246,
-	GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1_OLD = 1247,
-	GAUDI2_EVENT_ARC_DCCM_FULL_OLD = 1248,
-};
-
-#endif /* __GAUDI2_ASYNC_VIRT_EVENTS_H_ */
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index 2c4bb6d6e1a0..1cb71df966a4 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -424,7 +424,7 @@ static int ics932s401_detect(struct i2c_client *client,
 	if (revision != ICS932S401_REV)
 		dev_info(&adapter->dev, "Unknown revision %d\n", revision);
 
-	strlcpy(info->type, "ics932s401", I2C_NAME_SIZE);
+	strscpy(info->type, "ics932s401", I2C_NAME_SIZE);
 
 	return 0;
 }
diff --git a/drivers/misc/mchp_pci1xxxx/Kconfig b/drivers/misc/mchp_pci1xxxx/Kconfig
new file mode 100644
index 000000000000..4abb47de7219
--- /dev/null
+++ b/drivers/misc/mchp_pci1xxxx/Kconfig
@@ -0,0 +1,13 @@
+config GP_PCI1XXXX
+       tristate "Microchip PCI1XXXX PCIe to GPIO Expander + OTP/EEPROM manager"
+       depends on PCI
+       depends on GPIOLIB
+       select GPIOLIB_IRQCHIP
+       select AUXILIARY_BUS
+       help
+         PCI1XXXX is a PCIe GEN 3 switch with one of the endpoints having
+         multiple functions and one of the functions is a GPIO controller
+         which also has registers to interface with the OTP and EEPROM.
+         Select yes, no or module here to include or exclude the driver
+         for the GPIO function.
+
diff --git a/drivers/misc/mchp_pci1xxxx/Makefile b/drivers/misc/mchp_pci1xxxx/Makefile
new file mode 100644
index 000000000000..fc4615cfe28b
--- /dev/null
+++ b/drivers/misc/mchp_pci1xxxx/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_GP_PCI1XXXX) := mchp_pci1xxxx_gp.o mchp_pci1xxxx_gpio.o
diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c
new file mode 100644
index 000000000000..32af2b14ff34
--- /dev/null
+++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022 Microchip Technology Inc.
+
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/gpio/driver.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/idr.h>
+#include "mchp_pci1xxxx_gp.h"
+
+struct aux_bus_device {
+	struct auxiliary_device_wrapper *aux_device_wrapper[2];
+};
+
+static DEFINE_IDA(gp_client_ida);
+static const char aux_dev_otp_e2p_name[15] = "gp_otp_e2p";
+static const char aux_dev_gpio_name[15] = "gp_gpio";
+
+static void gp_auxiliary_device_release(struct device *dev)
+{
+	struct auxiliary_device_wrapper *aux_device_wrapper =
+		(struct auxiliary_device_wrapper *)container_of(dev,
+				struct auxiliary_device_wrapper, aux_dev.dev);
+
+	ida_free(&gp_client_ida, aux_device_wrapper->aux_dev.id);
+	kfree(aux_device_wrapper);
+}
+
+static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct aux_bus_device *aux_bus;
+	int retval;
+
+	retval = pcim_enable_device(pdev);
+	if (retval)
+		return retval;
+
+	aux_bus = devm_kzalloc(&pdev->dev, sizeof(*aux_bus), GFP_KERNEL);
+	if (!aux_bus)
+		return -ENOMEM;
+
+	aux_bus->aux_device_wrapper[0] = kzalloc(sizeof(*aux_bus->aux_device_wrapper[0]),
+						 GFP_KERNEL);
+	if (!aux_bus->aux_device_wrapper[0])
+		return -ENOMEM;
+
+	retval = ida_alloc(&gp_client_ida, GFP_KERNEL);
+	if (retval < 0)
+		goto err_ida_alloc_0;
+
+	aux_bus->aux_device_wrapper[0]->aux_dev.name = aux_dev_otp_e2p_name;
+	aux_bus->aux_device_wrapper[0]->aux_dev.dev.parent = &pdev->dev;
+	aux_bus->aux_device_wrapper[0]->aux_dev.dev.release = gp_auxiliary_device_release;
+	aux_bus->aux_device_wrapper[0]->aux_dev.id = retval;
+
+	aux_bus->aux_device_wrapper[0]->gp_aux_data.region_start = pci_resource_start(pdev, 0);
+	aux_bus->aux_device_wrapper[0]->gp_aux_data.region_length = pci_resource_end(pdev, 0);
+
+	retval = auxiliary_device_init(&aux_bus->aux_device_wrapper[0]->aux_dev);
+	if (retval < 0)
+		goto err_aux_dev_init_0;
+
+	retval = auxiliary_device_add(&aux_bus->aux_device_wrapper[0]->aux_dev);
+	if (retval)
+		goto err_aux_dev_add_0;
+
+	aux_bus->aux_device_wrapper[1] = kzalloc(sizeof(*aux_bus->aux_device_wrapper[1]),
+						 GFP_KERNEL);
+	if (!aux_bus->aux_device_wrapper[1])
+		return -ENOMEM;
+
+	retval = ida_alloc(&gp_client_ida, GFP_KERNEL);
+	if (retval < 0)
+		goto err_ida_alloc_1;
+
+	aux_bus->aux_device_wrapper[1]->aux_dev.name = aux_dev_gpio_name;
+	aux_bus->aux_device_wrapper[1]->aux_dev.dev.parent = &pdev->dev;
+	aux_bus->aux_device_wrapper[1]->aux_dev.dev.release = gp_auxiliary_device_release;
+	aux_bus->aux_device_wrapper[1]->aux_dev.id = retval;
+
+	aux_bus->aux_device_wrapper[1]->gp_aux_data.region_start = pci_resource_start(pdev, 0);
+	aux_bus->aux_device_wrapper[1]->gp_aux_data.region_length = pci_resource_end(pdev, 0);
+
+	retval = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+
+	if (retval < 0)
+		goto err_aux_dev_init_1;
+
+	retval = pci_irq_vector(pdev, 0);
+	if (retval < 0)
+		goto err_aux_dev_init_1;
+
+	pdev->irq = retval;
+	aux_bus->aux_device_wrapper[1]->gp_aux_data.irq_num = pdev->irq;
+
+	retval = auxiliary_device_init(&aux_bus->aux_device_wrapper[1]->aux_dev);
+	if (retval < 0)
+		goto err_aux_dev_init_1;
+
+	retval = auxiliary_device_add(&aux_bus->aux_device_wrapper[1]->aux_dev);
+	if (retval)
+		goto err_aux_dev_add_1;
+
+	pci_set_drvdata(pdev, aux_bus);
+	pci_set_master(pdev);
+
+	return 0;
+
+err_aux_dev_add_1:
+	auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev);
+
+err_aux_dev_init_1:
+	ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[1]->aux_dev.id);
+
+err_ida_alloc_1:
+	kfree(aux_bus->aux_device_wrapper[1]);
+
+err_aux_dev_add_0:
+	auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev);
+
+err_aux_dev_init_0:
+	ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[0]->aux_dev.id);
+
+err_ida_alloc_0:
+	kfree(aux_bus->aux_device_wrapper[0]);
+
+	return retval;
+}
+
+static void gp_aux_bus_remove(struct pci_dev *pdev)
+{
+	struct aux_bus_device *aux_bus = pci_get_drvdata(pdev);
+
+	auxiliary_device_delete(&aux_bus->aux_device_wrapper[0]->aux_dev);
+	auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev);
+	auxiliary_device_delete(&aux_bus->aux_device_wrapper[1]->aux_dev);
+	auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev);
+}
+
+static const struct pci_device_id pci1xxxx_tbl[] = {
+	{ PCI_DEVICE(0x1055, 0xA005) },
+	{ PCI_DEVICE(0x1055, 0xA015) },
+	{ PCI_DEVICE(0x1055, 0xA025) },
+	{ PCI_DEVICE(0x1055, 0xA035) },
+	{ PCI_DEVICE(0x1055, 0xA045) },
+	{ PCI_DEVICE(0x1055, 0xA055) },
+	{0,}
+};
+MODULE_DEVICE_TABLE(pci, pci1xxxx_tbl);
+
+static struct pci_driver pci1xxxx_gp_driver = {
+	.name = "PCI1xxxxGP",
+	.id_table = pci1xxxx_tbl,
+	.probe = gp_aux_bus_probe,
+	.remove = gp_aux_bus_remove,
+};
+
+module_pci_driver(pci1xxxx_gp_driver);
+
+MODULE_DESCRIPTION("Microchip Technology Inc. PCI1xxxx GP expander");
+MODULE_AUTHOR("Kumaravel Thiagarajan <kumaravel.thiagarajan@microchip.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h
new file mode 100644
index 000000000000..37eec73b20d7
--- /dev/null
+++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2022 Microchip Technology Inc. */
+
+#ifndef _GPIO_PCI1XXXX_H
+#define _GPIO_PCI1XXXX_H
+
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/kthread.h>
+#include <linux/types.h>
+#include <linux/auxiliary_bus.h>
+
+/* Perform operations like variable length write, read and write with read back for OTP / EEPROM
+ * Perform bit mode write in OTP
+ */
+
+struct gp_aux_data_type {
+	int irq_num;
+	resource_size_t region_start;
+	resource_size_t region_length;
+};
+
+struct auxiliary_device_wrapper {
+	struct auxiliary_device aux_dev;
+	struct gp_aux_data_type gp_aux_data;
+};
+
+#endif
diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c
new file mode 100644
index 000000000000..3389803cb281
--- /dev/null
+++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022 Microchip Technology Inc.
+// pci1xxxx gpio driver
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/gpio/driver.h>
+#include <linux/bio.h>
+#include <linux/mutex.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+
+#include "mchp_pci1xxxx_gp.h"
+
+#define PCI1XXXX_NR_PINS		93
+#define PERI_GEN_RESET			0
+#define OUT_EN_OFFSET(x)		((((x) / 32) * 4) + 0x400)
+#define INP_EN_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0x10)
+#define OUT_OFFSET(x)			((((x) / 32) * 4) + 0x400 + 0x20)
+#define INP_OFFSET(x)			((((x) / 32) * 4) + 0x400 + 0x30)
+#define PULLUP_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0x40)
+#define PULLDOWN_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0x50)
+#define OPENDRAIN_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0x60)
+#define WAKEMASK_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0x70)
+#define MODE_OFFSET(x)			((((x) / 32) * 4) + 0x400 + 0x80)
+#define INTR_LO_TO_HI_EDGE_CONFIG(x)	((((x) / 32) * 4) + 0x400 + 0x90)
+#define INTR_HI_TO_LO_EDGE_CONFIG(x)	((((x) / 32) * 4) + 0x400 + 0xA0)
+#define INTR_LEVEL_CONFIG_OFFSET(x)	((((x) / 32) * 4) + 0x400 + 0xB0)
+#define INTR_LEVEL_MASK_OFFSET(x)	((((x) / 32) * 4) + 0x400 + 0xC0)
+#define INTR_STAT_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0xD0)
+#define DEBOUNCE_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0xE0)
+#define PIO_GLOBAL_CONFIG_OFFSET	(0x400 + 0xF0)
+#define PIO_PCI_CTRL_REG_OFFSET	(0x400 + 0xF4)
+#define INTR_MASK_OFFSET(x)		((((x) / 32) * 4) + 0x400 + 0x100)
+#define INTR_STATUS_OFFSET(x)		(((x) * 4) + 0x400 + 0xD0)
+
+struct pci1xxxx_gpio {
+	struct auxiliary_device *aux_dev;
+	void __iomem *reg_base;
+	struct gpio_chip gpio;
+	spinlock_t lock;
+	int irq_base;
+};
+
+static int pci1xxxx_gpio_get_direction(struct gpio_chip *gpio, unsigned int nr)
+{
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio);
+	u32 data;
+	int ret = -EINVAL;
+
+	data = readl(priv->reg_base + INP_EN_OFFSET(nr));
+	if (data & BIT(nr % 32)) {
+		ret =  1;
+	} else {
+		data = readl(priv->reg_base + OUT_EN_OFFSET(nr));
+		if (data & BIT(nr % 32))
+			ret =  0;
+	}
+
+	return ret;
+}
+
+static inline void pci1xxx_assign_bit(void __iomem *base_addr, unsigned int reg_offset,
+				      unsigned int bitpos, bool set)
+{
+	u32 data;
+
+	data = readl(base_addr + reg_offset);
+	if (set)
+		data |= BIT(bitpos);
+	else
+		data &= ~BIT(bitpos);
+	writel(data, base_addr + reg_offset);
+}
+
+static int pci1xxxx_gpio_direction_input(struct gpio_chip *gpio, unsigned int nr)
+{
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, INP_EN_OFFSET(nr), (nr % 32), true);
+	pci1xxx_assign_bit(priv->reg_base, OUT_EN_OFFSET(nr), (nr % 32), false);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static int pci1xxxx_gpio_get(struct gpio_chip *gpio, unsigned int nr)
+{
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio);
+
+	return (readl(priv->reg_base + INP_OFFSET(nr)) >> (nr % 32)) & 1;
+}
+
+static int pci1xxxx_gpio_direction_output(struct gpio_chip *gpio,
+					  unsigned int nr, int val)
+{
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio);
+	unsigned long flags;
+	u32 data;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, INP_EN_OFFSET(nr), (nr % 32), false);
+	pci1xxx_assign_bit(priv->reg_base, OUT_EN_OFFSET(nr), (nr % 32), true);
+	data = readl(priv->reg_base + OUT_OFFSET(nr));
+	if (val)
+		data |= (1 << (nr % 32));
+	else
+		data &= ~(1 << (nr % 32));
+	writel(data, priv->reg_base + OUT_OFFSET(nr));
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static void pci1xxxx_gpio_set(struct gpio_chip *gpio,
+			      unsigned int nr, int val)
+{
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, OUT_OFFSET(nr), (nr % 32), val);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int pci1xxxx_gpio_set_config(struct gpio_chip *gpio, unsigned int offset,
+				    unsigned long config)
+{
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio);
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	switch (pinconf_to_config_param(config)) {
+	case PIN_CONFIG_BIAS_PULL_UP:
+		pci1xxx_assign_bit(priv->reg_base, PULLUP_OFFSET(offset), (offset % 32), true);
+		break;
+	case PIN_CONFIG_BIAS_PULL_DOWN:
+		pci1xxx_assign_bit(priv->reg_base, PULLDOWN_OFFSET(offset), (offset % 32), true);
+		break;
+	case PIN_CONFIG_BIAS_DISABLE:
+		pci1xxx_assign_bit(priv->reg_base, PULLUP_OFFSET(offset), (offset % 32), false);
+		pci1xxx_assign_bit(priv->reg_base, PULLDOWN_OFFSET(offset), (offset % 32), false);
+		break;
+	case PIN_CONFIG_DRIVE_OPEN_DRAIN:
+		pci1xxx_assign_bit(priv->reg_base, OPENDRAIN_OFFSET(offset), (offset % 32), true);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return ret;
+}
+
+static void pci1xxxx_gpio_irq_ack(struct irq_data *data)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(chip);
+	unsigned int gpio = irqd_to_hwirq(data);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, INTR_STAT_OFFSET(gpio), (gpio % 32), true);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void pci1xxxx_gpio_irq_set_mask(struct irq_data *data, bool set)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(chip);
+	unsigned int gpio = irqd_to_hwirq(data);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, INTR_MASK_OFFSET(gpio), (gpio % 32), set);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void pci1xxxx_gpio_irq_mask(struct irq_data *data)
+{
+	pci1xxxx_gpio_irq_set_mask(data, true);
+}
+
+static void pci1xxxx_gpio_irq_unmask(struct irq_data *data)
+{
+	pci1xxxx_gpio_irq_set_mask(data, false);
+}
+
+static int pci1xxxx_gpio_set_type(struct irq_data *data, unsigned int trigger_type)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+	struct pci1xxxx_gpio *priv = gpiochip_get_data(chip);
+	unsigned int gpio = irqd_to_hwirq(data);
+	unsigned int bitpos = gpio % 32;
+
+	if (trigger_type & IRQ_TYPE_EDGE_FALLING) {
+		pci1xxx_assign_bit(priv->reg_base, INTR_HI_TO_LO_EDGE_CONFIG(gpio),
+				   bitpos, false);
+		pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio),
+				   bitpos, false);
+		irq_set_handler_locked(data, handle_edge_irq);
+	} else {
+		pci1xxx_assign_bit(priv->reg_base, INTR_HI_TO_LO_EDGE_CONFIG(gpio),
+				   bitpos, true);
+	}
+
+	if (trigger_type & IRQ_TYPE_EDGE_RISING) {
+		pci1xxx_assign_bit(priv->reg_base, INTR_LO_TO_HI_EDGE_CONFIG(gpio),
+				   bitpos, false);
+		pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio), bitpos,
+				   false);
+		irq_set_handler_locked(data, handle_edge_irq);
+	} else {
+		pci1xxx_assign_bit(priv->reg_base, INTR_LO_TO_HI_EDGE_CONFIG(gpio),
+				   bitpos, true);
+	}
+
+	if (trigger_type & IRQ_TYPE_LEVEL_LOW) {
+		pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_CONFIG_OFFSET(gpio),
+				   bitpos, true);
+		pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_MASK_OFFSET(gpio),
+				   bitpos, false);
+		pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio), bitpos,
+				   true);
+		irq_set_handler_locked(data, handle_edge_irq);
+	}
+
+	if (trigger_type & IRQ_TYPE_LEVEL_HIGH) {
+		pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_CONFIG_OFFSET(gpio),
+				   bitpos, false);
+		pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_MASK_OFFSET(gpio),
+				   bitpos, false);
+		pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio), bitpos,
+				   true);
+		irq_set_handler_locked(data, handle_edge_irq);
+	}
+
+	if ((!(trigger_type & IRQ_TYPE_LEVEL_LOW)) && (!(trigger_type & IRQ_TYPE_LEVEL_HIGH)))
+		pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_MASK_OFFSET(gpio), bitpos, true);
+
+	return true;
+}
+
+static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id)
+{
+	struct pci1xxxx_gpio *priv = dev_id;
+	struct gpio_chip *gc =  &priv->gpio;
+	unsigned long int_status = 0;
+	unsigned long flags;
+	u8 pincount;
+	int bit;
+	u8 gpiobank;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, 16, true);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	for (gpiobank = 0; gpiobank < 3; gpiobank++) {
+		spin_lock_irqsave(&priv->lock, flags);
+		int_status = readl(priv->reg_base + INTR_STATUS_OFFSET(gpiobank));
+		spin_unlock_irqrestore(&priv->lock, flags);
+		if (gpiobank == 2)
+			pincount = 29;
+		else
+			pincount = 32;
+		for_each_set_bit(bit, &int_status, pincount) {
+			unsigned int irq;
+
+			spin_lock_irqsave(&priv->lock, flags);
+			writel(BIT(bit), priv->reg_base + INTR_STATUS_OFFSET(gpiobank));
+			spin_unlock_irqrestore(&priv->lock, flags);
+			irq = irq_find_mapping(gc->irq.domain, (bit + (gpiobank * 32)));
+			generic_handle_irq(irq);
+		}
+	}
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, 16, false);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static struct irq_chip pci1xxxx_gpio_irqchip = {
+	.name = "pci1xxxx_gpio",
+	.irq_ack = pci1xxxx_gpio_irq_ack,
+	.irq_mask = pci1xxxx_gpio_irq_mask,
+	.irq_unmask = pci1xxxx_gpio_irq_unmask,
+	.irq_set_type = pci1xxxx_gpio_set_type,
+};
+
+static int pci1xxxx_gpio_suspend(struct device *dev)
+{
+	struct pci1xxxx_gpio *priv = dev_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET,
+			   16, true);
+	pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET,
+			   17, false);
+	pci1xxx_assign_bit(priv->reg_base, PERI_GEN_RESET, 16, true);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static int pci1xxxx_gpio_resume(struct device *dev)
+{
+	struct pci1xxxx_gpio *priv = dev_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET,
+			   17, true);
+	pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET,
+			   16, false);
+	pci1xxx_assign_bit(priv->reg_base, PERI_GEN_RESET, 16, false);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static int pci1xxxx_gpio_setup(struct pci1xxxx_gpio *priv, int irq)
+{
+	struct gpio_chip *gchip = &priv->gpio;
+	struct gpio_irq_chip *girq;
+	int retval;
+
+	gchip->label = dev_name(&priv->aux_dev->dev);
+	gchip->parent = &priv->aux_dev->dev;
+	gchip->owner = THIS_MODULE;
+	gchip->direction_input = pci1xxxx_gpio_direction_input;
+	gchip->direction_output = pci1xxxx_gpio_direction_output;
+	gchip->get_direction = pci1xxxx_gpio_get_direction;
+	gchip->get = pci1xxxx_gpio_get;
+	gchip->set = pci1xxxx_gpio_set;
+	gchip->set_config = pci1xxxx_gpio_set_config;
+	gchip->dbg_show = NULL;
+	gchip->base = -1;
+	gchip->ngpio =  PCI1XXXX_NR_PINS;
+	gchip->can_sleep = false;
+
+	retval = devm_request_threaded_irq(&priv->aux_dev->dev, irq,
+					   NULL, pci1xxxx_gpio_irq_handler,
+					   IRQF_ONESHOT, "PCI1xxxxGPIO", priv);
+
+	if (retval)
+		return retval;
+
+	girq = &priv->gpio.irq;
+	girq->chip = &pci1xxxx_gpio_irqchip;
+	girq->parent_handler = NULL;
+	girq->num_parents = 0;
+	girq->parents = NULL;
+	girq->default_type = IRQ_TYPE_NONE;
+	girq->handler = handle_bad_irq;
+
+	return 0;
+}
+
+static int pci1xxxx_gpio_probe(struct auxiliary_device *aux_dev,
+			       const struct auxiliary_device_id *id)
+
+{
+	struct auxiliary_device_wrapper *aux_dev_wrapper;
+	struct gp_aux_data_type *pdata;
+	struct pci1xxxx_gpio *priv;
+	int retval;
+
+	aux_dev_wrapper = (struct auxiliary_device_wrapper *)
+			  container_of(aux_dev, struct auxiliary_device_wrapper, aux_dev);
+
+	pdata = &aux_dev_wrapper->gp_aux_data;
+
+	if (!pdata)
+		return -EINVAL;
+
+	priv = devm_kzalloc(&aux_dev->dev, sizeof(struct pci1xxxx_gpio), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	spin_lock_init(&priv->lock);
+	priv->aux_dev = aux_dev;
+
+	if (!devm_request_mem_region(&aux_dev->dev, pdata->region_start, 0x800, aux_dev->name))
+		return -EBUSY;
+
+	priv->reg_base = devm_ioremap(&aux_dev->dev, pdata->region_start, 0x800);
+	if (!priv->reg_base)
+		return -ENOMEM;
+
+	writel(0x0264, (priv->reg_base + 0x400 + 0xF0));
+
+	retval = pci1xxxx_gpio_setup(priv, pdata->irq_num);
+
+	if (retval < 0)
+		return retval;
+
+	dev_set_drvdata(&aux_dev->dev, priv);
+
+	return devm_gpiochip_add_data(&aux_dev->dev, &priv->gpio, priv);
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(pci1xxxx_gpio_pm_ops, pci1xxxx_gpio_suspend, pci1xxxx_gpio_resume);
+
+static const struct auxiliary_device_id pci1xxxx_gpio_auxiliary_id_table[] = {
+	{.name = "mchp_pci1xxxx_gp.gp_gpio"},
+	{}
+};
+MODULE_DEVICE_TABLE(auxiliary, pci1xxxx_gpio_auxiliary_id_table);
+
+static struct auxiliary_driver pci1xxxx_gpio_driver = {
+	.driver = {
+		.name = "PCI1xxxxGPIO",
+		.pm = &pci1xxxx_gpio_pm_ops,
+		},
+	.probe = pci1xxxx_gpio_probe,
+	.id_table = pci1xxxx_gpio_auxiliary_id_table
+};
+module_auxiliary_driver(pci1xxxx_gpio_driver);
+
+MODULE_DESCRIPTION("Microchip Technology Inc. PCI1xxxx GPIO controller");
+MODULE_AUTHOR("Kumaravel Thiagarajan <kumaravel.thiagarajan@microchip.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 79305e4acce2..71fbf0bc8453 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -487,7 +487,7 @@ static void mei_nfc(struct mei_cl_device *cldev)
 	}
 
 	dev_dbg(bus->dev, "nfc radio %s\n", radio_name);
-	strlcpy(cldev->name, radio_name, sizeof(cldev->name));
+	strscpy(cldev->name, radio_name, sizeof(cldev->name));
 
 disconnect:
 	mutex_lock(&bus->device_lock);
diff --git a/drivers/misc/mei/gsc-me.c b/drivers/misc/mei/gsc-me.c
index 75765e4df4ed..e63cabd0818d 100644
--- a/drivers/misc/mei/gsc-me.c
+++ b/drivers/misc/mei/gsc-me.c
@@ -68,7 +68,6 @@ static int mei_gsc_probe(struct auxiliary_device *aux_dev,
 	hw = to_me_hw(dev);
 	hw->mem_addr = devm_ioremap_resource(device, &adev->bar);
 	if (IS_ERR(hw->mem_addr)) {
-		dev_err(device, "mmio not mapped\n");
 		ret = PTR_ERR(hw->mem_addr);
 		goto err;
 	}
diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c
index 9862c6cd3e32..5d0f68b95c29 100644
--- a/drivers/misc/mei/hw-txe.c
+++ b/drivers/misc/mei/hw-txe.c
@@ -176,7 +176,7 @@ static bool mei_txe_aliveness_set(struct mei_device *dev, u32 req)
  * @dev: the device structure
  *
  * Extract HICR_HOST_ALIVENESS_RESP_ACK bit from
- * from HICR_HOST_ALIVENESS_REQ register value
+ * HICR_HOST_ALIVENESS_REQ register value
  *
  * Return: SICR_HOST_ALIVENESS_REQ_REQUESTED bit value
  */
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index 6777c419a8da..d46dba2df5a1 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -257,6 +257,8 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
 		if (IS_ERR(ev_ctx))
 			return PTR_ERR(ev_ctx);
 		rc = ocxl_irq_set_handler(ctx, irq_id, irq_handler, irq_free, ev_ctx);
+		if (rc)
+			eventfd_ctx_put(ev_ctx);
 		break;
 
 	case OCXL_IOCTL_GET_METADATA:
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index 8f786a225dcf..11530b4ec389 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -332,6 +332,22 @@ static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test,
 	return false;
 }
 
+static int pci_endpoint_test_validate_xfer_params(struct device *dev,
+		struct pci_endpoint_test_xfer_param *param, size_t alignment)
+{
+	if (!param->size) {
+		dev_dbg(dev, "Data size is zero\n");
+		return -EINVAL;
+	}
+
+	if (param->size > SIZE_MAX - alignment) {
+		dev_dbg(dev, "Maximum transfer data size exceeded\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static bool pci_endpoint_test_copy(struct pci_endpoint_test *test,
 				   unsigned long arg)
 {
@@ -363,9 +379,11 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test,
 		return false;
 	}
 
+	err = pci_endpoint_test_validate_xfer_params(dev, &param, alignment);
+	if (err)
+		return false;
+
 	size = param.size;
-	if (size > SIZE_MAX - alignment)
-		goto err;
 
 	use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
 	if (use_dma)
@@ -497,9 +515,11 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test,
 		return false;
 	}
 
+	err = pci_endpoint_test_validate_xfer_params(dev, &param, alignment);
+	if (err)
+		return false;
+
 	size = param.size;
-	if (size > SIZE_MAX - alignment)
-		goto err;
 
 	use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
 	if (use_dma)
@@ -595,9 +615,11 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test,
 		return false;
 	}
 
+	err = pci_endpoint_test_validate_xfer_params(dev, &param, alignment);
+	if (err)
+		return false;
+
 	size = param.size;
-	if (size > SIZE_MAX - alignment)
-		goto err;
 
 	use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
 	if (use_dma)
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 9f9af77f8d2e..f1336f43d3bd 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -334,10 +334,6 @@ extern int (*xp_cpu_to_nasid) (int);
 extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long);
 extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long);
 
-extern u64 xp_nofault_PIOR_target;
-extern int xp_nofault_PIOR(void *);
-extern int xp_error_PIOR(void);
-
 extern struct device *xp;
 extern enum xp_retval xp_init_uv(void);
 extern void xp_exit_uv(void);
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 8f2de1893245..e71068f7759b 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -324,7 +324,7 @@ static void *qp_alloc_queue(u64 size, u32 flags)
 
 /*
  * Copies from a given buffer or iovector to a VMCI Queue.  Uses
- * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * kmap_local_page() to dynamically map required portions of the queue
  * by traversing the offset -> page translation structure for the queue.
  * Assumes that offset + size does not wrap around in the queue.
  */
@@ -345,7 +345,7 @@ static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
 		size_t to_copy;
 
 		if (kernel_if->host)
-			va = kmap(kernel_if->u.h.page[page_index]);
+			va = kmap_local_page(kernel_if->u.h.page[page_index]);
 		else
 			va = kernel_if->u.g.vas[page_index + 1];
 			/* Skip header. */
@@ -359,12 +359,12 @@ static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
 		if (!copy_from_iter_full((u8 *)va + page_offset, to_copy,
 					 from)) {
 			if (kernel_if->host)
-				kunmap(kernel_if->u.h.page[page_index]);
+				kunmap_local(va);
 			return VMCI_ERROR_INVALID_ARGS;
 		}
 		bytes_copied += to_copy;
 		if (kernel_if->host)
-			kunmap(kernel_if->u.h.page[page_index]);
+			kunmap_local(va);
 	}
 
 	return VMCI_SUCCESS;
@@ -372,7 +372,7 @@ static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
 
 /*
  * Copies to a given buffer or iovector from a VMCI Queue.  Uses
- * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * kmap_local_page() to dynamically map required portions of the queue
  * by traversing the offset -> page translation structure for the queue.
  * Assumes that offset + size does not wrap around in the queue.
  */
@@ -393,7 +393,7 @@ static int qp_memcpy_from_queue_iter(struct iov_iter *to,
 		int err;
 
 		if (kernel_if->host)
-			va = kmap(kernel_if->u.h.page[page_index]);
+			va = kmap_local_page(kernel_if->u.h.page[page_index]);
 		else
 			va = kernel_if->u.g.vas[page_index + 1];
 			/* Skip header. */
@@ -407,12 +407,12 @@ static int qp_memcpy_from_queue_iter(struct iov_iter *to,
 		err = copy_to_iter((u8 *)va + page_offset, to_copy, to);
 		if (err != to_copy) {
 			if (kernel_if->host)
-				kunmap(kernel_if->u.h.page[page_index]);
+				kunmap_local(va);
 			return VMCI_ERROR_INVALID_ARGS;
 		}
 		bytes_copied += to_copy;
 		if (kernel_if->host)
-			kunmap(kernel_if->u.h.page[page_index]);
+			kunmap_local(va);
 	}
 
 	return VMCI_SUCCESS;
diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index d6e3c650bd11..cb9506f9cbd0 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -636,7 +636,7 @@ static int xsdfec_table_write(struct xsdfec_dev *xsdfec, u32 offset,
 	}
 
 	for (i = 0; i < nr_pages; i++) {
-		addr = kmap(pages[i]);
+		addr = kmap_local_page(pages[i]);
 		do {
 			xsdfec_regwrite(xsdfec,
 					base_addr + ((offset + reg) *
@@ -645,6 +645,7 @@ static int xsdfec_table_write(struct xsdfec_dev *xsdfec, u32 offset,
 			reg++;
 		} while ((reg < len) &&
 			 ((reg * XSDFEC_REG_WIDTH_JUMP) % PAGE_SIZE));
+		kunmap_local(addr);
 		unpin_user_page(pages[i]);
 	}
 	return 0;