diff --git a/cuda_core/cuda/core/_device.pyx b/cuda_core/cuda/core/_device.pyx
index 1ea2df564c4..c0d7f09ee44 100644
--- a/cuda_core/cuda/core/_device.pyx
+++ b/cuda_core/cuda/core/_device.pyx
@@ -377,7 +377,7 @@ cdef class DeviceProperties:
 
     @property
     def gpu_overlap(self) -> bool:
-        """bool: Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead async_engine_count."""
+        """bool: Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use :attr:`~DeviceProperties.async_engine_count` instead."""
         return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP))
 
     @property
@@ -662,7 +662,7 @@ cdef class DeviceProperties:
 
     @property
     def read_only_host_register_supported(self) -> bool:
-        """bool: True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not."""
+        """bool: True if device supports using the cuMemHostRegister flag CU_MEMHOSTREGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not."""
         return bool(
             self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED)
         )
@@ -841,12 +841,12 @@ cdef class DeviceProperties:
 
     @property
     def mem_decompress_algorithm_mask(self) -> int:
-        """int: The returned valued shall be interpreted as a bitmask, where the individual bits are described by the CUmemDecompressAlgorithm enum."""
+        """int: The returned value shall be interpreted as a bitmask, where the individual bits are described by the CUmemDecompressAlgorithm enum."""
         return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK)
 
     @property
     def mem_decompress_maximum_length(self) -> int:
-        """int: The returned valued is the maximum length in bytes of a single decompress operation that is allowed."""
+        """int: The returned value is the maximum length in bytes of a single decompress operation that is allowed."""
         return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH)
 
     @property
@@ -897,7 +897,7 @@ cdef class DeviceProperties:
 
     @property
     def host_memory_pools_supported(self) -> bool:
-        """bool: Device suports HOST location with the cuMemAllocAsync and cuMemPool family of APIs."""
+        """bool: Device supports HOST location with the cuMemAllocAsync and cuMemPool family of APIs."""
         return bool(
             self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_MEMORY_POOLS_SUPPORTED)
         )
@@ -1033,7 +1033,7 @@ class Device:
         Parameters
         ----------
         peer : Device | int
-            The peer device to check accessibility to. Can be a Device object or device ID.
+            The peer device to check accessibility to. Can be a :obj:`~_device.Device` object or device ID.
         """
         peer = Device(peer)
         cdef int d1 = <int> self.device_id
@@ -1253,7 +1253,7 @@ class Device:
 
         Note
         ----
-        The newly context will not be set as current.
+        The newly created context will not be set as current.
 
         Parameters
         ----------
@@ -1269,7 +1269,7 @@ class Device:
         raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")
 
     def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:
-        """Create a Stream object.
+        """Create a :obj:`~_stream.Stream` object.
 
         New stream objects can be created in two different ways:
 
@@ -1300,7 +1300,7 @@ class Device:
         return Stream._init(obj=obj, options=options, device_id=self._device_id, ctx=self._context)
 
     def create_event(self, options: EventOptions | None = None) -> Event:
-        """Create an Event object without recording it to a Stream.
+        """Create an :obj:`~_event.Event` object without recording it to a :obj:`~_stream.Stream`.
 
         Note
         ----
diff --git a/cuda_core/cuda/core/_event.pyx b/cuda_core/cuda/core/_event.pyx
index 4a0491d8650..d236aa5790f 100644
--- a/cuda_core/cuda/core/_event.pyx
+++ b/cuda_core/cuda/core/_event.pyx
@@ -211,7 +211,20 @@ cdef class Event:
 
     @classmethod
     def from_ipc_descriptor(cls, ipc_descriptor: IPCEventDescriptor) -> Event:
-        """Import an event that was exported from another process."""
+        """Import an event that was exported from another process.
+
+        Parameters
+        ----------
+        ipc_descriptor : :obj:`~_memory._ipc.IPCEventDescriptor`
+            The IPC descriptor obtained from :attr:`~Event.ipc_descriptor` in
+            another process.
+
+        Returns
+        -------
+        :obj:`~_event.Event`
+            A new event backed by the imported IPC handle.
+
+        """
         cdef cydriver.CUipcEventHandle data
         memcpy(data.reserved, <const void*><const char*>(ipc_descriptor._reserved), sizeof(data.reserved))
         cdef Event self = Event.__new__(cls)
diff --git a/cuda_core/cuda/core/_linker.pyx b/cuda_core/cuda/core/_linker.pyx
index 09aa9863cd7..cf784706e5e 100644
--- a/cuda_core/cuda/core/_linker.pyx
+++ b/cuda_core/cuda/core/_linker.pyx
@@ -188,7 +188,7 @@ class LinkerOptions:
     Attributes
     ----------
     name : str, optional
-        Name of the linker. If the linking succeeds, the name is passed down to the generated `ObjectCode`.
+        Name of the linker. If the linking succeeds, the name is passed down to the generated :class:`ObjectCode`.
     arch : str, optional
         Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
         ``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx
index bb6fd97df6f..65df7091e67 100644
--- a/cuda_core/cuda/core/_memory/_buffer.pyx
+++ b/cuda_core/cuda/core/_memory/_buffer.pyx
@@ -204,8 +204,9 @@ cdef class Buffer:
 
         Parameters
         ----------
-        dst : :obj:`~_memory.Buffer`
-            Source buffer to copy data from
+        dst : :obj:`~_memory.Buffer`, optional
+            Destination buffer to copy data to. If not provided, a new buffer
+            is allocated using this buffer's memory resource.
         stream : :obj:`~_stream.Stream` | :obj:`~graph.GraphBuilder`
             Keyword argument specifying the stream for the
             asynchronous copy
diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
index 9f8e4bcd534..57494c1e915 100644
--- a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
@@ -223,7 +223,7 @@ cdef class DeviceMemoryResource(_MemPool):
         Returns a tuple of sorted device IDs that currently have peer access to
         allocations from this memory pool.
 
-        When setting, accepts a sequence of Device objects or device IDs.
+        When setting, accepts a sequence of :obj:`~_device.Device` objects or device IDs.
         Setting to an empty sequence revokes all peer access.
 
         For non-owned pools (the default or current device pool), the state
diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx
index 2eaff7fb11b..7da61af5d48 100644
--- a/cuda_core/cuda/core/_module.pyx
+++ b/cuda_core/cuda/core/_module.pyx
@@ -257,7 +257,7 @@ cdef class KernelOccupancy:
         Returns
         -------
         :obj:`~MaxPotentialBlockSizeOccupancyResult`
-            An object with `min_grid_size` amd `max_block_size` attributes encoding
+            An object with ``min_grid_size`` and ``max_block_size`` attributes encoding
             the suggested launch configuration.
 
         Note
diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx
index 194ef6da53f..cfc66451c86 100644
--- a/cuda_core/cuda/core/_program.pyx
+++ b/cuda_core/cuda/core/_program.pyx
@@ -173,7 +173,7 @@ class ProgramOptions:
     Attributes
     ----------
     name : str, optional
-        Name of the program. If the compilation succeeds, the name is passed down to the generated `ObjectCode`.
+        Name of the program. If the compilation succeeds, the name is passed down to the generated :class:`ObjectCode`.
     arch : str, optional
         Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
         ``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
@@ -272,13 +272,13 @@ class ProgramOptions:
         Disable the display of a diagnostic number for warning messages.
         Default: False
     diag_error : Union[int, list[int]], optional
-        Emit error for a specified diagnostic message number or comma separated list of numbers.
+        Emit error for a specified diagnostic message number or comma-separated list of numbers.
         Default: None
     diag_suppress : Union[int, list[int]], optional
-        Suppress a specified diagnostic message number or comma separated list of numbers.
+        Suppress a specified diagnostic message number or comma-separated list of numbers.
         Default: None
     diag_warn : Union[int, list[int]], optional
-        Emit warning for a specified diagnostic message number or comma separated lis of numbers.
+        Emit warning for a specified diagnostic message number or comma-separated list of numbers.
         Default: None
     brief_diagnostics : bool, optional
         Disable or enable showing source line and column info in a diagnostic.
diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx
index ca13811cd3c..fdb617f0325 100644
--- a/cuda_core/cuda/core/_stream.pyx
+++ b/cuda_core/cuda/core/_stream.pyx
@@ -227,7 +227,7 @@ cdef class Stream:
     def record(self, event: Event = None, options: EventOptions = None) -> Event:
         """Record an event onto the stream.
 
-        Creates an Event object (or reuses the given one) by
+        Creates an :obj:`~_event.Event` object (or reuses the given one) by
         recording on the stream.
 
         Parameters
@@ -269,6 +269,13 @@ cdef class Stream:
         work is completed. This is done by recording a new :obj:`~_event.Event`
         on the stream and then waiting on it.
 
+        Parameters
+        ----------
+        event_or_stream : :obj:`~_event.Event` | :obj:`~_stream.Stream`
+            The event or stream to wait for. Objects supporting the
+            ``__cuda_stream__`` protocol are also accepted and treated as
+            streams.
+
         """
         cdef Stream stream
         cdef EventHandle h_event
@@ -332,7 +339,7 @@ cdef class Stream:
         Note
         ----
         Stream lifetime is not managed, foreign object must remain
-        alive while this steam is active.
+        alive while this stream is active.
 
         Parameters
         ----------
diff --git a/cuda_core/cuda/core/system/_system.pyx b/cuda_core/cuda/core/system/_system.pyx
index f306c036b8c..d1a7e97e1b6 100644
--- a/cuda_core/cuda/core/system/_system.pyx
+++ b/cuda_core/cuda/core/system/_system.pyx
@@ -88,6 +88,11 @@ def get_driver_version_full(kernel_mode: bool = False) -> tuple[int, int, int]:
 def get_nvml_version() -> tuple[int, ...]:
     """
     The version of the NVML library.
+
+    Returns
+    -------
+    version : tuple[int, ...]
+        Tuple of integers representing the NVML version components.
     """
     if not CUDA_BINDINGS_NVML_IS_COMPATIBLE:
         raise RuntimeError("NVML library is not available")
@@ -97,6 +102,11 @@ def get_nvml_version() -> tuple[int, ...]:
 def get_driver_branch() -> str:
     """
     Retrieves the driver branch of the NVIDIA driver installed on the system.
+
+    Returns
+    -------
+    branch : str
+        The driver branch string (e.g., ``"560"`` or ``"open"``).
     """
     if not CUDA_BINDINGS_NVML_IS_COMPATIBLE:
         raise RuntimeError("NVML library is not available")
diff --git a/cuda_core/docs/source/getting-started.rst b/cuda_core/docs/source/getting-started.rst
index 1761f2cc37c..7ded390b65c 100644
--- a/cuda_core/docs/source/getting-started.rst
+++ b/cuda_core/docs/source/getting-started.rst
@@ -1,4 +1,4 @@
-.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+.. SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 .. SPDX-License-Identifier: Apache-2.0
 
 .. currentmodule:: cuda.core
@@ -68,7 +68,7 @@ Don't forget to use :meth:`Device.set_current`!
    s = dev.create_stream()
 
 Next, we compile the CUDA C++ kernel from earlier using the :class:`Program` class.
-The result of the compilation  is saved as a CUBIN.
+The result of the compilation is saved as a CUBIN.
 Note the use of the ``name_expressions`` parameter to the :meth:`Program.compile` method to specify which kernel template instantiations to compile:
 
 .. code-block:: python