Tutorial 22 - Bundles On The GPU

Bundles are not exactly data themselves, they are a representation of a collection of attributes whose composition is determined at runtime. As such, they will always live on the CPU. However the attributes they are encapsulating have the same flexibility as other attributes to live on the CPU, GPU, or have their location decided at runtime.

For that reason it’s convenient to use the same “cpu”, “cuda”, and “any” memory types for the bundle attributes, with a slightly different interpretation.

  • cpu all attributes in the bundle will be on the CPU

  • cuda all attributes in the bundle will be on the GPU

  • any either some attributes in the bundle are on the CPU and some are on the GPU, or that decision will be made at runtime

For example, if you had a bundle of attributes consisting of a large array of points and a boolean that controls the type of operation you will perform on them, it makes sense to leave the boolean on the CPU and move the points to the GPU for more efficient processing.

OgnTutorialCpuGpuBundles.ogn

The ogn file shows the implementation of a node named “omni.graph.tutorials.CpuGpuBundles” with an input bundle on the CPU, an input bundle on the GPU, and an output bundle whose memory location is decided at runtime by a boolean.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
{
    "CpuGpuBundles": {
        "version": 1,
        "categories": "tutorials",
        "description": [
            "This is a tutorial node. It exercises functionality for accessing data in bundles that",
            "are on the GPU as well as bundles whose CPU/GPU location is decided at runtime. The",
            "compute looks for bundled attributes named 'points' and, if they are found, computes",
            "their dot products. If the bundle on the output contains a float array type named",
            "'dotProducts' then the results are placed there, otherwise a new attribute of that name and",
            "type is created on the output bundle to hold the results.",
            "This node is identical to OgnTutorialCpuGpuBundlesPy.ogn, except it is implemented in C++."
        ],
        "tags": ["tutorial", "bundle", "gpu"],
        "tokens": ["points", "dotProducts"],
        "uiName": "Tutorial Node: CPU/GPU Bundles",
        "inputs": {
             "cpuBundle": {
                "type": "bundle",
                "description": "Input bundle whose data always lives on the CPU",
                "uiName": "CPU Input Bundle"
             },
             "gpuBundle": {
                 "type": "bundle",
                 "memoryType": "cuda",
                 "description": "Input bundle whose data always lives on the GPU",
                 "uiName": "GPU Input Bundle"
             },
             "gpu": {
                 "type": "bool",
                 "description": "If true then copy gpuBundle onto the output, otherwise copy cpuBundle",
                 "uiName": "Results To GPU"
             }
        },
        "outputs": {
            "cpuGpuBundle": {
                "type": "bundle",
                "memoryType": "any",
                "description": [
                    "This is the bundle with the merged data. If the 'gpu' attribute is set to true then this",
                    "bundle's contents will be entirely on the GPU, otherwise they will be on the CPU."
                ],
                "uiName": "Constructed Bundle"
             }
        }
    }
}

OgnTutorialCpuGpuBundles.cpp

The cpp file contains the implementation of the compute method. It creates a merged bundle in either the CPU or GPU based on the input boolean and runs an algorithm on the output location.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto.  Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#include <OgnTutorialCpuGpuBundlesDatabase.h>
extern "C" void cpuGpuDotProductCPU(float const(*p1)[3], float const(*p2)[3], float*, size_t);
extern "C" void cpuGpuDotProductGPU(float const (**p1)[3], float const (**p2)[3], float**, size_t);

namespace omni {
namespace graph {
namespace tutorials {

class OgnTutorialCpuGpuBundles
{
public:
    static bool compute(OgnTutorialCpuGpuBundlesDatabase& db)
    {
        const auto& gpu = db.inputs.gpu();
        // Bundles are merely abstract representations of a collection of attributes so you don't have to do anything
        // different when they are marked for GPU, or ANY memory location.
        const auto& cpuBundle = db.inputs.cpuBundle();
        const auto& gpuBundle = db.inputs.gpuBundle();
        auto& outputBundle = db.outputs.cpuGpuBundle();

        // Assign the correct destination bundle to the output based on the gpu flag
        if (gpu)
        {
            outputBundle = gpuBundle;
        }
        else
        {
            outputBundle = cpuBundle;
        }

        // Get the attribute references. They're the same whether the bundles are on the CPU or GPU
        const auto pointsCpuAttribute = cpuBundle.attributeByName(db.tokens.points);
        const auto pointsGpuAttribute = gpuBundle.attributeByName(db.tokens.points);
        auto dotProductAttribute = outputBundle.attributeByName(db.tokens.dotProducts);
        if (! dotProductAttribute.isValid())
        {
            dotProductAttribute = outputBundle.addAttribute(db.tokens.dotProducts, Type(BaseDataType::eFloat, 1, 1));
        }

        // Find the bundle contents to be processed
        if (gpu)
        {
            const auto points1 = pointsCpuAttribute.getGpu<float[][3]>();
            const auto points2 = pointsGpuAttribute.get<float[][3]>();
            auto dotProducts = dotProductAttribute.getGpu<float[]>();
            if (! points1)
            {
                db.logWarning("Skipping compute - No valid float[3][] attribute named '%s' on the CPU bundle", db.tokenToString(db.tokens.points));
                return false;
            }
            if (! points2)
            {
                db.logWarning("Skipping compute - No valid float[3][] attribute named '%s' on the GPU bundle", db.tokenToString(db.tokens.points));
                return false;
            }
            if (points1.size() != points2.size())
            {
                db.logWarning("Skipping compute - Point arrays are different sizes (%zu and %zu)", points1.size(), points2.size());
                return false;
            }
            dotProducts.resize(points1.size());
            if (! dotProducts)
            {
                db.logWarning("Skipping compute - No valid float[] attribute named '%s' on the output bundle", db.tokenToString(db.tokens.dotProducts));
                return false;
            }
            cpuGpuDotProductGPU(points1(), points2(), dotProducts(), points1.size());
        }
        else
        {
            const auto points1 = pointsCpuAttribute.get<float[][3]>();
            const auto points2 = pointsGpuAttribute.getCpu<float[][3]>();
            auto dotProducts = dotProductAttribute.getCpu<float[]>();
            if (! points1)
            {
                db.logWarning("Skipping compute - No valid float[3][] attribute named '%s' on the CPU bundle", db.tokenToString(db.tokens.points));
                return false;
            }
            if (! points2)
            {
                db.logWarning("Skipping compute - No valid float[3][] attribute named '%s' on the GPU bundle", db.tokenToString(db.tokens.points));
                return false;
            }
            if (points1.size() != points2.size())
            {
                db.logWarning("Skipping compute - Point arrays are different sizes (%zu and %zu)", points1.size(), points2.size());
                return false;
            }
            dotProducts.resize(points1.size());
            if (! dotProducts)
            {
                db.logWarning("Skipping compute - No valid dot product attribute on the output bundle");
                return false;
            }
            cpuGpuDotProductCPU(points1->data(), points2->data(), dotProducts->data(), points1.size());
        }
        return true;
    }
};

REGISTER_OGN_NODE()

} // namespace tutorials
} // namespace graph
} // namespace omni

OgnTutorialCpuGpuBundlesPy.py

The py file contains the same algorithm as the C++ node, with the node implementation language being different.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
Implementation of the Python node accessing attributes whose memory location is determined at runtime.
"""
import numpy as np
import omni.graph.core as og

# Types to check on bundled attributes.
# The 'dotProducts' output: an array of single floats with no special role.
FLOAT_ARRAY_TYPE = og.Type(og.BaseDataType.FLOAT, array_depth=1)
# The 'points' inputs: an array of float[3] tuples carrying the POSITION role.
FLOAT3_ARRAY_TYPE = og.Type(og.BaseDataType.FLOAT, tuple_count=3, array_depth=1, role=og.AttributeRole.POSITION)


class OgnTutorialCpuGpuBundlesPy:
    """Exercise bundle members through a Python OmniGraph node"""

    @staticmethod
    def compute(db) -> bool:
        """Implements the same algorithm as the C++ node OgnTutorialCpuGpuBundles.cpp.

        It follows the same code pattern for easier comparison, though in practice you would probably code Python
        nodes differently from C++ nodes to take advantage of the strengths of each language.

        Args:
            db: The generated database providing access to the node's attributes

        Returns:
            True if the compute succeeded (or was legitimately skipped due to invalid input
            bundles), False with a logged warning if required bundled attributes are missing
            or of the wrong type.
        """
        if db.inputs.gpu:
            # Invalid data yields no compute
            if not db.inputs.gpuBundle.valid:
                return True
            db.outputs.cpuGpuBundle = db.inputs.gpuBundle
        else:
            if not db.inputs.cpuBundle.valid:
                return True
            db.outputs.cpuGpuBundle = db.inputs.cpuBundle

        # Find and verify the attributes containing the points.
        # attribute_by_name returns None when no attribute of that name exists, so the None
        # check must come before the type check to avoid an AttributeError on a missing attribute.
        cpu_points = db.inputs.cpuBundle.attribute_by_name(db.tokens.points)
        if cpu_points is None or cpu_points.type != FLOAT3_ARRAY_TYPE:
            db.log_warning(
                f"Skipping compute - No valid float[3][] attribute named '{db.tokens.points}' on the CPU bundle"
            )
            return False
        gpu_points = db.inputs.gpuBundle.attribute_by_name(db.tokens.points)
        if gpu_points is None or gpu_points.type != FLOAT3_ARRAY_TYPE:
            db.log_warning(
                f"Skipping compute - No valid float[3][] attribute named '{db.tokens.points}' on the GPU bundle"
            )
            return False

        # If the attribute is not already on the output bundle then add it
        dot_product = db.outputs.cpuGpuBundle.attribute_by_name(db.tokens.dotProducts)
        if dot_product is None:
            dot_product = db.outputs.cpuGpuBundle.insert((og.Type(og.BaseDataType.FLOAT, array_depth=1), "dotProducts"))
        elif dot_product.type != FLOAT_ARRAY_TYPE:
            # Python types do not use a cast to find out if they are the correct type so explicitly check it instead
            db.log_warning(
                f"Skipping compute - No valid float[] attribute named '{db.tokens.dotProducts}' on the output bundle"
            )
            return False

        # Set the size to what is required for the dot product calculation
        dot_product.size = cpu_points.size

        # Use the correct data access based on whether the output is supposed to be on the GPU or not
        if db.inputs.gpu:
            # The second line is how the values would be extracted if Python supported GPU data extraction.
            # When it does this tutorial will be updated
            dot_product.cpu_value = np.einsum("ij,ij->i", cpu_points.value, gpu_points.cpu_value)
            # dot_product.gpu_value = np.einsum("ij,ij->i", cpu_points.gpu_value, gpu_points.value)
        else:
            dot_product.cpu_value = np.einsum("ij,ij->i", cpu_points.value, gpu_points.cpu_value)

        return True