7.1.5. Example: Taking A Profile
This sample program collects a performance profile while running the program from "Example: Adding Two Vectors".
Execution Method
$ cd /opt/pfn/pfcomp/codegen/examples/
$ ./exec_with_env.sh python3 add_trace.py
Expected Output
The trace results are written to the following file:
/opt/pfn/pfcomp/codegen/examples/add_trace.pb
Related Links
Sample Program
1import torch
2from mlsdk import CacheOptions, Context, MNDevice, storage, trace_scope
3
4
def run_add():
    """Compile a two-tensor addition through an ``mlsdk`` context and check it.

    The function creates an ``MNDevice`` for ``"mncore2:auto"``, wraps it in a
    ``Context`` and makes that context current.  It then compiles a small
    ``add`` function using random 3x4 sample inputs, calls the compiled
    function with two all-ones 3x4 tensors, prints the output after moving it
    to the CPU, and asserts that every element is close to 2.

    Raises:
        AssertionError: if the computed output does not match ``ones * 2``.
    """

    def add(input: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """Return the sum of the "x" and "y" tensors under the key "out"."""
        return {"out": input["x"] + input["y"]}

    ctx = Context(MNDevice("mncore2:auto"))
    Context.switch_context(ctx)

    # Random example inputs handed to compile(); presumably they only convey
    # the input shapes/dtypes -- TODO confirm against the mlsdk documentation.
    example_inputs = {"x": torch.randn(3, 4), "y": torch.randn(3, 4)}

    add_on_device = ctx.compile(
        add,
        example_inputs,
        storage.path("/tmp/add_two_tensors"),
        options={"float_dtype": "float"},
        cache_options=CacheOptions("/tmp/add_two_tensors_cache"),
    )

    ones = {"x": torch.ones(3, 4), "y": torch.ones(3, 4)}
    outputs = add_on_device(ones)

    # Keep this exact variable name: the "=" specifier in the f-string below
    # prints the name together with the value.
    result_on_cpu = outputs["out"].cpu()
    print(f"{result_on_cpu=}")

    expected = torch.ones(3, 4) * 2  # 1 + 1 element-wise
    assert torch.allclose(result_on_cpu, expected)
28
29
if __name__ == "__main__":
    # Run the example inside a trace scope so the whole run_add() call is
    # covered; presumably the trace is written to the given path, relative to
    # the current working directory -- TODO confirm.
    # NOTE(review): the accompanying documentation lists the result file as
    # "add_trace.pb", while this script passes "trace.pb" -- confirm which
    # name is intended.
    profile_scope = trace_scope("trace.pb")
    with profile_scope:
        run_add()