# Copyright 2025 Daniil Shmelev
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========================================================================
from typing import Union
from pathlib import Path
import numpy as np
import torch
from .param_checks import check_type, check_non_neg, check_log_sig_method, check_n_jobs
from .error_codes import err_msg
from .dtypes import (CPSIG_SIG_TO_LOG_SIG,
CUSIG_SIG_TO_LOG_SIG_CUDA,
CPSIG_LOG_SIG_FROM_PATH, CUSIG_LOG_SIG_FROM_PATH_CUDA)
from .sig_length import sig_length, log_sig_length, aug_dim, _infer_scalar_term
from .sig import sig
from .data_handlers import SigOutputHandler, SigInputHandler, PathInputHandler
from .load_siglib import CPSIG, CUSIG, BUILT_WITH_CUDA
from .transform_path import transform_path
######################################################
# Python wrappers
######################################################
[docs]
def set_cache_dir(
        dir : str
):
    """
    Sets the cache directory to use in ``pysiglib.prepare_log_sig``
    when ``use_disk=True``. If the cache directory is not explicitly
    set by a call to this function, a default directory will be used:

    - Windows: ``%LOCALAPPDATA%``
    - Linux: ``~/.cache``
    - Mac: ``~/Library/Caches``

    The directory must already exist; it is not created by this function.
    The same directory is registered with the CPU library and, when the
    package was built with CUDA, with the GPU library as well.

    This function is not thread safe.

    :param dir: Path to cache directory
    :type dir: str
    :raises ValueError: If ``dir`` does not exist or is not a directory.
    :raises Exception: If the native library reports a non-zero error code.

    Example usage:
    ----------------

    .. code-block:: python

        import torch
        import pysiglib

        # Set cache dir to a folder "my_cache_dir" in the current working directory
        pysiglib.set_cache_dir("./my_cache_dir")

        pysiglib.prepare_log_sig(5, 3, lead_lag=True, method=2, use_disk=True)
        X = torch.rand((32,100,5))
        X_log_sig = pysiglib.log_sig(X, 3, lead_lag=True, method=2)
    """
    check_type(dir, "dir", str)

    # Reject paths that are missing or that point at something other than a folder.
    cache_path = Path(dir)
    if not cache_path.exists():
        raise ValueError(f"Path does not exist: {cache_path}")
    if not cache_path.is_dir():
        raise ValueError(f"Path is not a directory: {cache_path}")

    # The native entry points take the path as UTF-8 bytes.
    encoded_dir = dir.encode("utf-8")

    status = CPSIG.set_cache_dir(encoded_dir)
    if status:
        raise Exception("Error in pysiglib.set_cache_dir: " + err_msg(status))

    # Only the CPU library exists on builds without CUDA.
    if not BUILT_WITH_CUDA:
        return

    status = CUSIG.set_cache_dir_cuda(encoded_dir)
    if status:
        raise Exception("Error in pysiglib.set_cache_dir (CUDA): " + err_msg(status))
[docs]
def prepare_log_sig(
        dimension : int,
        degree : int,
        method : int,
        *,
        time_aug : bool = False,
        lead_lag : bool = False,
        use_disk : bool = False,
        device : str = "both"
):
    """
    Prepares for log signature computations. For details concerning the ``method`` parameter,
    see the page :doc:`Computing Log Signatures </pages/log_signatures/log_sig_methods>`.

    This function is not thread safe.

    This function populates in-memory caches for the CPU and/or GPU, controlled by the
    ``device`` parameter. The CPU cache stores Lyndon words and projection matrices in
    host memory, while the GPU cache (if CUDA is available) stores Lyndon indices, level
    indices, and sparse matrices in device memory.

    When ``use_disk=True``, both the CPU and GPU libraries read from and write to a
    shared disk cache in the same binary format. If the disk cache already exists
    (e.g. from a previous run), the data is loaded from disk instead of being recomputed.

    For ``method=0`` and ``method=3``, no preparation is needed and this function
    returns immediately after validating its arguments.

    If the package was not built with CUDA, the GPU preparation step is skipped
    silently, including when ``device="cuda"`` is requested.

    :param dimension: Dimension of the underlying path(s).
    :type dimension: int
    :param degree: Truncation degree of the log signature.
    :type degree: int
    :param method: Method for the log signature computation. Must be one of `0`, `1`, `2` or `3`.
        Methods `1` and `2` require preparation; methods `0` and `3` do not.
    :type method: int
    :param time_aug: Whether time augmentation will be used in the computation.
    :type time_aug: bool
    :param lead_lag: Whether the lead lag transform will be used in the computation.
    :type lead_lag: bool
    :param use_disk: If ``False``, will cache prepared objects in memory only.
        If ``True``, will also save these objects in a shared disk cache to be
        re-used for future runs. The CPU and GPU libraries share the same
        disk cache format and directory.
        See additionally the documentation for ``pysiglib.set_cache_dir``.
    :type use_disk: bool
    :param device: Which device caches to prepare. Must be one of ``"cpu"``, ``"cuda"``,
        or ``"both"`` (default). Use ``"cpu"`` to prepare only the CPU cache,
        ``"cuda"`` to prepare only the GPU cache, or ``"both"`` to prepare both.
    :type device: str
    :raises ValueError: If ``device`` is not one of the accepted strings.
    :raises Exception: If the native library reports a non-zero error code.

    Example usage:
    ----------------

    .. code-block:: python

        import torch
        import pysiglib

        pysiglib.prepare_log_sig(5, 3, lead_lag=True, method=2, use_disk=True)
        X = torch.rand((32,100,5))
        X_log_sig = pysiglib.log_sig(X, 3, lead_lag=True, method=2)
    """
    check_type(dimension, "dimension", int)
    check_type(degree, "degree", int)
    check_type(method, "method", int)
    check_log_sig_method(method)
    check_type(time_aug, "time_aug", bool)
    check_type(lead_lag, "lead_lag", bool)
    # use_disk is handed straight to the native libraries, so validate it
    # with the same strictness as the other boolean flags.
    check_type(use_disk, "use_disk", bool)
    check_type(device, "device", str)
    if device not in ("cpu", "cuda", "both"):
        raise ValueError("device must be 'cpu', 'cuda', or 'both'")

    # Methods 0 and 3 use no prepared objects.
    if method in (0, 3):
        return

    # The caches are keyed on the dimension of the transformed path.
    aug_dimension = aug_dim(dimension, time_aug, lead_lag)

    if device in ("cpu", "both"):
        err_code = CPSIG.prepare_log_sig(
            aug_dimension,
            degree,
            method,
            use_disk
        )
        if err_code:
            raise Exception("Error in pysiglib.prepare_log_sig: " + err_msg(err_code))

    if BUILT_WITH_CUDA and device in ("cuda", "both"):
        err_code = CUSIG.prepare_log_sig_cuda(aug_dimension, degree, method, use_disk)
        if err_code:
            raise Exception("Error in pysiglib.prepare_log_sig (CUDA): " + err_msg(err_code))
[docs]
def clear_cache(
        *,
        use_disk : bool = False,
        device : str = "both"
):
    """
    Clears the cache generated by ``pysiglib.prepare_log_sig``.

    If the package was not built with CUDA, the GPU step is skipped.

    :param use_disk: If ``False``, will clear the cache from memory only.
        If ``True``, will also clear the shared disk cache directory.
        See additionally the documentation for
        ``pysiglib.set_cache_dir``.
    :type use_disk: bool
    :param device: Which device caches to clear. Must be one of ``"cpu"``, ``"cuda"``,
        or ``"both"`` (default).
    :type device: str
    :raises ValueError: If ``device`` is not one of the accepted strings.
    :raises Exception: If the native library reports a non-zero error code.

    Example:
    ---------

    .. code-block:: python

        import torch
        import pysiglib

        pysiglib.prepare_log_sig(dimension=5, degree=4, method=2, use_disk=True)
        path = torch.rand((10, 100, 5))
        log_sig = pysiglib.log_sig(path, 4, method=2, n_jobs = -1)
        print(log_sig)

        pysiglib.clear_cache() # Clear cache from memory but keep on disk
    """
    if device not in ("cpu", "cuda", "both"):
        raise ValueError("device must be 'cpu', 'cuda', or 'both'")

    # Work out which back ends were requested before touching either of them.
    wants_cpu = device in ("cpu", "both")
    wants_cuda = device in ("cuda", "both")

    if wants_cpu:
        status = CPSIG.clear_cache(use_disk)
        if status:
            raise Exception("Error in pysiglib.clear_cache: " + err_msg(status))

    if wants_cuda and BUILT_WITH_CUDA:
        status = CUSIG.clear_cache_cuda(use_disk)
        if status:
            raise Exception("Error in pysiglib.clear_cache (CUDA): " + err_msg(status))
[docs]
def sig_to_log_sig(
        sig : Union[np.ndarray, torch.tensor],
        dimension : int,
        degree : int,
        *,
        time_aug : bool = False,
        lead_lag : bool = False,
        method : int = 1,
        n_jobs : int = 1
) -> Union[np.ndarray, torch.tensor]:
    """
    Computes the log signature from the signature, using the specified method. For details,
    see the page :doc:`Computing Log Signatures </pages/log_signatures/log_sig_methods>`.

    :param sig: The signature or batch of signatures, given as a `numpy.ndarray` or `torch.tensor`.
        For a single signature, this must be of shape ``sig_length``. For a batch of signatures,
        this must be of shape ``(batch_size, sig_length)``.
    :type sig: numpy.ndarray | torch.tensor
    :param dimension: Dimension of the underlying path(s).
    :type dimension: int
    :param degree: Truncation degree of the (log) signature(s).
    :type degree: int
    :param time_aug: Whether the signatures were computed with ``time_aug=True``.
    :type time_aug: bool
    :param lead_lag: Whether the signatures were computed with ``lead_lag=True``.
    :type lead_lag: bool
    :param method: Method to use for the log signature computation (`0`, `1` or `2`).
        Method `3` is not supported here; use ``pysiglib.log_sig`` with ``method=3`` instead.
    :type method: int
    :param n_jobs: Number of threads to run in parallel.
        If n_jobs = 1, the computation is run serially. If set to -1, all available threads
        are used. For n_jobs below -1, (max_threads + 1 + n_jobs) threads are used. For example
        if n_jobs = -2, all threads but one are used.
    :type n_jobs: int
    :return: Log signature or a batch of log signatures. For method ``0`` (expanded), the output
        matches the scalar-term format of the input ``sig``. Methods ``1`` and ``2`` produce
        log-sig-shaped output (no scalar term).
    :rtype: numpy.ndarray | torch.tensor
    :raises ValueError: If ``method`` is ``3``.
    :raises Exception: If the native library reports a non-zero error code.

    Example usage:
    ----------------

    .. code-block:: python

        import torch
        import pysiglib

        pysiglib.prepare_log_sig(5, 3, lead_lag=True, method=2)
        X = torch.rand((32,100,5))
        X_sig = pysiglib.sig(X, 3, lead_lag=True)
        X_log_sig = pysiglib.sig_to_log_sig(X_sig, 5, 3, lead_lag=True, method=2)
    """
    # Integer arguments: type first, then sign, one argument at a time.
    for value, label in ((dimension, "dimension"), (degree, "degree")):
        check_type(value, label, int)
        check_non_neg(value, label)
    for flag, label in ((time_aug, "time_aug"), (lead_lag, "lead_lag")):
        check_type(flag, label, bool)
    check_type(method, "method", int)
    check_log_sig_method(method)
    if method == 3:
        raise ValueError("method=3 is not supported in sig_to_log_sig. Use log_sig(path, degree, method=3) instead.")

    full_dim = aug_dim(dimension, time_aug, lead_lag)
    # Whether the input carries a leading scalar term is deduced from the input itself.
    has_scalar = _infer_scalar_term(sig, dimension, degree, time_aug=time_aug, lead_lag=lead_lag)
    in_len = sig_length(full_dim, degree, scalar_term=has_scalar)
    sig_in = SigInputHandler(sig, in_len, "sig")

    # Method 0 yields signature-length output; any other method yields log-signature length.
    if method:
        out_len = log_sig_length(full_dim, degree)
    else:
        out_len = sig_length(full_dim, degree, scalar_term=has_scalar)
    out = SigOutputHandler(sig_in, out_len)

    # Nothing to compute for an empty batch.
    if sig_in.batch_size == 0:
        return out.data

    check_n_jobs(n_jobs)

    if sig_in.device != "cpu":
        # The CUDA kernel is given the already-augmented dimension.
        status = CUSIG_SIG_TO_LOG_SIG_CUDA[sig_in.dtype](
            sig_in.data_ptr, out.data_ptr, sig_in.batch_size,
            full_dim, degree, method, has_scalar)
    else:
        # The CPU kernel is given the raw dimension plus the transform flags.
        status = CPSIG_SIG_TO_LOG_SIG[sig_in.dtype](
            sig_in.data_ptr, out.data_ptr, sig_in.batch_size,
            dimension, degree, time_aug, lead_lag, method, has_scalar, n_jobs)

    if status:
        raise Exception("Error in pysiglib.sig_to_log_sig: " + err_msg(status))
    return out.data
[docs]
def log_sig(
        path : Union[np.ndarray, torch.tensor],
        degree : int,
        *,
        time_aug : bool = False,
        lead_lag : bool = False,
        end_time : float = 1.,
        method : int = 1,
        scalar_term : bool = False,
        n_jobs : int = 1
) -> Union[np.ndarray, torch.tensor]:
    """
    Computes the log signature using the specified method. For details,
    see the page :doc:`Computing Log Signatures </pages/log_signatures/log_sig_methods>`.

    :param path: The underlying path or batch of paths, given as a `numpy.ndarray` or `torch.tensor`.
        For a single path, this must be of shape ``(length, dimension)``. For a batch of paths, this must
        be of shape ``(batch_size, length, dimension)``.
    :type path: numpy.ndarray | torch.tensor
    :param degree: Truncation degree of the (log) signature(s).
    :type degree: int
    :param time_aug: If set to True, will compute the log signature of the time-augmented path, :math:`\\hat{x}_t := (t, x_t)`,
        defined as the original path with an extra channel set to time, :math:`t`. This channel spans :math:`[0, t_L]`,
        where :math:`t_L` is given by the parameter ``end_time``.
    :type time_aug: bool
    :param lead_lag: If set to True, will compute the log signature of the path after applying the lead-lag transformation.
    :type lead_lag: bool
    :param end_time: End time for time-augmentation, :math:`t_L`.
    :type end_time: float
    :param method: Method to use for the log signature computation (`0`, `1`, `2` or `3`).
        Methods `0`-`2` first compute the full signature and then project to the log signature.
        Method `3` uses the Baker-Campbell-Hausdorff formula to compute the log signature
        directly from the path without ever computing the full signature. This uses less
        memory but is slower than methods `0`-`2` for typical dimensions and degrees.
    :type method: int
    :param scalar_term: If True, the output includes the leading constant 1 at index 0
        (the empty-word term). If False (default), this leading element is stripped from the output.
        Only affects method ``0`` (expanded) output; methods ``1`` and ``2`` produce
        log-sig-shaped output with no scalar term. It is not used by method ``3``.
    :type scalar_term: bool
    :param n_jobs: Number of threads to run in parallel.
        If n_jobs = 1, the computation is run serially. If set to -1, all available threads
        are used. For n_jobs below -1, (max_threads + 1 + n_jobs) threads are used. For example
        if n_jobs = -2, all threads but one are used.
    :type n_jobs: int
    :return: Log signature or a batch of log signatures.
    :rtype: numpy.ndarray | torch.tensor
    :raises Exception: If the native library reports a non-zero error code.

    Example usage:
    ----------------

    .. code-block:: python

        import torch
        import pysiglib

        pysiglib.prepare_log_sig(5, 3, lead_lag=True, method=2)
        X = torch.rand((32,100,5))
        X_log_sig = pysiglib.log_sig(X, 3, lead_lag=True, method=2)
    """
    # Validate the method before dispatching on it: a value such as 3.0 compares
    # equal to 3 and would otherwise reach the native kernel unchecked, and an
    # invalid method would only be rejected after the full signature was computed.
    check_type(method, "method", int)
    check_log_sig_method(method)

    if method == 3:
        # This branch calls the native kernel directly, so it has to perform
        # the argument checks itself.
        check_type(degree, "degree", int)
        check_non_neg(degree, "degree")

        # Apply the path transforms up front; the kernel below is then invoked
        # with both transforms switched off.
        if time_aug or lead_lag:
            path = transform_path(path, time_aug=time_aug, lead_lag=lead_lag, end_time=end_time, n_jobs=n_jobs)

        # Named aug_dimension so the imported aug_dim helper is not shadowed.
        aug_dimension = path.shape[-1]
        ls_len = log_sig_length(aug_dimension, degree)
        data = PathInputHandler(path, False, False, 1.0, "path")
        result = SigOutputHandler(data, ls_len)

        # Nothing to compute for an empty batch.
        if data.batch_size == 0:
            return result.data

        # Checked after the empty-batch return, mirroring sig_to_log_sig.
        check_n_jobs(n_jobs)

        if data.device == "cpu":
            err_code = CPSIG_LOG_SIG_FROM_PATH[data.dtype](
                data.data_ptr, result.data_ptr, data.batch_size,
                data.data_length, aug_dimension, degree, n_jobs)
        else:
            err_code = CUSIG_LOG_SIG_FROM_PATH_CUDA[data.dtype](
                data.data_ptr, result.data_ptr, data.batch_size,
                data.data_length, aug_dimension, degree)

        if err_code:
            raise Exception("Error in pysiglib.log_sig (method=3): " + err_msg(err_code))
        return result.data

    # Methods 0-2: compute sig then project to log sig.
    sig_ = sig(path, degree, scalar_term=scalar_term, time_aug=time_aug, lead_lag=lead_lag, end_time=end_time, horner=True, n_jobs=n_jobs)
    dimension = path.shape[-1]
    return sig_to_log_sig(sig_, dimension, degree, time_aug=time_aug, lead_lag=lead_lag, method=method, n_jobs=n_jobs)