Source code for pysiglib.words

# Copyright 2026 Daniil Shmelev
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========================================================================

import itertools
from functools import cache
from .param_checks import check_type, check_non_neg, check_word

[docs] def words_of_length( alphabet_size : int, length : int ) -> list[tuple[int, ...]]: """ Returns all words of a given length. :param alphabet_size: Size of the alphabet. :type alphabet_size: int :param length: Length of words. :type length: int :return: All words of the given length. :rtype: list[tuple[int, ...]] Example: --------- .. code-block:: python import pysiglib # All words of length 2 over alphabet {0, 1} w = pysiglib.words_of_length(2, 2) print(w) # [(0, 0), (0, 1), (1, 0), (1, 1)] """ check_type(alphabet_size, "alphabet_size", int) check_type(length, "length", int) check_non_neg(alphabet_size, "alphabet_size") check_non_neg(length, "length") d = list(range(alphabet_size)) return list(itertools.product(d, repeat = length))
[docs] def words( alphabet_size : int, max_length : int ) -> list[tuple[int, ...]]: """ Returns all words up to a given length. :param alphabet_size: Size of the alphabet. :type alphabet_size: int :param max_length: Max length of words. :type max_length: int :return: All words up to the given length. :rtype: list[tuple[int, ...]] Example: --------- .. code-block:: python import pysiglib # All words up to length 2 over alphabet {0, 1} w = pysiglib.words(2, 2) print(w) # [(), (0,), (1,), (0, 0), (0, 1), (1, 0), (1, 1)] """ check_type(alphabet_size, "alphabet_size", int) check_type(max_length, "max_length", int) check_non_neg(alphabet_size, "alphabet_size") check_non_neg(max_length, "max_length") d = list(range(alphabet_size)) res = [tuple()] for level in range(1, max_length + 1): res += list(itertools.product(d, repeat = level)) return res
[docs] def lyndon_words_of_length( alphabet_size: int, length: int ) -> list[tuple[int, ...]]: """ Returns all Lyndon words of a given length. :param alphabet_size: Size of the alphabet. :type alphabet_size: int :param length: Length of words. :type length: int :return: All Lyndon words of the given length. :rtype: list[tuple[int, ...]] Example: --------- .. code-block:: python import pysiglib # All Lyndon words of length 3 over alphabet {0, 1} w = pysiglib.lyndon_words_of_length(2, 3) print(w) # [(0, 0, 1), (0, 1, 1)] """ check_type(alphabet_size, "alphabet_size", int) check_type(length, "length", int) check_non_neg(alphabet_size, "alphabet_size") check_non_neg(length, "length") if alphabet_size == 0 or length == 0: return [] res = [] word = [0] while word: m = len(word) if m == length: res.append(tuple(word)) while len(word) < length: word.append(word[-m]) while word and word[-1] == alphabet_size - 1: word.pop() if word: word[-1] += 1 return res
[docs] def lyndon_words( alphabet_size: int, max_length: int ) -> list[tuple[int, ...]]: """ Returns all Lyndon words up to a given length. :param alphabet_size: Size of the alphabet. :type alphabet_size: int :param max_length: Max length of words. :type max_length: int :return: All Lyndon words up to the given length. :rtype: list[tuple[int, ...]] Example: --------- .. code-block:: python import pysiglib # All Lyndon words up to length 3 over alphabet {0, 1} w = pysiglib.lyndon_words(2, 3) print(w) # [(0,), (1,), (0, 1), (0, 0, 1), (0, 1, 1)] """ check_type(alphabet_size, "alphabet_size", int) check_type(max_length, "max_length", int) check_non_neg(alphabet_size, "alphabet_size") check_non_neg(max_length, "max_length") res = [] for level in range(1, max_length + 1): res += lyndon_words_of_length(alphabet_size, level) return res
[docs] @cache def is_lyndon( word : tuple[int, ...] ) -> bool: """ Checks if a given word is a Lyndon word. :param word: Word :type word: tuple[int, ...] :return: Returns ``True`` if ``word`` is a Lyndon word and ``False`` otherwise. :rtype: bool Example: --------- .. code-block:: python import pysiglib print(pysiglib.is_lyndon((0, 1))) # True print(pysiglib.is_lyndon((1, 0))) # False print(pysiglib.is_lyndon((0, 0, 1))) # True """ check_word(word, float('inf'), "word") n = len(word) if n == 0: return False if n == 1: return True for i in range(1, n): if not (word < word[i:]): return False return True
[docs] @cache def word_to_idx( word : tuple[int, ...], alphabet_size : int, *, scalar_term : bool = False, ) -> int: """ Given a word :math:`(w_0, w_1, \\ldots, w_n)`, returns its flat index into a truncated signature. With ``scalar_term=True`` the empty word :math:`()` sits at index 0 and a word of length :math:`n+1` is at .. math:: \\sum_{i=0}^n (w_i + 1) d^i. With ``scalar_term=False`` (default) there is no empty-word entry, so all indices shift down by 1: the single-letter word ``(0,)`` is at index 0, and the empty word is invalid. :param word: Word :type word: tuple[int, ...] :param alphabet_size: Size of the alphabet :type alphabet_size: int :param scalar_term: Whether the target signature includes the leading scalar 1. Must match the format of the sig you intend to index. Default ``False``. :type scalar_term: bool :return: Flat index corresponding to ``word`` :rtype: int Example: --------- .. code-block:: python import torch import pysiglib length, dimension, degree = 100, 2, 3 x = torch.rand(size=(length, dimension)) sig = pysiglib.sig(x, degree) # scalar_term=False by default word = (0, 1) idx = pysiglib.word_to_idx(word, dimension) print(sig[idx]) # coefficient at word (0, 1) """ check_type(alphabet_size, "alphabet_size", int) check_non_neg(alphabet_size, "alphabet_size") check_word(word, alphabet_size, "word") if not word: if not scalar_term: raise ValueError( "The empty word has no index in a signature with scalar_term=False. " "Pass scalar_term=True if your signature includes the leading scalar 1." ) return 0 idx = 0 for i in word: idx = idx * alphabet_size + (i+1) return idx if scalar_term else idx - 1
[docs] @cache def idx_to_word( idx : int, alphabet_size : int, *, scalar_term : bool = False, ) -> tuple[int, ...]: """ Inverse of :func:`word_to_idx`. Given a flat index into a truncated signature, returns the corresponding word. With ``scalar_term=True``, index 0 maps to the empty word :math:`()`. With ``scalar_term=False`` (default), index 0 maps to the single-letter word ``(0,)`` and the empty word is unreachable. :param idx: Flat index :type idx: int :param alphabet_size: Size of the alphabet :type alphabet_size: int :param scalar_term: Whether the source signature includes the leading scalar 1. Must match the format of the sig the index was taken from. Default ``False``. :type scalar_term: bool :return: Word at ``idx`` :rtype: tuple[int, ...] Example: --------- .. code-block:: python import torch import pysiglib length, dimension, degree = 100, 2, 3 x = torch.rand(size=(length, dimension)) sig = pysiglib.sig(x, degree) # scalar_term=False by default word = pysiglib.idx_to_word(4, dimension) print(word, sig[4]) """ if not scalar_term: idx = idx + 1 word = [] while idx: idx, r = divmod(idx, alphabet_size) if not r: r = alphabet_size idx -= 1 word.append(r - 1) return tuple(reversed(word))