Creating Test Problems
Copyright 2025 National Technology & Engineering Solutions of Sandia,
LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
U.S. Government retains certain rights in this software.
We demonstrate how to use the create_problem function to create test problems for decomposition algorithms.
import pyttb as ttb
from pyttb.create_problem import (
CPProblem,
ExistingCPSolution,
TuckerProblem,
MissingData,
create_problem,
)
# Set global random seed for reproducibility of this notebook
import numpy as np
np.random.seed(123)
Create a CP test problem
The create_problem function generates both the solution (as a ktensor for CP) and the test data (as a dense tensor).
# Create a problem
cp_specific_params = CPProblem(shape=(5, 4, 3), num_factors=3, noise=0.1)
no_missing_data = MissingData()
solution, data = create_problem(cp_specific_params, no_missing_data)
# Display the solution
print(solution)
ktensor of shape (5, 4, 3) with order F
weights=[0.94416002 0.50183668 0.62395295]
factor_matrices[0] =
[[-1.0856306 0.99734545 0.2829785 ]
[-1.50629471 -0.57860025 1.65143654]
[-2.42667924 -0.42891263 1.26593626]
[-0.8667404 -0.67888615 -0.09470897]
[ 1.49138963 -0.638902 -0.44398196]]
factor_matrices[1] =
[[-0.43435128 2.20593008 2.18678609]
[ 1.0040539 0.3861864 0.73736858]
[ 1.49073203 -0.93583387 1.17582904]
[-1.25388067 -0.6377515 0.9071052 ]]
factor_matrices[2] =
[[-1.4286807 -0.14006872 -0.8617549 ]
[-0.25561937 -2.79858911 -1.7715331 ]
[-0.69987723 0.92746243 -0.17363568]]
# Display the data
print(data)
tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[-1.18990893 1.28446351 2.07235179 -1.87633271]
[-3.12652349 1.07273265 2.34701048 -3.14030325]
[-2.81968366 2.67865791 4.10636867 -4.33460199]
[-0.49910248 1.58553609 1.67667918 -1.4803083 ]
[ 1.5935628 -1.73784063 -2.7256112 2.76967403]]
data[:, :, 1] =
[[-4.02748914 -0.53027464 1.39868896 0.35255157]
[-2.24482406 -0.51914665 -2.34027329 -2.45371282]
[-2.02367801 -0.3794908 -1.16866717 -2.43337295]
[ 2.46562453 0.78956773 -0.26223999 -0.47003828]
[ 3.48686179 0.07186695 -1.21278825 0.24950518]]
data[:, :, 2] =
[[ 0.84583153 0.55670008 0.42026956 -0.99690908]
[-1.5567177 0.8349424 1.8725418 -1.14868937]
[-1.57718852 1.46198797 2.6604315 -2.05249945]
[-0.82259772 0.42556336 1.14869343 -0.65901074]
[-0.28411876 -1.17623054 -1.27449033 1.31403245]]
# The difference between the true solution and measured data
# should match the specified noise setting
diff = (solution.full() - data).norm() / solution.full().norm()
print(diff)
0.10000000000000002
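The noise parameter is a relative perturbation level: the data equals the full solution plus a random tensor scaled so that the relative error matches noise. Here is a minimal sketch of one such construction (illustrative only; not necessarily how create_problem builds its data internally):
# Illustrative sketch: scale a random tensor so the relative error is exactly 0.1
Z = solution.full()
N = ttb.tensor(np.random.randn(*Z.shape))
noisy = Z + 0.1 * Z.norm() * N / N.norm()
print((noisy - Z).norm() / Z.norm())  # ~0.1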
Creating a Tucker test problem
The create_problem function can also create Tucker problems by providing a TuckerProblem data class as the first argument to create_problem instead. In this case, the function generates the solution as a ttensor.
tucker_specific_params = TuckerProblem(
shape=(5, 4, 3), num_factors=[3, 3, 2], noise=0.1
)
no_missing_data = MissingData()
solution, data = create_problem(tucker_specific_params, no_missing_data)
# Display the solution
print(solution)
TTensor of shape: (5, 4, 3)
Core is a
tensor of shape (3, 3, 2) with order F
data[:, :, 0] =
[[ 2.29546945 0.8628987 -0.13287838]
[ 0.31529775 0.94012555 -1.24988658]
[-0.75751615 0.66752096 -1.84400643]]
data[:, :, 1] =
[[ 0.82319976 0.06143129 -0.31048223]
[-0.71417742 1.06731682 0.3213871 ]
[ 0.33786152 -1.90931822 0.37383405]]
U[0] =
[[ 0.93898923 0.43781947 1.14109158]
[ 0.17145177 -1.54957884 -0.97402348]
[-1.0491106 -0.46483438 -0.49055989]
[ 1.0007457 2.14851419 1.43240926]
[-0.13335333 0.00577405 -0.66762081]]
U[1] =
[[-0.94061891 0.93080981 0.04634267]
[ 1.33673724 0.28026028 1.49663046]
[-0.68415163 0.335301 -1.12855526]
[-0.13372712 -0.78503925 -0.23590284]]
U[2] =
[[-1.41195749 -0.88776123]
[ 0.10426711 0.42249603]
[-0.20072189 -1.41672713]]
# Display the data
print(data)
tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[ 1.90571751 1.29306932 -2.66047991 0.4787608 ]
[ 3.32632534 -8.24046905 7.05868556 -0.94570443]
[-1.70172708 2.04521885 0.47297378 -1.76717467]
[-1.77933637 5.49652024 -7.81954496 2.61105222]
[-0.62849444 -2.47539421 1.61469082 0.71437041]]
data[:, :, 1] =
[[-0.90290826 0.53648692 0.06304186 0.10529605]
[-0.59241983 0.91173894 -0.68241772 0.38676663]
[ 0.40853234 -0.04163589 0.21205378 0.08396353]
[-0.53454083 0.26397327 0.43616478 -0.47223017]
[ 0.07478656 -0.04549533 0.20458064 -0.37257969]]
data[:, :, 2] =
[[ 3.01781992 -1.167676 1.59175537 -0.96841114]
[ 1.37702074 -0.87936349 0.47784026 -0.01377307]
[-1.51797541 1.40668289 -0.8199048 0.2912658 ]
[-0.00535056 -0.77270545 0.0753881 0.21781704]
[-1.98105208 0.16641742 -0.82378859 1.06506215]]
# The difference between the true solution and measured data
# should match the specified noise setting
diff = (solution.full() - data).norm() / solution.full().norm()
print(diff)
0.10000000000000002
Recreating the same test problem
We are still relying on numpy’s deprecated global random state. See #441
# Problem details
shape = [5, 4, 3]
num_factors = 3
seed = 123
missing_params = MissingData()
cp_specific_params = CPProblem(shape, num_factors=num_factors)
# Generate the first test problem
np.random.seed(seed)
solution_1, data_1 = create_problem(cp_specific_params, missing_params)
# Generate the second test problem
np.random.seed(seed)
solution_2, data_2 = create_problem(cp_specific_params, missing_params)
# Check that the solutions are identical
print(f"{solution_1.isequal(solution_2)=}")
# Check that the data are identical
print(f"{(data_1-data_2).norm()=}")
solution_1.isequal(solution_2)=True
(data_1-data_2).norm()=0.0
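Until a dedicated seed or generator argument is available (see #441), one workaround is a small wrapper that re-seeds the global RNG before each call. A minimal sketch using only the pieces shown above:
# Convenience wrapper (illustrative only): re-seed the global RNG before
# each call so that the same problem is generated every time.
def reproducible_problem(problem_params, missing_params, seed):
    np.random.seed(seed)
    return create_problem(problem_params, missing_params)

solution_a, data_a = reproducible_problem(cp_specific_params, missing_params, seed)
solution_b, data_b = reproducible_problem(cp_specific_params, missing_params, seed)
print(solution_a.isequal(solution_b))  # True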
Options for creating factor matrices, core tensors, and weights
User-specified functions may be provided to generate the relevant components of ktensors or ttensors.
# Example custom weight generator for CP Problems
cp_specific_params = CPProblem(shape=[5, 4, 3], num_factors=2, weight_generator=np.ones)
solution, _ = create_problem(cp_specific_params, missing_params)
print(f"{solution.weights}")
[1. 1.]
# Example custom core generator for Tucker
tucker_specific_params = TuckerProblem(
shape=[5, 4, 3], num_factors=[2, 2, 2], core_generator=ttb.tenones
)
solution, _ = create_problem(tucker_specific_params, missing_params)
print(f"{solution.core}")
tensor of shape (2, 2, 2) with order F
data[:, :, 0] =
[[1. 1.]
[1. 1.]]
data[:, :, 1] =
[[1. 1.]
[1. 1.]]
Create dense missing data problems
It’s possible to create problems that have a percentage of missing data. The problem generator randomly creates the pattern of missing data.
# Specify 25% missing data
missing_data_params = MissingData(missing_ratio=0.25)
# Show an example of a randomly generated pattern
# (1 indicates a known entry, 0 a missing entry)
print(missing_data_params.get_pattern(shape=[5, 4, 3]))
tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[0. 1. 0. 0.]
[1. 1. 1. 1.]
[1. 1. 1. 1.]
[1. 0. 1. 1.]
[1. 0. 1. 1.]]
data[:, :, 1] =
[[0. 1. 1. 1.]
[1. 1. 1. 1.]
[1. 1. 0. 1.]
[1. 1. 0. 0.]
[0. 0. 1. 1.]]
data[:, :, 2] =
[[1. 1. 0. 1.]
[1. 1. 0. 1.]
[1. 0. 1. 1.]
[1. 1. 1. 0.]
[1. 1. 1. 1.]]
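As a quick sanity check (illustrative; each call to get_pattern samples a new pattern), the fraction of known entries should be close to 1 - missing_ratio:
# Fraction of known entries in a freshly sampled pattern; expect ~0.75
pattern = missing_data_params.get_pattern(shape=[5, 4, 3])
print(pattern.data.mean())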
# Generate problem using a newly sampled pattern
solution, data = create_problem(cp_specific_params, missing_data_params)
# Show data (including noise) with missing entries zeroed out
print(data)
tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[ 0. -0. 0.56693215 2.58190757]
[ 0.66158503 0. -0.79429059 0. ]
[ 0.78897294 0.42402936 -0.19475623 0.10144052]
[-0.59738159 0. -0.21227392 -1.31519265]
[ 0.41890779 -0.23591209 0.21373757 1.09063717]]
data[:, :, 1] =
[[ 0. 0. -0.8308604 -3.0070611 ]
[-0.42325009 -1.75029574 0. 0. ]
[-0. -0.26850041 0. 0.27099 ]
[-0.02946564 -0.68734706 0.3698576 1.26406723]
[ 0.11801096 0.58192861 -0.26966109 -1.02398053]]
data[:, :, 2] =
[[-0.86106173 0. 0.12941959 0.15403532]
[-0.51241306 0.1323628 -0.16996373 -0.8436616 ]
[-0.84058048 0. 0.07952808 -0.31038574]
[ 0.59041554 0.57389936 0. 0.02993005]
[-0. -0.34216743 0.08667317 -0.24731408]]
Creating sparse missing data problems
If sparse_models
is set to true then the returned data is sparse. This should only be used with missing_ratio
>= 0.8.
missing_data_params = MissingData(missing_ratio=0.8, sparse_model=True)
# Here is a candidate pattern of known data
print(missing_data_params.get_pattern([5, 4, 3]))
sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F
[2, 3, 0] = 1.0
[2, 2, 2] = 1.0
[3, 2, 0] = 1.0
[3, 1, 0] = 1.0
[4, 0, 1] = 1.0
[2, 3, 1] = 1.0
[0, 3, 1] = 1.0
[1, 0, 1] = 1.0
[1, 2, 1] = 1.0
[1, 0, 0] = 1.0
[4, 0, 0] = 1.0
[4, 2, 0] = 1.0
# Here is the data (including noise) with zeros not explicitly represented.
solution, data = create_problem(cp_specific_params, missing_data_params)
print(data)
sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F
[0, 3, 1] = 0.0010425590714080446
[1, 0, 2] = 2.7971575122227903
[1, 2, 1] = 1.361218449073197
[2, 0, 2] = 2.936142238237179
[2, 1, 0] = -0.9431143950951519
[2, 2, 1] = -2.8901418904117255
[2, 3, 1] = 0.6023246711660994
[2, 3, 2] = -1.8081772825394073
[3, 0, 1] = 1.2167934515799839
[4, 2, 1] = -1.3832392092733012
[4, 3, 1] = 0.5225826308834477
[4, 3, 2] = -1.281954743777033
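Since the data is returned as a sparse tensor, its density should roughly match 1 - missing_ratio (a quick illustrative check; the exact count depends on the sampled pattern):
# Fraction of stored entries relative to the full tensor; expect ~0.2
print(data.nnz / np.prod(data.shape))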
Create missing data problems with pre-specified pattern
A specific pattern (dense or sparse) can be used to represent missing data. This is also currently the recommended approach for reproducibility.
# Grab a pattern from before
pattern = MissingData(missing_ratio=0.25).get_pattern([5, 4, 3])
missing_data_params = MissingData(missing_pattern=pattern)
solution, data = create_problem(cp_specific_params, missing_data_params)
print(data)
tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[-0.21950875 -1.24192142 -0.2148323 -0.04852971]
[ 0. 1.46302477 0. 0.87607023]
[-1.22424401 -3.84354799 -2.13227461 -0. ]
[-0.51955219 0. 0.05716883 -0.26226716]
[ 0. -1.56695572 0.33177542 0.76481254]]
data[:, :, 1] =
[[ 6.06305025e-02 1.38680730e+00 0.00000000e+00 9.97566746e-05]
[ 2.40865138e+00 -1.40405155e+00 1.78426895e+00 0.00000000e+00]
[-1.49533203e+00 3.64435068e+00 -8.40249625e-01 -1.14382995e+00]
[-1.07908935e+00 -1.24658320e+00 -6.77237653e-01 -7.63311142e-01]
[ 1.54802072e+00 1.56105671e+00 0.00000000e+00 9.52013083e-01]]
data[:, :, 2] =
[[-0.11041614 -1.9174132 -0. 0. ]
[ 2.1336541 2.32864253 2.30408939 1.29730097]
[-2.11596787 -5.52822162 0. -0. ]
[-0.61243171 1.42532066 -0.53374239 -0.34866756]
[ 0. -0. 0. 0.84136435]]
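Because the pattern was supplied explicitly, we can confirm that every entry marked unknown in pattern is zeroed out in the returned data (a small illustrative check):
# All entries where the pattern is 0 (unknown) should be zero in the data
print(np.all(data.data[pattern.data == 0] == 0))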
Creating Sparse Problems (CP only)
If we assume each model parameter is the input to a Poisson process, then we can generate sparse test problems. This requires that all factor matrices and the weights (lambda) be nonnegative. The default factor generator ('randn') won't work since it produces both positive and negative values.
# Generate factor matrices with a few large entries in each column
# This will be the basis of our solution
shape = (20, 15, 10)
num_factors = 4
A = []
for n in range(len(shape)):
    A.append(np.random.rand(shape[n], num_factors))
    for r in range(num_factors):
        p = np.random.permutation(np.arange(shape[n]))
        idx = p[1 : round(0.2 * shape[n])]
        A[n][idx, r] *= 10
S = ttb.ktensor(A)
# S.normalize(sort=True);
S.normalize(sort=True).weights
array([318.7110046 , 217.52096987, 199.52878341, 131.71421815])
# Create sparse test problem based on the solution.
# `sparse_generation` controls how many insertions to make based on the solution.
# The weight vector of the solution is automatically rescaled to match the number of insertions.
existing_params = ExistingCPSolution(S, noise=0.0, sparse_generation=500)
print(f"{S.weights=}")
solution, data = create_problem(existing_params)
print(
    f"num_nonzeros: {data.nnz}\n"
    f"total_insertions: {np.sum(data.vals)}\n"
    f"original weights vs rescaled: {S.weights / solution.weights}"
)
S.weights=array([318.7110046 , 217.52096987, 199.52878341, 131.71421815])
num_nonzeros: 158
total_insertions: 500.0
original weights vs rescaled: [1.73494995 1.73494995 1.73494995 1.73494995]
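Note that the number of distinct nonzeros (158) is smaller than the requested 500 insertions: repeated draws can land on the same entry, so its count accumulates while the values still sum to sparse_generation. A quick illustrative check:
# Distinct nonzeros never exceed the number of insertions, and the values
# (counts) sum to the requested sparse_generation
print(data.nnz <= 500, np.sum(data.vals) == 500)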